1 /*
2  * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #include "zstd_compress_internal.h"
12 #include "hist.h"
13 #include "zstd_opt.h"
14 
15 
16 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17 #define ZSTD_MAX_PRICE     (1<<30)
18 
19 #define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
20 
21 
22 /*-*************************************
23 *  Price functions for optimal parser
24 ***************************************/
25 
26 #if 0    /* approximation at bit level (for tests) */
27 #  define BITCOST_ACCURACY 0
28 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29 #  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30 #elif 0  /* fractional bit accuracy (for tests) */
31 #  define BITCOST_ACCURACY 8
32 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 #  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
34 #else    /* opt==approx, ultra==accurate */
35 #  define BITCOST_ACCURACY 8
36 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 #  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38 #endif
39 
40 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
41 {
42     return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
43 }
44 
45 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
46 {
47     U32 const stat = rawStat + 1;
48     U32 const hb = ZSTD_highbit32(stat);
49     U32 const BWeight = hb * BITCOST_MULTIPLIER;
50     U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51     U32 const weight = BWeight + FWeight;
52     assert(hb + BITCOST_ACCURACY < 31);
53     return weight;
54 }
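/* Example : prices are expressed in 1/BITCOST_MULTIPLIER-th of a bit.
 * With BITCOST_ACCURACY==8, rawStat=999 gives stat=1000, hb=9 (512 <= 1000 < 1024),
 * BWeight = 9*256 = 2304, FWeight = (1000<<8)>>9 = 500, hence weight = 2804,
 * i.e. ~10.95 "bits" : a piecewise-linear approximation of 256*(log2(1000)+1) ~ 2807.
 * Symbol prices below are then formed as WEIGHT(totalSum) - WEIGHT(symbolFreq),
 * which approximates log2(totalSum/symbolFreq), the symbol's Shannon cost. */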
55 
56 #if (DEBUGLEVEL>=2)
57 /* debugging function,
58  * @return price in bytes as fractional value
59  * for debug messages only */
60 MEM_STATIC double ZSTD_fCost(U32 price)
61 {
62     return (double)price / (BITCOST_MULTIPLIER*8);
63 }
64 #endif
65 
66 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
67 {
68     return optPtr->literalCompressionMode != ZSTD_ps_disable;
69 }
70 
71 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
72 {
73     if (ZSTD_compressedLiterals(optPtr))
74         optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
75     optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
76     optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
77     optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
78 }
79 
80 
81 static U32 sum_u32(const unsigned table[], size_t nbElts)
82 {
83     size_t n;
84     U32 total = 0;
85     for (n=0; n<nbElts; n++) {
86         total += table[n];
87     }
88     return total;
89 }
90 
91 static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
92 {
93     U32 s, sum=0;
94     DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95     assert(shift < 30);
96     for (s=0; s<lastEltIndex+1; s++) {
97         table[s] = 1 + (table[s] >> shift);
98         sum += table[s];
99     }
100     return sum;
101 }
102 
103 /* ZSTD_scaleStats() :
104  * reduce all elements in table if their sum is too large
105  * return the resulting sum of elements */
106 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107 {
108     U32 const prevsum = sum_u32(table, lastEltIndex+1);
109     U32 const factor = prevsum >> logTarget;
110     DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111     assert(logTarget < 30);
112     if (factor <= 1) return prevsum;
113     return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114 }
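/* Example : with logTarget==11 and prevsum==40000, factor = 40000>>11 = 19,
 * so shift = ZSTD_highbit32(19) = 4 and every cell becomes 1 + (cell >> 4),
 * bringing the total back close to the 2^logTarget target
 * while keeping every frequency >= 1 (so prices stay finite). */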
115 
116 /* ZSTD_rescaleFreqs() :
117  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
118  *    take hints from dictionary if there is one
119  *    and init from zero if there is none,
120  *    using src for literals stats, and baseline stats for sequence symbols
121  * otherwise downscale existing stats, to be used as seed for next block.
122  */
123 static void
124 ZSTD_rescaleFreqs(optState_t* const optPtr,
125             const BYTE* const src, size_t const srcSize,
126                   int const optLevel)
127 {
128     int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
129     DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130     optPtr->priceType = zop_dynamic;
131 
132     if (optPtr->litLengthSum == 0) {  /* first block : init */
133         if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
134             DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
135             optPtr->priceType = zop_predef;
136         }
137 
138         assert(optPtr->symbolCosts != NULL);
139         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140             /* huffman table presumed generated by dictionary */
141             optPtr->priceType = zop_dynamic;
142 
143             if (compressedLiterals) {
144                 unsigned lit;
145                 assert(optPtr->litFreq != NULL);
146                 optPtr->litSum = 0;
147                 for (lit=0; lit<=MaxLit; lit++) {
148                     U32 const scaleLog = 11;   /* scale to 2K */
149                     U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
150                     assert(bitCost <= scaleLog);
151                     optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
152                     optPtr->litSum += optPtr->litFreq[lit];
153             }   }
154 
155             {   unsigned ll;
156                 FSE_CState_t llstate;
157                 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
158                 optPtr->litLengthSum = 0;
159                 for (ll=0; ll<=MaxLL; ll++) {
160                     U32 const scaleLog = 10;   /* scale to 1K */
161                     U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
162                     assert(bitCost < scaleLog);
163                     optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
164                     optPtr->litLengthSum += optPtr->litLengthFreq[ll];
165             }   }
166 
167             {   unsigned ml;
168                 FSE_CState_t mlstate;
169                 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
170                 optPtr->matchLengthSum = 0;
171                 for (ml=0; ml<=MaxML; ml++) {
172                     U32 const scaleLog = 10;
173                     U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
174                     assert(bitCost < scaleLog);
175                     optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
176                     optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
177             }   }
178 
179             {   unsigned of;
180                 FSE_CState_t ofstate;
181                 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
182                 optPtr->offCodeSum = 0;
183                 for (of=0; of<=MaxOff; of++) {
184                     U32 const scaleLog = 10;
185                     U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
186                     assert(bitCost < scaleLog);
187                     optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
188                     optPtr->offCodeSum += optPtr->offCodeFreq[of];
189             }   }
190 
191         } else {  /* not a dictionary */
192 
193             assert(optPtr->litFreq != NULL);
194             if (compressedLiterals) {
195                 unsigned lit = MaxLit;
196                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
197                 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
198             }
199 
200             {   unsigned const baseLLfreqs[MaxLL+1] = {
201                     4, 2, 1, 1, 1, 1, 1, 1,
202                     1, 1, 1, 1, 1, 1, 1, 1,
203                     1, 1, 1, 1, 1, 1, 1, 1,
204                     1, 1, 1, 1, 1, 1, 1, 1,
205                     1, 1, 1, 1
206                 };
207                 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
208             }
209 
210             {   unsigned ml;
211                 for (ml=0; ml<=MaxML; ml++)
212                     optPtr->matchLengthFreq[ml] = 1;
213             }
214             optPtr->matchLengthSum = MaxML+1;
215 
216             {   unsigned const baseOFCfreqs[MaxOff+1] = {
217                     6, 2, 1, 1, 2, 3, 4, 4,
218                     4, 3, 2, 1, 1, 1, 1, 1,
219                     1, 1, 1, 1, 1, 1, 1, 1,
220                     1, 1, 1, 1, 1, 1, 1, 1
221                 };
222                 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
223             }
224 
225 
226         }
227 
228     } else {   /* new block : re-use previous statistics, scaled down */
229 
230         if (compressedLiterals)
231             optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
232         optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
233         optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
234         optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
235     }
236 
237     ZSTD_setBasePrices(optPtr, optLevel);
238 }
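/* Dictionary init example : a literal which the dictionary's Huffman table
 * codes in 3 bits receives litFreq = 1<<(11-3) = 256 out of a ~2K total,
 * i.e. an implied probability of ~1/8, consistent with its 3-bit cost.
 * Symbols absent from the table (bitCost==0) get frequency 1, so that
 * their price remains finite. */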
239 
240 /* ZSTD_rawLiteralsCost() :
241  * price of literals (only) in the specified segment (whose length can be 0).
242  * does not include the price of the literalLength symbol */
243 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
244                                 const optState_t* const optPtr,
245                                 int optLevel)
246 {
247     if (litLength == 0) return 0;
248 
249     if (!ZSTD_compressedLiterals(optPtr))
250         return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bits per literal. */
251 
252     if (optPtr->priceType == zop_predef)
253         return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistics used */
254 
255     /* dynamic statistics */
256     {   U32 price = litLength * optPtr->litSumBasePrice;
257         U32 u;
258         for (u=0; u < litLength; u++) {
259             assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
260             price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
261         }
262         return price;
263     }
264 }
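/* In the dynamic branch above, each literal ends up costing
 * WEIGHT(litSum) - WEIGHT(litFreq[lit]) ~ log2(litSum/litFreq[lit]) bits
 * (scaled by BITCOST_MULTIPLIER) : the subtraction is batched by starting
 * from litLength * litSumBasePrice and removing each literal's own weight. */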
265 
266 /* ZSTD_litLengthPrice() :
267  * cost of literalLength symbol */
268 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
269 {
270     if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
271 
272     /* dynamic statistics */
273     {   U32 const llCode = ZSTD_LLcode(litLength);
274         return (LL_bits[llCode] * BITCOST_MULTIPLIER)
275              + optPtr->litLengthSumBasePrice
276              - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
277     }
278 }
279 
280 /* ZSTD_getMatchPrice() :
281  * Provides the cost of the match part (offset + matchLength) of a sequence
282  * Must be combined with the literals cost (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice()) to get the full cost of a sequence.
283  * optLevel: when <2, favors small offsets for decompression speed (improved cache efficiency) */
284 FORCE_INLINE_TEMPLATE U32
285 ZSTD_getMatchPrice(U32 const offset,
286                    U32 const matchLength,
287              const optState_t* const optPtr,
288                    int const optLevel)
289 {
290     U32 price;
291     U32 const offCode = ZSTD_highbit32(offset+1);
292     U32 const mlBase = matchLength - MINMATCH;
293     assert(matchLength >= MINMATCH);
294 
295     if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
296         return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
297 
298     /* dynamic statistics */
299     price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
300     if ((optLevel<2) /*static*/ && offCode >= 20)
301         price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
302 
303     /* match Length */
304     {   U32 const mlCode = ZSTD_MLcode(mlBase);
305         price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
306     }
307 
308     price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
309 
310     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
311     return price;
312 }
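/* Example : for offset=1024 and matchLength=18 in dynamic mode,
 * offCode = ZSTD_highbit32(1025) = 10, so the offset costs 10 raw bits
 * plus the statistical cost of the offCode==10 symbol; the match length
 * costs ML_bits[mlCode] raw bits plus the mlCode symbol cost;
 * finally BITCOST_MULTIPLIER/5 (~0.2 bit) is added to discourage
 * producing many short sequences. */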
313 
314 /* ZSTD_updateStats() :
315  * assumption : literals + litLength <= iend */
316 static void ZSTD_updateStats(optState_t* const optPtr,
317                              U32 litLength, const BYTE* literals,
318                              U32 offsetCode, U32 matchLength)
319 {
320     /* literals */
321     if (ZSTD_compressedLiterals(optPtr)) {
322         U32 u;
323         for (u=0; u < litLength; u++)
324             optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
325         optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
326     }
327 
328     /* literal Length */
329     {   U32 const llCode = ZSTD_LLcode(litLength);
330         optPtr->litLengthFreq[llCode]++;
331         optPtr->litLengthSum++;
332     }
333 
334     /* match offset code (0-2=>repCode; 3+=>offset+2) */
335     {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
336         assert(offCode <= MaxOff);
337         optPtr->offCodeFreq[offCode]++;
338         optPtr->offCodeSum++;
339     }
340 
341     /* match Length */
342     {   U32 const mlBase = matchLength - MINMATCH;
343         U32 const mlCode = ZSTD_MLcode(mlBase);
344         optPtr->matchLengthFreq[mlCode]++;
345         optPtr->matchLengthSum++;
346     }
347 }
348 
349 
350 /* ZSTD_readMINMATCH() :
351  * only safe for equality comparisons (the value read is endian-dependent)
352  * assumption : at least 4 readable bytes remain at memPtr (memPtr is >= 4 bytes before the end of the buffer) */
353 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
354 {
355     switch (length)
356     {
357     default :
358     case 4 : return MEM_read32(memPtr);
359     case 3 : if (MEM_isLittleEndian())
360                 return MEM_read32(memPtr)<<8;
361              else
362                 return MEM_read32(memPtr)>>8;
363     }
364 }
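/* With length==3, the 32-bit read is shifted so that only the first 3 bytes
 * carry information : two values compare equal iff their first 3 bytes match
 * (the 4th byte is shifted out on either endianness).
 * The raw value itself is endian-dependent, hence "comparisons only". */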
365 
366 
367 /* Update hashTable3 up to ip (excluded)
368    Assumption : always within prefix (i.e. not within extDict) */
369 static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
370                                               U32* nextToUpdate3,
371                                               const BYTE* const ip)
372 {
373     U32* const hashTable3 = ms->hashTable3;
374     U32 const hashLog3 = ms->hashLog3;
375     const BYTE* const base = ms->window.base;
376     U32 idx = *nextToUpdate3;
377     U32 const target = (U32)(ip - base);
378     size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
379     assert(hashLog3 > 0);
380 
381     while(idx < target) {
382         hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
383         idx++;
384     }
385 
386     *nextToUpdate3 = target;
387     return hashTable3[hash3];
388 }
389 
390 
391 /*-*************************************
392 *  Binary Tree search
393 ***************************************/
394 /** ZSTD_insertBt1() : add one or multiple positions to tree.
395  * @param ip assumed <= iend-8 .
396  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
397  * @return : nb of positions added */
398 static U32 ZSTD_insertBt1(
399                 const ZSTD_matchState_t* ms,
400                 const BYTE* const ip, const BYTE* const iend,
401                 U32 const target,
402                 U32 const mls, const int extDict)
403 {
404     const ZSTD_compressionParameters* const cParams = &ms->cParams;
405     U32*   const hashTable = ms->hashTable;
406     U32    const hashLog = cParams->hashLog;
407     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
408     U32*   const bt = ms->chainTable;
409     U32    const btLog  = cParams->chainLog - 1;
410     U32    const btMask = (1 << btLog) - 1;
411     U32 matchIndex = hashTable[h];
412     size_t commonLengthSmaller=0, commonLengthLarger=0;
413     const BYTE* const base = ms->window.base;
414     const BYTE* const dictBase = ms->window.dictBase;
415     const U32 dictLimit = ms->window.dictLimit;
416     const BYTE* const dictEnd = dictBase + dictLimit;
417     const BYTE* const prefixStart = base + dictLimit;
418     const BYTE* match;
419     const U32 curr = (U32)(ip-base);
420     const U32 btLow = btMask >= curr ? 0 : curr - btMask;
421     U32* smallerPtr = bt + 2*(curr&btMask);
422     U32* largerPtr  = smallerPtr + 1;
423     U32 dummy32;   /* to be nullified at the end */
424     /* windowLow is based on target because
425      * we only need positions that will be in the window at the end of the tree update.
426      */
427     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
428     U32 matchEndIdx = curr+8+1;
429     size_t bestLength = 8;
430     U32 nbCompares = 1U << cParams->searchLog;
431 #ifdef ZSTD_C_PREDICT
432     U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
433     U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
434     predictedSmall += (predictedSmall>0);
435     predictedLarge += (predictedLarge>0);
436 #endif /* ZSTD_C_PREDICT */
437 
438     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
439 
440     assert(curr <= target);
441     assert(ip <= iend-8);   /* required for h calculation */
442     hashTable[h] = curr;   /* Update Hash Table */
443 
444     assert(windowLow > 0);
445     for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
446         U32* const nextPtr = bt + 2*(matchIndex & btMask);
447         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
448         assert(matchIndex < curr);
449 
450 #ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
451         const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
452         if (matchIndex == predictedSmall) {
453             /* no need to check length, result known */
454             *smallerPtr = matchIndex;
455             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
456             smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
457             matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
458             predictedSmall = predictPtr[1] + (predictPtr[1]>0);
459             continue;
460         }
461         if (matchIndex == predictedLarge) {
462             *largerPtr = matchIndex;
463             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
464             largerPtr = nextPtr;
465             matchIndex = nextPtr[0];
466             predictedLarge = predictPtr[0] + (predictPtr[0]>0);
467             continue;
468         }
469 #endif
470 
471         if (!extDict || (matchIndex+matchLength >= dictLimit)) {
472             assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
473             match = base + matchIndex;
474             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
475         } else {
476             match = dictBase + matchIndex;
477             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
478             if (matchIndex+matchLength >= dictLimit)
479                 match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
480         }
481 
482         if (matchLength > bestLength) {
483             bestLength = matchLength;
484             if (matchLength > matchEndIdx - matchIndex)
485                 matchEndIdx = matchIndex + (U32)matchLength;
486         }
487 
488         if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
489             break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions could corrupt the tree */
490         }
491 
492         if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
493             /* match is smaller than current */
494             *smallerPtr = matchIndex;             /* update smaller idx */
495             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
496             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
497             smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
498             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
499         } else {
500             /* match is larger than current */
501             *largerPtr = matchIndex;
502             commonLengthLarger = matchLength;
503             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
504             largerPtr = nextPtr;
505             matchIndex = nextPtr[0];
506     }   }
507 
508     *smallerPtr = *largerPtr = 0;
509     {   U32 positions = 0;
510         if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
511         assert(matchEndIdx > curr + 8);
512         return MAX(positions, matchEndIdx - (curr + 8));
513     }
514 }
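/* Note : the chainTable is used here as a binary tree of positions ordered by
 * suffix content. commonLengthSmaller/commonLengthLarger record how many leading
 * bytes are already known equal on each side, so each node comparison resumes at
 * matchLength = MIN(commonLengthSmaller, commonLengthLarger) instead of byte 0.
 * The return value tells the caller how far it may advance, so that positions
 * already covered by a long match are not re-inserted one by one. */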
515 
516 FORCE_INLINE_TEMPLATE
517 void ZSTD_updateTree_internal(
518                 ZSTD_matchState_t* ms,
519                 const BYTE* const ip, const BYTE* const iend,
520                 const U32 mls, const ZSTD_dictMode_e dictMode)
521 {
522     const BYTE* const base = ms->window.base;
523     U32 const target = (U32)(ip - base);
524     U32 idx = ms->nextToUpdate;
525     DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
526                 idx, target, dictMode);
527 
528     while(idx < target) {
529         U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
530         assert(idx < (U32)(idx + forward));
531         idx += forward;
532     }
533     assert((size_t)(ip - base) <= (size_t)(U32)(-1));
534     assert((size_t)(iend - base) <= (size_t)(U32)(-1));
535     ms->nextToUpdate = target;
536 }
537 
538 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
539     ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
540 }
541 
542 FORCE_INLINE_TEMPLATE
543 U32 ZSTD_insertBtAndGetAllMatches (
544                     ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
545                     ZSTD_matchState_t* ms,
546                     U32* nextToUpdate3,
547                     const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
548                     const U32 rep[ZSTD_REP_NUM],
549                     U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
550                     const U32 lengthToBeat,
551                     U32 const mls /* template */)
552 {
553     const ZSTD_compressionParameters* const cParams = &ms->cParams;
554     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
555     const BYTE* const base = ms->window.base;
556     U32 const curr = (U32)(ip-base);
557     U32 const hashLog = cParams->hashLog;
558     U32 const minMatch = (mls==3) ? 3 : 4;
559     U32* const hashTable = ms->hashTable;
560     size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
561     U32 matchIndex  = hashTable[h];
562     U32* const bt   = ms->chainTable;
563     U32 const btLog = cParams->chainLog - 1;
564     U32 const btMask= (1U << btLog) - 1;
565     size_t commonLengthSmaller=0, commonLengthLarger=0;
566     const BYTE* const dictBase = ms->window.dictBase;
567     U32 const dictLimit = ms->window.dictLimit;
568     const BYTE* const dictEnd = dictBase + dictLimit;
569     const BYTE* const prefixStart = base + dictLimit;
570     U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
571     U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
572     U32 const matchLow = windowLow ? windowLow : 1;
573     U32* smallerPtr = bt + 2*(curr&btMask);
574     U32* largerPtr  = bt + 2*(curr&btMask) + 1;
575     U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
576     U32 dummy32;   /* to be nullified at the end */
577     U32 mnum = 0;
578     U32 nbCompares = 1U << cParams->searchLog;
579 
580     const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
581     const ZSTD_compressionParameters* const dmsCParams =
582                                       dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
583     const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
584     const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
585     U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
586     U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
587     U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
588     U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
589     U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
590     U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
591     U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
592 
593     size_t bestLength = lengthToBeat-1;
594     DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
595 
596     /* check repCode */
597     assert(ll0 <= 1);   /* necessarily 1 or 0 */
598     {   U32 const lastR = ZSTD_REP_NUM + ll0;
599         U32 repCode;
600         for (repCode = ll0; repCode < lastR; repCode++) {
601             U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
602             U32 const repIndex = curr - repOffset;
603             U32 repLen = 0;
604             assert(curr >= dictLimit);
605             if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
606                 /* We must validate the repcode offset because when we're using a dictionary the
607                  * valid offset range shrinks when the dictionary goes out of bounds.
608                  */
609                 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
610                     repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
611                 }
612             } else {  /* repIndex < dictLimit || repIndex >= curr */
613                 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
614                                              dmsBase + repIndex - dmsIndexDelta :
615                                              dictBase + repIndex;
616                 assert(curr >= windowLow);
617                 if ( dictMode == ZSTD_extDict
618                   && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
619                      & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
620                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
621                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
622                 }
623                 if (dictMode == ZSTD_dictMatchState
624                   && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
625                      & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
626                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
627                     repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
628             }   }
629             /* save longer solution */
630             if (repLen > bestLength) {
631                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
632                             repCode, ll0, repOffset, repLen);
633                 bestLength = repLen;
634                 matches[mnum].off = repCode - ll0;
635                 matches[mnum].len = (U32)repLen;
636                 mnum++;
637                 if ( (repLen > sufficient_len)
638                    | (ip+repLen == iLimit) ) {  /* best possible */
639                     return mnum;
640     }   }   }   }
641 
642     /* HC3 match finder */
643     if ((mls == 3) /*static*/ && (bestLength < mls)) {
644         U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
645         if ((matchIndex3 >= matchLow)
646           & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
647             size_t mlen;
648             if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
649                 const BYTE* const match = base + matchIndex3;
650                 mlen = ZSTD_count(ip, match, iLimit);
651             } else {
652                 const BYTE* const match = dictBase + matchIndex3;
653                 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
654             }
655 
656             /* save best solution */
657             if (mlen >= mls /* == 3 > bestLength */) {
658                 DEBUGLOG(8, "found small match with hlog3, of length %u",
659                             (U32)mlen);
660                 bestLength = mlen;
661                 assert(curr > matchIndex3);
662                 assert(mnum==0);  /* no prior solution */
663                 matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
664                 matches[0].len = (U32)mlen;
665                 mnum = 1;
666                 if ( (mlen > sufficient_len) |
667                      (ip+mlen == iLimit) ) {  /* best possible length */
668                     ms->nextToUpdate = curr+1;  /* skip insertion */
669                     return 1;
670         }   }   }
671         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
672     }  /* if (mls == 3) */
673 
674     hashTable[h] = curr;   /* Update Hash Table */
675 
676     for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
677         U32* const nextPtr = bt + 2*(matchIndex & btMask);
678         const BYTE* match;
679         size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
680         assert(curr > matchIndex);
681 
682         if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
683             assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
684             match = base + matchIndex;
685             if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
686             matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
687         } else {
688             match = dictBase + matchIndex;
689             assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
690             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
691             if (matchIndex+matchLength >= dictLimit)
692                 match = base + matchIndex;   /* prepare for match[matchLength] read */
693         }
694 
695         if (matchLength > bestLength) {
696             DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
697                     (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
698             assert(matchEndIdx > matchIndex);
699             if (matchLength > matchEndIdx - matchIndex)
700                 matchEndIdx = matchIndex + (U32)matchLength;
701             bestLength = matchLength;
702             matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
703             matches[mnum].len = (U32)matchLength;
704             mnum++;
705             if ( (matchLength > ZSTD_OPT_NUM)
706                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
707                 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
708                 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
709         }   }
710 
711         if (match[matchLength] < ip[matchLength]) {
712             /* match smaller than current */
713             *smallerPtr = matchIndex;             /* update smaller idx */
714             commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
715             if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
716             smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
717             matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
718         } else {
719             *largerPtr = matchIndex;
720             commonLengthLarger = matchLength;
721             if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
722             largerPtr = nextPtr;
723             matchIndex = nextPtr[0];
724     }   }
725 
726     *smallerPtr = *largerPtr = 0;
727 
728     assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
729     if (dictMode == ZSTD_dictMatchState && nbCompares) {
730         size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
731         U32 dictMatchIndex = dms->hashTable[dmsH];
732         const U32* const dmsBt = dms->chainTable;
733         commonLengthSmaller = commonLengthLarger = 0;
734         for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
735             const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
736             size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
737             const BYTE* match = dmsBase + dictMatchIndex;
738             matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
739             if (dictMatchIndex+matchLength >= dmsHighLimit)
740                 match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
741 
742             if (matchLength > bestLength) {
743                 matchIndex = dictMatchIndex + dmsIndexDelta;
744                 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
745                         (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
746                 if (matchLength > matchEndIdx - matchIndex)
747                     matchEndIdx = matchIndex + (U32)matchLength;
748                 bestLength = matchLength;
749                 matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
750                 matches[mnum].len = (U32)matchLength;
751                 mnum++;
752                 if ( (matchLength > ZSTD_OPT_NUM)
753                    | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
754                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
755             }   }
756 
757             if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
758             if (match[matchLength] < ip[matchLength]) {
759                 commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
760                 dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
761             } else {
762                 /* match is larger than current */
763                 commonLengthLarger = matchLength;
764                 dictMatchIndex = nextPtr[0];
765     }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
766 
767     assert(matchEndIdx > curr+8);
768     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
769     return mnum;
770 }
771 
772 typedef U32 (*ZSTD_getAllMatchesFn)(
773     ZSTD_match_t*,
774     ZSTD_matchState_t*,
775     U32*,
776     const BYTE*,
777     const BYTE*,
778     const U32 rep[ZSTD_REP_NUM],
779     U32 const ll0,
780     U32 const lengthToBeat);
781 
782 FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
783         ZSTD_match_t* matches,
784         ZSTD_matchState_t* ms,
785         U32* nextToUpdate3,
786         const BYTE* ip,
787         const BYTE* const iHighLimit,
788         const U32 rep[ZSTD_REP_NUM],
789         U32 const ll0,
790         U32 const lengthToBeat,
791         const ZSTD_dictMode_e dictMode,
792         const U32 mls)
793 {
794     assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
795     DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
796     if (ip < ms->window.base + ms->nextToUpdate)
797         return 0;   /* skipped area */
798     ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
799     return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
800 }
801 
802 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
803 
804 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
805     static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
806             ZSTD_match_t* matches,                             \
807             ZSTD_matchState_t* ms,                             \
808             U32* nextToUpdate3,                                \
809             const BYTE* ip,                                    \
810             const BYTE* const iHighLimit,                      \
811             const U32 rep[ZSTD_REP_NUM],                       \
812             U32 const ll0,                                     \
813             U32 const lengthToBeat)                            \
814     {                                                          \
815         return ZSTD_btGetAllMatches_internal(                  \
816                 matches, ms, nextToUpdate3, ip, iHighLimit,    \
817                 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
818     }
819 
820 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
821     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
822     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
823     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
824     GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
825 
826 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
827 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
828 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
829 
830 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
831     {                                            \
832         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
833         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
834         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
835         ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
836     }
837 
838 static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
839 {
840     ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
841         ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
842         ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
843         ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
844     };
845     U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
846     assert((U32)dictMode < 3);
847     assert(mls - 3 < 4);
848     return getAllMatchesFns[(int)dictMode][mls - 3];
849 }
850 
851 /*************************
852 *  LDM helper functions  *
853 *************************/
854 
855 /* Struct containing info needed to make decision about ldm inclusion */
856 typedef struct {
857     rawSeqStore_t seqStore;         /* External match candidates store for this block */
858     U32 startPosInBlock;            /* Start position of the current match candidate */
859     U32 endPosInBlock;              /* End position of the current match candidate */
860     U32 offset;                     /* Offset of the match candidate */
861 } ZSTD_optLdm_t;
862 
863 /* ZSTD_optLdm_skipRawSeqStoreBytes():
864  * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
865  */
866 static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
867     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
868     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
869         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
870         if (currPos >= currSeq.litLength + currSeq.matchLength) {
871             currPos -= currSeq.litLength + currSeq.matchLength;
872             rawSeqStore->pos++;
873         } else {
874             rawSeqStore->posInSequence = currPos;
875             break;
876         }
877     }
878     if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
879         rawSeqStore->posInSequence = 0;
880     }
881 }
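/* Example : with seq[pos] = {litLength:5, matchLength:20} and posInSequence==0,
 * skipping 30 bytes consumes that whole 25-byte sequence (pos++), then leaves
 * posInSequence==5 inside the following sequence (assuming it is longer than 5 bytes). */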
882 
883 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
884  * Calculates the beginning and end of the next match in the current block.
885  * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
886  */
887 static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
888                                                    U32 blockBytesRemaining) {
889     rawSeq currSeq;
890     U32 currBlockEndPos;
891     U32 literalsBytesRemaining;
892     U32 matchBytesRemaining;
893 
894     /* Setting match end position to MAX to ensure we never use an LDM during this block */
895     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
896         optLdm->startPosInBlock = UINT_MAX;
897         optLdm->endPosInBlock = UINT_MAX;
898         return;
899     }
900     /* Calculate appropriate bytes left in matchLength and litLength after adjusting
901        based on ldmSeqStore->posInSequence */
902     currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
903     assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
904     currBlockEndPos = currPosInBlock + blockBytesRemaining;
905     literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
906             currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
907             0;
908     matchBytesRemaining = (literalsBytesRemaining == 0) ?
909             currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
910             currSeq.matchLength;
911 
912     /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
913     if (literalsBytesRemaining >= blockBytesRemaining) {
914         optLdm->startPosInBlock = UINT_MAX;
915         optLdm->endPosInBlock = UINT_MAX;
916         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
917         return;
918     }
919 
920     /* Matches may be < MINMATCH by this process. In that case, we will reject them
921        when we are deciding whether or not to add the ldm */
922     optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
923     optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
924     optLdm->offset = currSeq.offset;
925 
926     if (optLdm->endPosInBlock > currBlockEndPos) {
927         /* Match ends after the block ends, we can't use the whole match */
928         optLdm->endPosInBlock = currBlockEndPos;
929         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
930     } else {
931         /* Consume nb of bytes equal to size of sequence left */
932         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
933     }
934 }
935 
936 /* ZSTD_optLdm_maybeAddMatch():
937  * Adds a match into 'matches' if it is long enough, based on its 'startPosInBlock'
938  * and 'endPosInBlock'. Maintains the correct ordering of 'matches'.
939  */
940 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
941                                       ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
942     U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
943     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
944     U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
945     U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
946 
947     /* Ensure that current block position is not outside of the match */
948     if (currPosInBlock < optLdm->startPosInBlock
949       || currPosInBlock >= optLdm->endPosInBlock
950       || candidateMatchLength < MINMATCH) {
951         return;
952     }
953 
954     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
955         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
956                  candidateOffCode, candidateMatchLength, currPosInBlock);
957         matches[*nbMatches].len = candidateMatchLength;
958         matches[*nbMatches].off = candidateOffCode;
959         (*nbMatches)++;
960     }
961 }
962 
963 /* ZSTD_optLdm_processMatchCandidate():
964  * Wrapper function to update ldm seq store and call ldm functions as necessary.
965  */
966 static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
967                                               U32 currPosInBlock, U32 remainingBytes) {
968     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
969         return;
970     }
971 
972     if (currPosInBlock >= optLdm->endPosInBlock) {
973         if (currPosInBlock > optLdm->endPosInBlock) {
974             /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
975              * at the end of a match from the ldm seq store, and will often be some bytes
976              * past endPosInBlock. As such, we need to correct for these "overshoots"
977              */
978             U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
979             ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
980         }
981         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
982     }
983     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
984 }
985 
986 
987 /*-*******************************
988 *  Optimal parser
989 *********************************/
990 
991 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
992 {
993     return sol.litlen + sol.mlen;
994 }
995 
996 #if 0 /* debug */
997 
998 static void
999 listStats(const U32* table, int lastEltID)
1000 {
1001     int const nbElts = lastEltID + 1;
1002     int enb;
1003     for (enb=0; enb < nbElts; enb++) {
1004         (void)table;
1005         /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
1006         RAWLOG(2, "%4i,", table[enb]);
1007     }
1008     RAWLOG(2, " \n");
1009 }
1010 
1011 #endif
1012 
1013 FORCE_INLINE_TEMPLATE size_t
1014 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1015                                seqStore_t* seqStore,
1016                                U32 rep[ZSTD_REP_NUM],
1017                          const void* src, size_t srcSize,
1018                          const int optLevel,
1019                          const ZSTD_dictMode_e dictMode)
1020 {
1021     optState_t* const optStatePtr = &ms->opt;
1022     const BYTE* const istart = (const BYTE*)src;
1023     const BYTE* ip = istart;
1024     const BYTE* anchor = istart;
1025     const BYTE* const iend = istart + srcSize;
1026     const BYTE* const ilimit = iend - 8;
1027     const BYTE* const base = ms->window.base;
1028     const BYTE* const prefixStart = base + ms->window.dictLimit;
1029     const ZSTD_compressionParameters* const cParams = &ms->cParams;
1030 
1031     ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1032 
1033     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1034     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1035     U32 nextToUpdate3 = ms->nextToUpdate;
1036 
1037     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1038     ZSTD_match_t* const matches = optStatePtr->matchTable;
1039     ZSTD_optimal_t lastSequence;
1040     ZSTD_optLdm_t optLdm;
1041 
1042     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1043     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1044     ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1045 
1046     /* init */
1047     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1048                 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1049     assert(optLevel <= 2);
1050     ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1051     ip += (ip==prefixStart);
1052 
1053     /* Match Loop */
1054     while (ip < ilimit) {
1055         U32 cur, last_pos = 0;
1056 
1057         /* find first match */
1058         {   U32 const litlen = (U32)(ip - anchor);
1059             U32 const ll0 = !litlen;
1060             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1061             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1062                                               (U32)(ip-istart), (U32)(iend - ip));
1063             if (!nbMatches) { ip++; continue; }
1064 
1065             /* initialize opt[0] */
1066             { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1067             opt[0].mlen = 0;  /* means is_a_literal */
1068             opt[0].litlen = litlen;
1069             /* We don't need to include the actual price of the literals because
1070              * it is static for the duration of the forward pass, and is included
1071              * in every price. We include the literal length to avoid negative
1072              * prices when we subtract the previous literal length.
1073              */
1074             opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1075 
1076             /* large match -> immediate encoding */
1077             {   U32 const maxML = matches[nbMatches-1].len;
1078                 U32 const maxOffset = matches[nbMatches-1].off;
1079                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1080                             nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
1081 
                if (maxML > sufficient_len) {
                    lastSequence.litlen = litlen;
                    lastSequence.mlen = maxML;
                    lastSequence.off = maxOffset;
                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
                                maxML, sufficient_len);
                    cur = 0;
                    last_pos = ZSTD_totalLen(lastSequence);
                    goto _shortestPath;
            }   }

            /* set prices for first matches starting position == 0 */
            assert(opt[0].price >= 0);
            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 pos;
                U32 matchNb;
                for (pos = 1; pos < minMatch; pos++) {
                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                }
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const end = matches[matchNb].len;
                    for ( ; pos <= end ; pos++ ) {
                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
                        U32 const sequencePrice = literalsPrice + matchPrice;
                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                    pos, ZSTD_fCost(sequencePrice));
                        opt[pos].mlen = pos;
                        opt[pos].off = offset;
                        opt[pos].litlen = litlen;
                        opt[pos].price = (int)sequencePrice;
                }   }
                last_pos = pos-1;
            }
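            /* opt[minMatch..last_pos] now carries a first estimate for ending
             * the series at that position: litlen literals followed by a single
             * match of length pos, priced with the first candidate long enough
             * to reach pos. Positions 1..minMatch-1 stay at ZSTD_MAX_PRICE and
             * can only be reached by literals during the forward scan below. */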
        }

        /* check further positions */
        for (cur = 1; cur <= last_pos; cur++) {
            const BYTE* const inr = ip + cur;
            assert(cur < ZSTD_OPT_NUM);
            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)

            /* Fix current position with one literal if cheaper */
            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                int const price = opt[cur-1].price
                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                assert(price < 1000000000); /* overflow check */
                if (price <= opt[cur].price) {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
                    opt[cur].mlen = 0;
                    opt[cur].off = 0;
                    opt[cur].litlen = litlen;
                    opt[cur].price = price;
                } else {
                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
                }
            }
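            /* The literal update above is O(1) per position: only the delta of
             * the literal-length code is re-paid, i.e.
             *   opt[cur] = opt[cur-1] + rawCost(1 byte)
             *            + price(LL=litlen) - price(LL=litlen-1)
             * so growing a literal run never recomputes the whole run's cost. */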

            /* Set the repcodes of the current position. We must do it here
             * because we rely on the repcodes of the 2nd to last sequence being
             * correct to set the next chunk's repcodes during the backward
             * traversal.
             */
            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
            assert(cur >= opt[cur].mlen);
            if (opt[cur].mlen != 0) {
                U32 const prev = cur - opt[cur].mlen;
                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
            } else {
                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
            }

            /* last match must start at a minimum distance of 8 from oend */
            if (inr > ilimit) continue;

            if (cur == last_pos) break;

            if ( (optLevel==0) /*static_test*/
              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
                continue;  /* skip unpromising positions; ~+6% speed, -0.01 ratio */
            }
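            /* The threshold above is half a bit in the fractional price scale
             * (BITCOST_MULTIPLIER/2): when reaching cur+1 is already (nearly)
             * as cheap as cur, launching a match search from cur rarely pays
             * off, so the fast parser (optLevel==0) skips it. */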

            assert(opt[cur].price >= 0);
            {   U32 const ll0 = (opt[cur].mlen != 0);
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = (U32)opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                U32 matchNb;

                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                                  (U32)(inr-istart), (U32)(iend-inr));

                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
                    continue;
                }

                {   U32 const maxML = matches[nbMatches-1].len;
                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
                                inr-istart, cur, nbMatches, maxML);

                    if ( (maxML > sufficient_len)
                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
                        lastSequence.mlen = maxML;
                        lastSequence.off = matches[nbMatches-1].off;
                        lastSequence.litlen = litlen;
                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
                        last_pos = cur + ZSTD_totalLen(lastSequence);
                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
                        goto _shortestPath;
                }   }

                /* set prices using matches found at position == cur */
                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                    U32 const offset = matches[matchNb].off;
                    U32 const lastML = matches[matchNb].len;
                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
                    U32 mlen;

                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
                                matchNb, matches[matchNb].off, lastML, litlen);

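                    /* Scan candidate lengths from longest to shortest; at
                     * optLevel==0, once a length fails to improve opt[pos],
                     * shorter lengths are assumed not to help either and the
                     * scan aborts early (see the break below). */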
                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                        U32 const pos = cur + mlen;
                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);

                        if ((pos > last_pos) || (price < opt[pos].price)) {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
                            opt[pos].mlen = mlen;
                            opt[pos].off = offset;
                            opt[pos].litlen = litlen;
                            opt[pos].price = price;
                        } else {
                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                        }
            }   }   }
        }  /* for (cur = 1; cur <= last_pos; cur++) */

        lastSequence = opt[last_pos];
        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
        assert(cur < ZSTD_OPT_NUM);  /* control overflow */

_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
        assert(opt[0].mlen == 0);

        /* Set the next chunk's repcodes based on the repcodes of the beginning
         * of the last match, and the last sequence. This avoids us having to
         * update them while traversing the sequences.
         */
        if (lastSequence.mlen != 0) {
            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
            ZSTD_memcpy(rep, &reps, sizeof(reps));
        } else {
            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
        }

        {   U32 const storeEnd = cur + 1;
            U32 storeStart = storeEnd;
            U32 seqPos = cur;

            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                        last_pos, cur); (void)last_pos;
            assert(storeEnd < ZSTD_OPT_NUM);
            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
            opt[storeEnd] = lastSequence;
            while (seqPos > 0) {
                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
                storeStart--;
                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
                opt[storeStart] = opt[seqPos];
                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
            }
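            /* Illustration, assuming ZSTD_totalLen(seq) == seq.litlen + seq.mlen:
             * if opt[seqPos] covers 2 literals plus a 6-byte match, the previous
             * sequence of the chosen path ends 8 positions earlier, so the walk
             * hops from boundary to boundary while packing the selected
             * sequences contiguously into opt[storeStart..storeEnd]. */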

            /* save sequences */
            DEBUGLOG(6, "sending selected sequences into seqStore")
            {   U32 storePos;
                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                    U32 const llen = opt[storePos].litlen;
                    U32 const mlen = opt[storePos].mlen;
                    U32 const offCode = opt[storePos].off;
                    U32 const advance = llen + mlen;
                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
                                anchor - istart, (unsigned)llen, (unsigned)mlen);

                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
                        assert(storePos == storeEnd);   /* must be last sequence */
                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
                        continue;   /* will finish */
                    }

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
                    anchor += advance;
                    ip = anchor;
            }   }
            ZSTD_setBasePrices(optStatePtr, optLevel);
        }
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return (size_t)(iend - anchor);
}

static size_t ZSTD_compressBlock_opt0(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
}

static size_t ZSTD_compressBlock_opt2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
{
    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
}

size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}




/* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
 * this function cannot error, hence its contract must be respected.
 */
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
                     seqStore_t* seqStore,
                     U32 rep[ZSTD_REP_NUM],
                     const void* src, size_t srcSize)
{
    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));

    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
    assert(ms->opt.litLengthSum == 0);    /* first block */
    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as two's complement) */

    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt */

    /* invalidate first scan from history */
    ZSTD_resetSeqStore(seqStore);
    ms->window.base -= srcSize;
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;
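    /* Rewinding base while pushing dictLimit/lowLimit/nextToUpdate forward
     * shifts every already-scanned byte outside the valid window: the second,
     * "real" pass over the same data starts from fresh indices with no reusable
     * match history, and only the statistics accumulated in ms->opt carry over. */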

}

size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);

    /* 2-pass strategy:
     * this strategy makes a first pass over the first block to collect statistics,
     * in order to seed the next round's statistics with them.
     * After the 1st pass, the function forgets everything, and starts a new block.
     * Consequently, this can only work if no data has been previously loaded into the tables,
     * i.e. no dictionary, no prefix, no ldm preprocessing.
     * The compression ratio gain is generally small (~0.5% on the first block),
     * the cost is 2x cpu time on the first block. */
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    if ( (ms->opt.litLengthSum==0)   /* first block */
      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
      && (srcSize > ZSTD_PREDEF_THRESHOLD)
      ) {
        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
    }

    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
}

size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
}

size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        const void* src, size_t srcSize)
{
    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
}

/* note : no btultra2 variant for extDict nor dictMatchState,
 * because btultra2 is not meant to work with dictionaries
 * and is specific to the first block (no prefix) */
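
/* Usage sketch (illustrative): these block-level parsers are normally reached
 * indirectly, by asking the public API for a high compression level. The call
 * below uses only functions declared in zstd.h; which levels actually select
 * the btopt/btultra/btultra2 strategies depends on srcSize and on the level
 * tables, so treat the level-to-strategy mapping as an assumption.
 *
 *     #include <zstd.h>
 *     size_t const bound = ZSTD_compressBound(srcSize);
 *     void* const dst = malloc(bound);
 *     size_t const cSize = ZSTD_compress(dst, bound, src, srcSize, 22);
 *     if (ZSTD_isError(cSize)) { ... handle error ... }
 */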