1 /*
2 * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 #include "zstd_compress_internal.h"
12 #include "hist.h"
13 #include "zstd_opt.h"
14
15
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17 #define ZSTD_MAX_PRICE (1<<30)
18
19 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
20
21
22 /*-*************************************
23 * Price functions for optimal parser
24 ***************************************/
25
26 #if 0 /* approximation at bit level (for tests) */
27 # define BITCOST_ACCURACY 0
28 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29 # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
30 #elif 0 /* fractional bit accuracy (for tests) */
31 # define BITCOST_ACCURACY 8
32 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
34 #else /* opt==approx, ultra==accurate */
35 # define BITCOST_ACCURACY 8
36 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38 #endif
39
40 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
41 {
42 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
43 }
44
45 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
46 {
47 U32 const stat = rawStat + 1;
48 U32 const hb = ZSTD_highbit32(stat);
49 U32 const BWeight = hb * BITCOST_MULTIPLIER;
50 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51 U32 const weight = BWeight + FWeight;
52 assert(hb + BITCOST_ACCURACY < 31);
53 return weight;
54 }
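/* Note on price units :
 * prices are expressed in fractions of a bit, 1/BITCOST_MULTIPLIER each (1/256th of a bit
 * with the default BITCOST_ACCURACY of 8).
 * ZSTD_fracWeight(s) approximates BITCOST_MULTIPLIER * log2(s), up to a near-constant offset
 * which cancels out when a symbol price is formed as WEIGHT(totalSum) - WEIGHT(symbolFreq).
 * Worked example (approximate) : a symbol seen 2 times out of a total of 1024 :
 *   ZSTD_fracWeight(1024) = 10*256 + ((1025<<8)>>10) = 2560 + 256 = 2816
 *   ZSTD_fracWeight(2)    =  1*256 + (( 3<<8)>> 1)   =  256 + 384 =  640
 *   price = 2816 - 640 = 2176, i.e. ~8.5 bits (exact value : log2(1025/3) ~= 8.4 bits). */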
55
56 #if (DEBUGLEVEL>=2)
57 /* debugging function,
58 * @return price in bytes as fractional value
59 * for debug messages only */
60 MEM_STATIC double ZSTD_fCost(U32 price)
61 {
62 return (double)price / (BITCOST_MULTIPLIER*8);
63 }
64 #endif
65
66 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
67 {
68 return optPtr->literalCompressionMode != ZSTD_ps_disable;
69 }
70
71 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
72 {
73 if (ZSTD_compressedLiterals(optPtr))
74 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
75 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
76 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
77 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
78 }
79
80
81 static U32 sum_u32(const unsigned table[], size_t nbElts)
82 {
83 size_t n;
84 U32 total = 0;
85 for (n=0; n<nbElts; n++) {
86 total += table[n];
87 }
88 return total;
89 }
90
91 static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
92 {
93 U32 s, sum=0;
94 DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
95 assert(shift < 30);
96 for (s=0; s<lastEltIndex+1; s++) {
97 table[s] = 1 + (table[s] >> shift);
98 sum += table[s];
99 }
100 return sum;
101 }
102
103 /* ZSTD_scaleStats() :
104 * reduce all elements in table if sum is too large
105 * return the resulting sum of elements */
106 static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107 {
108 U32 const prevsum = sum_u32(table, lastEltIndex+1);
109 U32 const factor = prevsum >> logTarget;
110 DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111 assert(logTarget < 30);
112 if (factor <= 1) return prevsum;
113 return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
114 }
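/* Example : with logTarget=11, a table summing to 16384 gives factor = 16384>>11 = 8,
 * so ZSTD_downscaleStats() is invoked with shift = ZSTD_highbit32(8) = 3 :
 * every count becomes 1 + (count>>3), bringing the sum back near 2K (plus 1 per element).
 * Keeping sums bounded prevents overflow in WEIGHT() and lets statistics adapt across blocks. */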
115
116 /* ZSTD_rescaleFreqs() :
117 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
118 * take hints from dictionary if there is one
119 * and init from zero if there is none,
120 * using src for literals stats, and baseline stats for sequence symbols
121 * otherwise downscale existing stats, to be used as seed for next block.
122 */
123 static void
124 ZSTD_rescaleFreqs(optState_t* const optPtr,
125 const BYTE* const src, size_t const srcSize,
126 int const optLevel)
127 {
128 int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
129 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130 optPtr->priceType = zop_dynamic;
131
132 if (optPtr->litLengthSum == 0) { /* first block : init */
133 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
134 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
135 optPtr->priceType = zop_predef;
136 }
137
138 assert(optPtr->symbolCosts != NULL);
139 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140 /* huffman table presumed generated by dictionary */
141 optPtr->priceType = zop_dynamic;
142
143 if (compressedLiterals) {
144 unsigned lit;
145 assert(optPtr->litFreq != NULL);
146 optPtr->litSum = 0;
147 for (lit=0; lit<=MaxLit; lit++) {
148 U32 const scaleLog = 11; /* scale to 2K */
149 U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
150 assert(bitCost <= scaleLog);
151 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
152 optPtr->litSum += optPtr->litFreq[lit];
153 } }
154
155 { unsigned ll;
156 FSE_CState_t llstate;
157 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
158 optPtr->litLengthSum = 0;
159 for (ll=0; ll<=MaxLL; ll++) {
160 U32 const scaleLog = 10; /* scale to 1K */
161 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
162 assert(bitCost < scaleLog);
163 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
164 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
165 } }
166
167 { unsigned ml;
168 FSE_CState_t mlstate;
169 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
170 optPtr->matchLengthSum = 0;
171 for (ml=0; ml<=MaxML; ml++) {
172 U32 const scaleLog = 10;
173 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
174 assert(bitCost < scaleLog);
175 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
176 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
177 } }
178
179 { unsigned of;
180 FSE_CState_t ofstate;
181 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
182 optPtr->offCodeSum = 0;
183 for (of=0; of<=MaxOff; of++) {
184 U32 const scaleLog = 10;
185 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
186 assert(bitCost < scaleLog);
187 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
188 optPtr->offCodeSum += optPtr->offCodeFreq[of];
189 } }
190
191 } else { /* not a dictionary */
192
193 assert(optPtr->litFreq != NULL);
194 if (compressedLiterals) {
195 unsigned lit = MaxLit;
196 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
197 optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
198 }
199
200 { unsigned const baseLLfreqs[MaxLL+1] = {
201 4, 2, 1, 1, 1, 1, 1, 1,
202 1, 1, 1, 1, 1, 1, 1, 1,
203 1, 1, 1, 1, 1, 1, 1, 1,
204 1, 1, 1, 1, 1, 1, 1, 1,
205 1, 1, 1, 1
206 };
207 ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
208 }
209
210 { unsigned ml;
211 for (ml=0; ml<=MaxML; ml++)
212 optPtr->matchLengthFreq[ml] = 1;
213 }
214 optPtr->matchLengthSum = MaxML+1;
215
216 { unsigned const baseOFCfreqs[MaxOff+1] = {
217 6, 2, 1, 1, 2, 3, 4, 4,
218 4, 3, 2, 1, 1, 1, 1, 1,
219 1, 1, 1, 1, 1, 1, 1, 1,
220 1, 1, 1, 1, 1, 1, 1, 1
221 };
222 ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
223 }
224
225
226 }
227
228 } else { /* new block : re-use previous statistics, scaled down */
229
230 if (compressedLiterals)
231 optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
232 optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
233 optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
234 optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
235 }
236
237 ZSTD_setBasePrices(optPtr, optLevel);
238 }
239
240 /* ZSTD_rawLiteralsCost() :
241 * price of literals (only) in specified segment (whose length can be 0).
242 * does not include price of literalLength symbol */
243 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
244 const optState_t* const optPtr,
245 int optLevel)
246 {
247 if (litLength == 0) return 0;
248
249 if (!ZSTD_compressedLiterals(optPtr))
250 return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */
251
252 if (optPtr->priceType == zop_predef)
253 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bits per literal - no statistics used */
254
255 /* dynamic statistics */
256 { U32 price = litLength * optPtr->litSumBasePrice;
257 U32 u;
258 for (u=0; u < litLength; u++) {
259 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
260 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
261 }
262 return price;
263 }
264 }
265
266 /* ZSTD_litLengthPrice() :
267 * cost of literalLength symbol */
268 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
269 {
270 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
271
272 /* dynamic statistics */
273 { U32 const llCode = ZSTD_LLcode(litLength);
274 return (LL_bits[llCode] * BITCOST_MULTIPLIER)
275 + optPtr->litLengthSumBasePrice
276 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
277 }
278 }
279
280 /* ZSTD_getMatchPrice() :
281 * Provides the cost of the match part (offset + matchLength) of a sequence
282 * Must be combined with the literals cost (ZSTD_rawLiteralsCost() + ZSTD_litLengthPrice()) to get the full cost of a sequence.
283 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
284 FORCE_INLINE_TEMPLATE U32
285 ZSTD_getMatchPrice(U32 const offset,
286 U32 const matchLength,
287 const optState_t* const optPtr,
288 int const optLevel)
289 {
290 U32 price;
291 U32 const offCode = ZSTD_highbit32(offset+1);
292 U32 const mlBase = matchLength - MINMATCH;
293 assert(matchLength >= MINMATCH);
294
295 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
296 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
297
298 /* dynamic statistics */
299 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
300 if ((optLevel<2) /*static*/ && offCode >= 20)
301 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
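        /* e.g. offCode==22 (offsets in the 4..8 MB range) adds (22-19)*2 = 6 bits of handicap */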
302
303 /* match Length */
304 { U32 const mlCode = ZSTD_MLcode(mlBase);
305 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
306 }
307
308 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
309
310 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
311 return price;
312 }
313
314 /* ZSTD_updateStats() :
315 * assumption : literals + litLength <= iend */
316 static void ZSTD_updateStats(optState_t* const optPtr,
317 U32 litLength, const BYTE* literals,
318 U32 offsetCode, U32 matchLength)
319 {
320 /* literals */
321 if (ZSTD_compressedLiterals(optPtr)) {
322 U32 u;
323 for (u=0; u < litLength; u++)
324 optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
325 optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
326 }
327
328 /* literal Length */
329 { U32 const llCode = ZSTD_LLcode(litLength);
330 optPtr->litLengthFreq[llCode]++;
331 optPtr->litLengthSum++;
332 }
333
334 /* match offset code (0-2=>repCode; 3+=>offset+2) */
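    /* offsetCode uses the internal representation : values 0-2 are repcodes, larger values
     * are (real offset + ZSTD_REP_MOVE) ; ZSTD_highbit32(offsetCode+1) maps it onto the offCode alphabet */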
335 { U32 const offCode = ZSTD_highbit32(offsetCode+1);
336 assert(offCode <= MaxOff);
337 optPtr->offCodeFreq[offCode]++;
338 optPtr->offCodeSum++;
339 }
340
341 /* match Length */
342 { U32 const mlBase = matchLength - MINMATCH;
343 U32 const mlCode = ZSTD_MLcode(mlBase);
344 optPtr->matchLengthFreq[mlCode]++;
345 optPtr->matchLengthSum++;
346 }
347 }
348
349
350 /* ZSTD_readMINMATCH() :
351 * function safe only for comparisons
352 * assumption : memPtr must be at least 4 bytes before end of buffer */
353 MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
354 {
355 switch (length)
356 {
357 default :
358 case 4 : return MEM_read32(memPtr);
359 case 3 : if (MEM_isLittleEndian())
360 return MEM_read32(memPtr)<<8;
361 else
362 return MEM_read32(memPtr)>>8;
363 }
364 }
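/* Example, for length==3 on a little-endian target :
 * MEM_read32("abcd") == 0x64636261 ; shifted left by 8 it becomes 0x63626100,
 * so only the first 3 bytes take part in the comparison and the 4th byte is discarded.
 * The resulting value is only meaningful for equality tests, hence "safe only for comparisons". */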
365
366
367 /* Update hashTable3 up to ip (excluded)
368 Assumption : always within prefix (i.e. not within extDict) */
369 static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
370 U32* nextToUpdate3,
371 const BYTE* const ip)
372 {
373 U32* const hashTable3 = ms->hashTable3;
374 U32 const hashLog3 = ms->hashLog3;
375 const BYTE* const base = ms->window.base;
376 U32 idx = *nextToUpdate3;
377 U32 const target = (U32)(ip - base);
378 size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
379 assert(hashLog3 > 0);
380
381 while(idx < target) {
382 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
383 idx++;
384 }
385
386 *nextToUpdate3 = target;
387 return hashTable3[hash3];
388 }
389
390
391 /*-*************************************
392 * Binary Tree search
393 ***************************************/
394 /** ZSTD_insertBt1() : add one or multiple positions to tree.
395 * @param ip assumed <= iend-8 .
396 * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
397 * @return : nb of positions added */
398 static U32 ZSTD_insertBt1(
399 const ZSTD_matchState_t* ms,
400 const BYTE* const ip, const BYTE* const iend,
401 U32 const target,
402 U32 const mls, const int extDict)
403 {
404 const ZSTD_compressionParameters* const cParams = &ms->cParams;
405 U32* const hashTable = ms->hashTable;
406 U32 const hashLog = cParams->hashLog;
407 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
408 U32* const bt = ms->chainTable;
409 U32 const btLog = cParams->chainLog - 1;
410 U32 const btMask = (1 << btLog) - 1;
411 U32 matchIndex = hashTable[h];
412 size_t commonLengthSmaller=0, commonLengthLarger=0;
413 const BYTE* const base = ms->window.base;
414 const BYTE* const dictBase = ms->window.dictBase;
415 const U32 dictLimit = ms->window.dictLimit;
416 const BYTE* const dictEnd = dictBase + dictLimit;
417 const BYTE* const prefixStart = base + dictLimit;
418 const BYTE* match;
419 const U32 curr = (U32)(ip-base);
420 const U32 btLow = btMask >= curr ? 0 : curr - btMask;
421 U32* smallerPtr = bt + 2*(curr&btMask);
422 U32* largerPtr = smallerPtr + 1;
423 U32 dummy32; /* to be nullified at the end */
424 /* windowLow is based on target because
425 * we only need positions that will be in the window at the end of the tree update.
426 */
427 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
428 U32 matchEndIdx = curr+8+1;
429 size_t bestLength = 8;
430 U32 nbCompares = 1U << cParams->searchLog;
431 #ifdef ZSTD_C_PREDICT
432 U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
433 U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
434 predictedSmall += (predictedSmall>0);
435 predictedLarge += (predictedLarge>0);
436 #endif /* ZSTD_C_PREDICT */
437
438 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
439
440 assert(curr <= target);
441 assert(ip <= iend-8); /* required for h calculation */
442 hashTable[h] = curr; /* Update Hash Table */
443
444 assert(windowLow > 0);
445 for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
446 U32* const nextPtr = bt + 2*(matchIndex & btMask);
447 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
448 assert(matchIndex < curr);
449
450 #ifdef ZSTD_C_PREDICT /* note : can create issues when hashLog is small (<= 11) */
451 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
452 if (matchIndex == predictedSmall) {
453 /* no need to check length, result known */
454 *smallerPtr = matchIndex;
455 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
456 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
457 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
458 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
459 continue;
460 }
461 if (matchIndex == predictedLarge) {
462 *largerPtr = matchIndex;
463 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
464 largerPtr = nextPtr;
465 matchIndex = nextPtr[0];
466 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
467 continue;
468 }
469 #endif
470
471 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
472 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
473 match = base + matchIndex;
474 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
475 } else {
476 match = dictBase + matchIndex;
477 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
478 if (matchIndex+matchLength >= dictLimit)
479 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
480 }
481
482 if (matchLength > bestLength) {
483 bestLength = matchLength;
484 if (matchLength > matchEndIdx - matchIndex)
485 matchEndIdx = matchIndex + (U32)matchLength;
486 }
487
488 if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
489 break; /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
490 }
491
492 if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
493 /* match is smaller than current */
494 *smallerPtr = matchIndex; /* update smaller idx */
495 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
496 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
497 smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
498 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
499 } else {
500 /* match is larger than current */
501 *largerPtr = matchIndex;
502 commonLengthLarger = matchLength;
503 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
504 largerPtr = nextPtr;
505 matchIndex = nextPtr[0];
506 } }
507
508 *smallerPtr = *largerPtr = 0;
509 { U32 positions = 0;
510 if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
511 assert(matchEndIdx > curr + 8);
512 return MAX(positions, matchEndIdx - (curr + 8));
513 }
514 }
515
516 FORCE_INLINE_TEMPLATE
517 void ZSTD_updateTree_internal(
518 ZSTD_matchState_t* ms,
519 const BYTE* const ip, const BYTE* const iend,
520 const U32 mls, const ZSTD_dictMode_e dictMode)
521 {
522 const BYTE* const base = ms->window.base;
523 U32 const target = (U32)(ip - base);
524 U32 idx = ms->nextToUpdate;
525 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
526 idx, target, dictMode);
527
528 while(idx < target) {
529 U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
530 assert(idx < (U32)(idx + forward));
531 idx += forward;
532 }
533 assert((size_t)(ip - base) <= (size_t)(U32)(-1));
534 assert((size_t)(iend - base) <= (size_t)(U32)(-1));
535 ms->nextToUpdate = target;
536 }
537
538 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
539 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
540 }
541
542 FORCE_INLINE_TEMPLATE
543 U32 ZSTD_insertBtAndGetAllMatches (
544 ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
545 ZSTD_matchState_t* ms,
546 U32* nextToUpdate3,
547 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
548 const U32 rep[ZSTD_REP_NUM],
549 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
550 const U32 lengthToBeat,
551 U32 const mls /* template */)
552 {
553 const ZSTD_compressionParameters* const cParams = &ms->cParams;
554 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
555 const BYTE* const base = ms->window.base;
556 U32 const curr = (U32)(ip-base);
557 U32 const hashLog = cParams->hashLog;
558 U32 const minMatch = (mls==3) ? 3 : 4;
559 U32* const hashTable = ms->hashTable;
560 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
561 U32 matchIndex = hashTable[h];
562 U32* const bt = ms->chainTable;
563 U32 const btLog = cParams->chainLog - 1;
564 U32 const btMask= (1U << btLog) - 1;
565 size_t commonLengthSmaller=0, commonLengthLarger=0;
566 const BYTE* const dictBase = ms->window.dictBase;
567 U32 const dictLimit = ms->window.dictLimit;
568 const BYTE* const dictEnd = dictBase + dictLimit;
569 const BYTE* const prefixStart = base + dictLimit;
570 U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
571 U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
572 U32 const matchLow = windowLow ? windowLow : 1;
573 U32* smallerPtr = bt + 2*(curr&btMask);
574 U32* largerPtr = bt + 2*(curr&btMask) + 1;
575 U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
576 U32 dummy32; /* to be nullified at the end */
577 U32 mnum = 0;
578 U32 nbCompares = 1U << cParams->searchLog;
579
580 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
581 const ZSTD_compressionParameters* const dmsCParams =
582 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
583 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
584 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
585 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
586 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
587 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
588 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
589 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
590 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
591 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
592
593 size_t bestLength = lengthToBeat-1;
594 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
595
596 /* check repCode */
597 assert(ll0 <= 1); /* necessarily 1 or 0 */
598 { U32 const lastR = ZSTD_REP_NUM + ll0;
599 U32 repCode;
600 for (repCode = ll0; repCode < lastR; repCode++) {
601 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
602 U32 const repIndex = curr - repOffset;
603 U32 repLen = 0;
604 assert(curr >= dictLimit);
605 if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
606 /* We must validate the repcode offset because when we're using a dictionary the
607 * valid offset range shrinks when the dictionary goes out of bounds.
608 */
609 if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
610 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
611 }
612 } else { /* repIndex < dictLimit || repIndex >= curr */
613 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
614 dmsBase + repIndex - dmsIndexDelta :
615 dictBase + repIndex;
616 assert(curr >= windowLow);
617 if ( dictMode == ZSTD_extDict
618 && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
619 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
620 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
621 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
622 }
623 if (dictMode == ZSTD_dictMatchState
624 && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
625 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
626 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
627 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
628 } }
629 /* save longer solution */
630 if (repLen > bestLength) {
631 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
632 repCode, ll0, repOffset, repLen);
633 bestLength = repLen;
634 matches[mnum].off = repCode - ll0;
635 matches[mnum].len = (U32)repLen;
636 mnum++;
637 if ( (repLen > sufficient_len)
638 | (ip+repLen == iLimit) ) { /* best possible */
639 return mnum;
640 } } } }
641
642 /* HC3 match finder */
643 if ((mls == 3) /*static*/ && (bestLength < mls)) {
644 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
645 if ((matchIndex3 >= matchLow)
646 & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
647 size_t mlen;
648 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
649 const BYTE* const match = base + matchIndex3;
650 mlen = ZSTD_count(ip, match, iLimit);
651 } else {
652 const BYTE* const match = dictBase + matchIndex3;
653 mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
654 }
655
656 /* save best solution */
657 if (mlen >= mls /* == 3 > bestLength */) {
658 DEBUGLOG(8, "found small match with hlog3, of length %u",
659 (U32)mlen);
660 bestLength = mlen;
661 assert(curr > matchIndex3);
662 assert(mnum==0); /* no prior solution */
663 matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
664 matches[0].len = (U32)mlen;
665 mnum = 1;
666 if ( (mlen > sufficient_len) |
667 (ip+mlen == iLimit) ) { /* best possible length */
668 ms->nextToUpdate = curr+1; /* skip insertion */
669 return 1;
670 } } }
671 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
672 } /* if (mls == 3) */
673
674 hashTable[h] = curr; /* Update Hash Table */
675
676 for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
677 U32* const nextPtr = bt + 2*(matchIndex & btMask);
678 const BYTE* match;
679 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
680 assert(curr > matchIndex);
681
682 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
683 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
684 match = base + matchIndex;
685 if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
686 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
687 } else {
688 match = dictBase + matchIndex;
689 assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
690 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
691 if (matchIndex+matchLength >= dictLimit)
692 match = base + matchIndex; /* prepare for match[matchLength] read */
693 }
694
695 if (matchLength > bestLength) {
696 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
697 (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
698 assert(matchEndIdx > matchIndex);
699 if (matchLength > matchEndIdx - matchIndex)
700 matchEndIdx = matchIndex + (U32)matchLength;
701 bestLength = matchLength;
702 matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
703 matches[mnum].len = (U32)matchLength;
704 mnum++;
705 if ( (matchLength > ZSTD_OPT_NUM)
706 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
707 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
708 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
709 } }
710
711 if (match[matchLength] < ip[matchLength]) {
712 /* match smaller than current */
713 *smallerPtr = matchIndex; /* update smaller idx */
714 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
715 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
716 smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
717 matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
718 } else {
719 *largerPtr = matchIndex;
720 commonLengthLarger = matchLength;
721 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
722 largerPtr = nextPtr;
723 matchIndex = nextPtr[0];
724 } }
725
726 *smallerPtr = *largerPtr = 0;
727
728 assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
729 if (dictMode == ZSTD_dictMatchState && nbCompares) {
730 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
731 U32 dictMatchIndex = dms->hashTable[dmsH];
732 const U32* const dmsBt = dms->chainTable;
733 commonLengthSmaller = commonLengthLarger = 0;
734 for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
735 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
736 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
737 const BYTE* match = dmsBase + dictMatchIndex;
738 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
739 if (dictMatchIndex+matchLength >= dmsHighLimit)
740 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
741
742 if (matchLength > bestLength) {
743 matchIndex = dictMatchIndex + dmsIndexDelta;
744 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
745 (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
746 if (matchLength > matchEndIdx - matchIndex)
747 matchEndIdx = matchIndex + (U32)matchLength;
748 bestLength = matchLength;
749 matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
750 matches[mnum].len = (U32)matchLength;
751 mnum++;
752 if ( (matchLength > ZSTD_OPT_NUM)
753 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
754 break; /* drop, to guarantee consistency (miss a little bit of compression) */
755 } }
756
757 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
758 if (match[matchLength] < ip[matchLength]) {
759 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
760 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
761 } else {
762 /* match is larger than current */
763 commonLengthLarger = matchLength;
764 dictMatchIndex = nextPtr[0];
765 } } } /* if (dictMode == ZSTD_dictMatchState) */
766
767 assert(matchEndIdx > curr+8);
768 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
769 return mnum;
770 }
771
772 typedef U32 (*ZSTD_getAllMatchesFn)(
773 ZSTD_match_t*,
774 ZSTD_matchState_t*,
775 U32*,
776 const BYTE*,
777 const BYTE*,
778 const U32 rep[ZSTD_REP_NUM],
779 U32 const ll0,
780 U32 const lengthToBeat);
781
782 FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
783 ZSTD_match_t* matches,
784 ZSTD_matchState_t* ms,
785 U32* nextToUpdate3,
786 const BYTE* ip,
787 const BYTE* const iHighLimit,
788 const U32 rep[ZSTD_REP_NUM],
789 U32 const ll0,
790 U32 const lengthToBeat,
791 const ZSTD_dictMode_e dictMode,
792 const U32 mls)
793 {
794 assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
795 DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
796 if (ip < ms->window.base + ms->nextToUpdate)
797 return 0; /* skipped area */
798 ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
799 return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
800 }
801
802 #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
803
804 #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
805 static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
806 ZSTD_match_t* matches, \
807 ZSTD_matchState_t* ms, \
808 U32* nextToUpdate3, \
809 const BYTE* ip, \
810 const BYTE* const iHighLimit, \
811 const U32 rep[ZSTD_REP_NUM], \
812 U32 const ll0, \
813 U32 const lengthToBeat) \
814 { \
815 return ZSTD_btGetAllMatches_internal( \
816 matches, ms, nextToUpdate3, ip, iHighLimit, \
817 rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
818 }
819
820 #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
821 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
822 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
823 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
824 GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
825
826 GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
827 GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
828 GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
829
830 #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
831 { \
832 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
833 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
834 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
835 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
836 }
837
838 static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
839 {
840 ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
841 ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
842 ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
843 ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
844 };
845 U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
846 assert((U32)dictMode < 3);
847 assert(mls - 3 < 4);
848 return getAllMatchesFns[(int)dictMode][mls - 3];
849 }
850
851 /*************************
852 * LDM helper functions *
853 *************************/
854
855 /* Struct containing info needed to make decision about ldm inclusion */
856 typedef struct {
857 rawSeqStore_t seqStore; /* External match candidates store for this block */
858 U32 startPosInBlock; /* Start position of the current match candidate */
859 U32 endPosInBlock; /* End position of the current match candidate */
860 U32 offset; /* Offset of the match candidate */
861 } ZSTD_optLdm_t;
862
863 /* ZSTD_optLdm_skipRawSeqStoreBytes():
864 * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
865 */
866 static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
867 U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
868 while (currPos && rawSeqStore->pos < rawSeqStore->size) {
869 rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
870 if (currPos >= currSeq.litLength + currSeq.matchLength) {
871 currPos -= currSeq.litLength + currSeq.matchLength;
872 rawSeqStore->pos++;
873 } else {
874 rawSeqStore->posInSequence = currPos;
875 break;
876 }
877 }
878 if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
879 rawSeqStore->posInSequence = 0;
880 }
881 }
882
883 /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
884 * Calculates the beginning and end of the next match in the current block.
885 * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
886 */
887 static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
888 U32 blockBytesRemaining) {
889 rawSeq currSeq;
890 U32 currBlockEndPos;
891 U32 literalsBytesRemaining;
892 U32 matchBytesRemaining;
893
894 /* Setting match end position to MAX to ensure we never use an LDM during this block */
895 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
896 optLdm->startPosInBlock = UINT_MAX;
897 optLdm->endPosInBlock = UINT_MAX;
898 return;
899 }
900 /* Calculate appropriate bytes left in matchLength and litLength after adjusting
901 based on ldmSeqStore->posInSequence */
902 currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
903 assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
904 currBlockEndPos = currPosInBlock + blockBytesRemaining;
905 literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
906 currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
907 0;
908 matchBytesRemaining = (literalsBytesRemaining == 0) ?
909 currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
910 currSeq.matchLength;
911
912 /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
913 if (literalsBytesRemaining >= blockBytesRemaining) {
914 optLdm->startPosInBlock = UINT_MAX;
915 optLdm->endPosInBlock = UINT_MAX;
916 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
917 return;
918 }
919
920 /* Matches may be < MINMATCH by this process. In that case, we will reject them
921 when we are deciding whether or not to add the ldm */
922 optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
923 optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
924 optLdm->offset = currSeq.offset;
925
926 if (optLdm->endPosInBlock > currBlockEndPos) {
927 /* Match ends after the block ends, we can't use the whole match */
928 optLdm->endPosInBlock = currBlockEndPos;
929 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
930 } else {
931 /* Consume nb of bytes equal to size of sequence left */
932 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
933 }
934 }
935
936 /* ZSTD_optLdm_maybeAddMatch():
937 * Adds a match if it's long enough, based on its 'startPosInBlock'
938 * and 'endPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'.
939 */
940 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
941 ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
942 U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
943 /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
944 U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
945 U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
946
947 /* Ensure that current block position is not outside of the match */
948 if (currPosInBlock < optLdm->startPosInBlock
949 || currPosInBlock >= optLdm->endPosInBlock
950 || candidateMatchLength < MINMATCH) {
951 return;
952 }
953
954 if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
955 DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
956 candidateOffCode, candidateMatchLength, currPosInBlock);
957 matches[*nbMatches].len = candidateMatchLength;
958 matches[*nbMatches].off = candidateOffCode;
959 (*nbMatches)++;
960 }
961 }
962
963 /* ZSTD_optLdm_processMatchCandidate():
964 * Wrapper function to update ldm seq store and call ldm functions as necessary.
965 */
966 static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
967 U32 currPosInBlock, U32 remainingBytes) {
968 if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
969 return;
970 }
971
972 if (currPosInBlock >= optLdm->endPosInBlock) {
973 if (currPosInBlock > optLdm->endPosInBlock) {
974 /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
975 * at the end of a match from the ldm seq store, and will often be some bytes
976 * beyond endPosInBlock. As such, we need to correct for these "overshoots"
977 */
978 U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
979 ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
980 }
981 ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
982 }
983 ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
984 }
985
986
987 /*-*******************************
988 * Optimal parser
989 *********************************/
990
991 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
992 {
993 return sol.litlen + sol.mlen;
994 }
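/* Optimal parser overview :
 * opt[] holds up to ZSTD_OPT_NUM positions, indexed relative to the current ip.
 * The forward pass fills opt[pos].price with the cheapest known way to reach position pos,
 * either by appending one literal to the previous position, or by ending there a match
 * found by the binary tree search (and, optionally, by the ldm candidate store).
 * When the series terminates (match longer than sufficient_len, or table capacity reached),
 * the backward pass at _shortestPath follows the litlen+mlen links from the end,
 * then emits the selected sequences in order through ZSTD_storeSeq(). */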
995
996 #if 0 /* debug */
997
998 static void
999 listStats(const U32* table, int lastEltID)
1000 {
1001 int const nbElts = lastEltID + 1;
1002 int enb;
1003 for (enb=0; enb < nbElts; enb++) {
1004 (void)table;
1005 /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
1006 RAWLOG(2, "%4i,", table[enb]);
1007 }
1008 RAWLOG(2, " \n");
1009 }
1010
1011 #endif
1012
1013 FORCE_INLINE_TEMPLATE size_t
1014 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1015 seqStore_t* seqStore,
1016 U32 rep[ZSTD_REP_NUM],
1017 const void* src, size_t srcSize,
1018 const int optLevel,
1019 const ZSTD_dictMode_e dictMode)
1020 {
1021 optState_t* const optStatePtr = &ms->opt;
1022 const BYTE* const istart = (const BYTE*)src;
1023 const BYTE* ip = istart;
1024 const BYTE* anchor = istart;
1025 const BYTE* const iend = istart + srcSize;
1026 const BYTE* const ilimit = iend - 8;
1027 const BYTE* const base = ms->window.base;
1028 const BYTE* const prefixStart = base + ms->window.dictLimit;
1029 const ZSTD_compressionParameters* const cParams = &ms->cParams;
1030
1031 ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
1032
1033 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
1034 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
1035 U32 nextToUpdate3 = ms->nextToUpdate;
1036
1037 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1038 ZSTD_match_t* const matches = optStatePtr->matchTable;
1039 ZSTD_optimal_t lastSequence;
1040 ZSTD_optLdm_t optLdm;
1041
1042 optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1043 optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1044 ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
1045
1046 /* init */
1047 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
1048 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
1049 assert(optLevel <= 2);
1050 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
1051 ip += (ip==prefixStart);
1052
1053 /* Match Loop */
1054 while (ip < ilimit) {
1055 U32 cur, last_pos = 0;
1056
1057 /* find first match */
1058 { U32 const litlen = (U32)(ip - anchor);
1059 U32 const ll0 = !litlen;
1060 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1061 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1062 (U32)(ip-istart), (U32)(iend - ip));
1063 if (!nbMatches) { ip++; continue; }
1064
1065 /* initialize opt[0] */
1066 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1067 opt[0].mlen = 0; /* means is_a_literal */
1068 opt[0].litlen = litlen;
1069 /* We don't need to include the actual price of the literals because
1070 * it is static for the duration of the forward pass, and is included
1071 * in every price. We include the literal length to avoid negative
1072 * prices when we subtract the previous literal length.
1073 */
1074 opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1075
1076 /* large match -> immediate encoding */
1077 { U32 const maxML = matches[nbMatches-1].len;
1078 U32 const maxOffset = matches[nbMatches-1].off;
1079 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1080 nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
1081
1082 if (maxML > sufficient_len) {
1083 lastSequence.litlen = litlen;
1084 lastSequence.mlen = maxML;
1085 lastSequence.off = maxOffset;
1086 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1087 maxML, sufficient_len);
1088 cur = 0;
1089 last_pos = ZSTD_totalLen(lastSequence);
1090 goto _shortestPath;
1091 } }
1092
1093 /* set prices for first matches starting position == 0 */
1094 assert(opt[0].price >= 0);
1095 { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1096 U32 pos;
1097 U32 matchNb;
1098 for (pos = 1; pos < minMatch; pos++) {
1099 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
1100 }
1101 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1102 U32 const offset = matches[matchNb].off;
1103 U32 const end = matches[matchNb].len;
1104 for ( ; pos <= end ; pos++ ) {
1105 U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
1106 U32 const sequencePrice = literalsPrice + matchPrice;
1107 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1108 pos, ZSTD_fCost(sequencePrice));
1109 opt[pos].mlen = pos;
1110 opt[pos].off = offset;
1111 opt[pos].litlen = litlen;
1112 opt[pos].price = (int)sequencePrice;
1113 } }
1114 last_pos = pos-1;
1115 }
1116 }
1117
1118 /* check further positions */
1119 for (cur = 1; cur <= last_pos; cur++) {
1120 const BYTE* const inr = ip + cur;
1121 assert(cur < ZSTD_OPT_NUM);
1122 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1123
1124 /* Fix current position with one literal if cheaper */
1125 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1126 int const price = opt[cur-1].price
1127 + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1128 + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1129 - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1130 assert(price < 1000000000); /* overflow check */
1131 if (price <= opt[cur].price) {
1132 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1133 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1134 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1135 opt[cur].mlen = 0;
1136 opt[cur].off = 0;
1137 opt[cur].litlen = litlen;
1138 opt[cur].price = price;
1139 } else {
1140 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1141 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1142 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1143 }
1144 }
1145
1146 /* Set the repcodes of the current position. We must do it here
1147 * because we rely on the repcodes of the 2nd to last sequence being
1148 * correct to set the next chunk's repcodes during the backward
1149 * traversal.
1150 */
1151 ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1152 assert(cur >= opt[cur].mlen);
1153 if (opt[cur].mlen != 0) {
1154 U32 const prev = cur - opt[cur].mlen;
1155 repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1156 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1157 } else {
1158 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1159 }
1160
1161 /* last match must start at a minimum distance of 8 from iend */
1162 if (inr > ilimit) continue;
1163
1164 if (cur == last_pos) break;
1165
1166 if ( (optLevel==0) /*static_test*/
1167 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1168 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1169 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1170 }
1171
1172 assert(opt[cur].price >= 0);
1173 { U32 const ll0 = (opt[cur].mlen != 0);
1174 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1175 U32 const previousPrice = (U32)opt[cur].price;
1176 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1177 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1178 U32 matchNb;
1179
1180 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1181 (U32)(inr-istart), (U32)(iend-inr));
1182
1183 if (!nbMatches) {
1184 DEBUGLOG(7, "rPos:%u : no match found", cur);
1185 continue;
1186 }
1187
1188 { U32 const maxML = matches[nbMatches-1].len;
1189 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1190 inr-istart, cur, nbMatches, maxML);
1191
1192 if ( (maxML > sufficient_len)
1193 || (cur + maxML >= ZSTD_OPT_NUM) ) {
1194 lastSequence.mlen = maxML;
1195 lastSequence.off = matches[nbMatches-1].off;
1196 lastSequence.litlen = litlen;
1197 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1198 last_pos = cur + ZSTD_totalLen(lastSequence);
1199 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
1200 goto _shortestPath;
1201 } }
1202
1203 /* set prices using matches found at position == cur */
1204 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1205 U32 const offset = matches[matchNb].off;
1206 U32 const lastML = matches[matchNb].len;
1207 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1208 U32 mlen;
1209
1210 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1211 matchNb, matches[matchNb].off, lastML, litlen);
1212
1213 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1214 U32 const pos = cur + mlen;
1215 int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1216
1217 if ((pos > last_pos) || (price < opt[pos].price)) {
1218 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1219 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1220 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
1221 opt[pos].mlen = mlen;
1222 opt[pos].off = offset;
1223 opt[pos].litlen = litlen;
1224 opt[pos].price = price;
1225 } else {
1226 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1227 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1228 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1229 }
1230 } } }
1231 } /* for (cur = 1; cur <= last_pos; cur++) */
1232
1233 lastSequence = opt[last_pos];
1234 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
1235 assert(cur < ZSTD_OPT_NUM); /* control overflow*/
1236
1237 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1238 assert(opt[0].mlen == 0);
1239
1240 /* Set the next chunk's repcodes based on the repcodes of the beginning
1241 * of the last match, and the last sequence. This avoids us having to
1242 * update them while traversing the sequences.
1243 */
1244 if (lastSequence.mlen != 0) {
1245 repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1246 ZSTD_memcpy(rep, &reps, sizeof(reps));
1247 } else {
1248 ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1249 }
1250
1251 { U32 const storeEnd = cur + 1;
1252 U32 storeStart = storeEnd;
1253 U32 seqPos = cur;
1254
1255 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1256 last_pos, cur); (void)last_pos;
1257 assert(storeEnd < ZSTD_OPT_NUM);
1258 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1259 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1260 opt[storeEnd] = lastSequence;
1261 while (seqPos > 0) {
1262 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1263 storeStart--;
1264 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1265 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1266 opt[storeStart] = opt[seqPos];
1267 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1268 }
1269
1270 /* save sequences */
1271 DEBUGLOG(6, "sending selected sequences into seqStore")
1272 { U32 storePos;
1273 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1274 U32 const llen = opt[storePos].litlen;
1275 U32 const mlen = opt[storePos].mlen;
1276 U32 const offCode = opt[storePos].off;
1277 U32 const advance = llen + mlen;
1278 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1279 anchor - istart, (unsigned)llen, (unsigned)mlen);
1280
1281 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1282 assert(storePos == storeEnd); /* must be last sequence */
1283 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1284 continue; /* will finish */
1285 }
1286
1287 assert(anchor + llen <= iend);
1288 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1289 ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
1290 anchor += advance;
1291 ip = anchor;
1292 } }
1293 ZSTD_setBasePrices(optStatePtr, optLevel);
1294 }
1295 } /* while (ip < ilimit) */
1296
1297 /* Return the last literals size */
1298 return (size_t)(iend - anchor);
1299 }
1300
1301 static size_t ZSTD_compressBlock_opt0(
1302 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1303 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1304 {
1305 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1306 }
1307
1308 static size_t ZSTD_compressBlock_opt2(
1309 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1310 const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1311 {
1312 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1313 }
1314
1315 size_t ZSTD_compressBlock_btopt(
1316 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1317 const void* src, size_t srcSize)
1318 {
1319 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1320 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1321 }
1322
1323
1324
1325
1326 /* ZSTD_initStats_ultra():
1327 * make a first compression pass, just to seed stats with more accurate starting values.
1328 * only works on first block, with no dictionary and no ldm.
1329 * this function cannot error, hence its contract must be respected.
1330 */
1331 static void
1332 ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1333 seqStore_t* seqStore,
1334 U32 rep[ZSTD_REP_NUM],
1335 const void* src, size_t srcSize)
1336 {
1337 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1338 ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
1339
1340 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
1341 assert(ms->opt.litLengthSum == 0); /* first block */
1342 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
1343 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
1344 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as two's complement) */
1345
1346 ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1347
1348 /* invalidate first scan from history */
1349 ZSTD_resetSeqStore(seqStore);
1350 ms->window.base -= srcSize;
1351 ms->window.dictLimit += (U32)srcSize;
1352 ms->window.lowLimit = ms->window.dictLimit;
1353 ms->nextToUpdate = ms->window.dictLimit;
1354
1355 }
1356
1357 size_t ZSTD_compressBlock_btultra(
1358 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1359 const void* src, size_t srcSize)
1360 {
1361 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1362 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1363 }
1364
1365 size_t ZSTD_compressBlock_btultra2(
1366 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1367 const void* src, size_t srcSize)
1368 {
1369 U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1370 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1371
1372 /* 2-pass strategy:
1373 * this strategy makes a first pass over first block to collect statistics
1374 * and seed next round's statistics with it.
1375 * After 1st pass, function forgets everything, and starts a new block.
1376 * Consequently, this can only work if no data has been previously loaded in tables,
1377 * aka, no dictionary, no prefix, no ldm preprocessing.
1378 * The compression ratio gain is generally small (~0.5% on first block),
1379 * the cost is 2x cpu time on first block. */
1380 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1381 if ( (ms->opt.litLengthSum==0) /* first block */
1382 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1383 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1384 && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1385 && (srcSize > ZSTD_PREDEF_THRESHOLD)
1386 ) {
1387 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1388 }
1389
1390 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1391 }
1392
1393 size_t ZSTD_compressBlock_btopt_dictMatchState(
1394 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1395 const void* src, size_t srcSize)
1396 {
1397 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1398 }
1399
1400 size_t ZSTD_compressBlock_btultra_dictMatchState(
1401 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1402 const void* src, size_t srcSize)
1403 {
1404 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1405 }
1406
1407 size_t ZSTD_compressBlock_btopt_extDict(
1408 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1409 const void* src, size_t srcSize)
1410 {
1411 return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1412 }
1413
1414 size_t ZSTD_compressBlock_btultra_extDict(
1415 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1416 const void* src, size_t srcSize)
1417 {
1418 return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1419 }
1420
1421 /* note : no btultra2 variant for extDict nor dictMatchState,
1422 * because btultra2 is not meant to work with dictionaries
1423 * and is specific to the first block only (no prefix) */
1424