• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* This header contains definitions
12  * that shall **only** be used by modules within lib/compress.
13  */
14 
15 #ifndef ZSTD_COMPRESS_H
16 #define ZSTD_COMPRESS_H
17 
18 /*-*************************************
19 *  Dependencies
20 ***************************************/
21 #include "../common/zstd_internal.h"
22 #include "zstd_cwksp.h"
23 #ifdef ZSTD_MULTITHREAD
24 #  include "zstdmt_compress.h"
25 #endif
26 
27 #if defined (__cplusplus)
28 extern "C" {
29 #endif
30 
31 /*-*************************************
32 *  Constants
33 ***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8

/* ZSTD_DUBT_UNSORTED_MARK :
 * For the btlazy2 strategy, an index equal to ZSTD_DUBT_UNSORTED_MARK (==1) means "unsorted".
 * It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
 * It's not a big deal though : the candidate will just be sorted again.
 * Additionally, candidate position 1 will be lost.
 * But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
 * The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
 * This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal(). */
#define ZSTD_DUBT_UNSORTED_MARK 1
43 
44 
45 /*-*************************************
46 *  Context memory management
47 ***************************************/
/* Type of the `stage` field of struct ZSTD_CCtx_s. */
typedef enum {
    ZSTDcs_created = 0,
    ZSTDcs_init,
    ZSTDcs_ongoing,
    ZSTDcs_ending
} ZSTD_compressionStage_e;

/* Type of the `streamStage` field of struct ZSTD_CCtx_s. */
typedef enum {
    zcss_init = 0,
    zcss_load,
    zcss_flush
} ZSTD_cStreamStage;
50 
51 typedef struct ZSTD_prefixDict_s {
52     const void* dict;
53     size_t dictSize;
54     ZSTD_dictContentType_e dictContentType;
55 } ZSTD_prefixDict;
56 
57 typedef struct {
58     void* dictBuffer;
59     void const* dict;
60     size_t dictSize;
61     ZSTD_dictContentType_e dictContentType;
62     ZSTD_CDict* cdict;
63 } ZSTD_localDict;
64 
65 typedef struct {
66     HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
67     HUF_repeat repeatMode;
68 } ZSTD_hufCTables_t;
69 
70 typedef struct {
71     FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
72     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
73     FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
74     FSE_repeat offcode_repeatMode;
75     FSE_repeat matchlength_repeatMode;
76     FSE_repeat litlength_repeatMode;
77 } ZSTD_fseCTables_t;
78 
79 typedef struct {
80     ZSTD_hufCTables_t huf;
81     ZSTD_fseCTables_t fse;
82 } ZSTD_entropyCTables_t;
83 
84 /***********************************************
85 *  Entropy buffer statistics structs and funcs *
86 ***********************************************/
87 /** ZSTD_hufCTablesMetadata_t :
88  *  Stores Literals Block Type for a super-block in hType, and
89  *  huffman tree description in hufDesBuffer.
90  *  hufDesSize refers to the size of huffman tree description in bytes.
91  *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
92 typedef struct {
93     symbolEncodingType_e hType;
94     BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
95     size_t hufDesSize;
96 } ZSTD_hufCTablesMetadata_t;
97 
98 /** ZSTD_fseCTablesMetadata_t :
99  *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
100  *  fse tables in fseTablesBuffer.
101  *  fseTablesSize refers to the size of fse tables in bytes.
102  *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
103 typedef struct {
104     symbolEncodingType_e llType;
105     symbolEncodingType_e ofType;
106     symbolEncodingType_e mlType;
107     BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
108     size_t fseTablesSize;
109     size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
110 } ZSTD_fseCTablesMetadata_t;
111 
112 typedef struct {
113     ZSTD_hufCTablesMetadata_t hufMetadata;
114     ZSTD_fseCTablesMetadata_t fseMetadata;
115 } ZSTD_entropyCTablesMetadata_t;
116 
117 /** ZSTD_buildBlockEntropyStats() :
118  *  Builds entropy for the block.
119  *  @return : 0 on success or error code */
120 size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
121                              const ZSTD_entropyCTables_t* prevEntropy,
122                                    ZSTD_entropyCTables_t* nextEntropy,
123                              const ZSTD_CCtx_params* cctxParams,
124                                    ZSTD_entropyCTablesMetadata_t* entropyMetadata,
125                                    void* workspace, size_t wkspSize);
126 
127 /*********************************
128 *  Compression internals structs *
129 *********************************/
130 
131 typedef struct {
132     U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
133     U32 len;            /* Raw length of match */
134 } ZSTD_match_t;
135 
136 typedef struct {
137     U32 offset;         /* Offset of sequence */
138     U32 litLength;      /* Length of literals prior to match */
139     U32 matchLength;    /* Raw length of match */
140 } rawSeq;
141 
142 typedef struct {
143   rawSeq* seq;          /* The start of the sequences */
144   size_t pos;           /* The index in seq where reading stopped. pos <= size. */
145   size_t posInSequence; /* The position within the sequence at seq[pos] where reading
146                            stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
147   size_t size;          /* The number of sequences. <= capacity. */
148   size_t capacity;      /* The capacity starting from `seq` pointer */
149 } rawSeqStore_t;
150 
151 UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
152 
153 typedef struct {
154     int price;
155     U32 off;
156     U32 mlen;
157     U32 litlen;
158     U32 rep[ZSTD_REP_NUM];
159 } ZSTD_optimal_t;
160 
161 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
162 
163 typedef struct {
164     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
165     unsigned* litFreq;           /* table of literals statistics, of size 256 */
166     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
167     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
168     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
169     ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
170     ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
171 
172     U32  litSum;                 /* nb of literals */
173     U32  litLengthSum;           /* nb of litLength codes */
174     U32  matchLengthSum;         /* nb of matchLength codes */
175     U32  offCodeSum;             /* nb of offset codes */
176     U32  litSumBasePrice;        /* to compare to log2(litfreq) */
177     U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
178     U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
179     U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
180     ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
181     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
182     ZSTD_paramSwitch_e literalCompressionMode;
183 } optState_t;
184 
185 typedef struct {
186   ZSTD_entropyCTables_t entropy;
187   U32 rep[ZSTD_REP_NUM];
188 } ZSTD_compressedBlockState_t;
189 
190 typedef struct {
191     BYTE const* nextSrc;       /* next block here to continue on current prefix */
192     BYTE const* base;          /* All regular indexes relative to this position */
193     BYTE const* dictBase;      /* extDict indexes relative to this position */
194     U32 dictLimit;             /* below that point, need extDict */
195     U32 lowLimit;              /* below that point, no more valid data */
196     U32 nbOverflowCorrections; /* Number of times overflow correction has run since
197                                 * ZSTD_window_init(). Useful for debugging coredumps
198                                 * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
199                                 */
200 } ZSTD_window_t;
201 
202 #define ZSTD_WINDOW_START_INDEX 2
203 
204 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
205 
206 #define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
207 
208 struct ZSTD_matchState_t {
209     ZSTD_window_t window;   /* State for window round buffer management */
210     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
211                              * When loadedDictEnd != 0, a dictionary is in use, and still valid.
212                              * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
213                              * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
214                              * When dict referential is copied into active context (i.e. not attached),
215                              * loadedDictEnd == dictSize, since referential starts from zero.
216                              */
217     U32 nextToUpdate;       /* index from which to continue table update */
218     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
219 
220     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
221     U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
222     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
223 
224     U32* hashTable;
225     U32* hashTable3;
226     U32* chainTable;
227 
228     U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
229 
230     int dedicatedDictSearch;  /* Indicates whether this matchState is using the
231                                * dedicated dictionary search structure.
232                                */
233     optState_t opt;         /* optimal parser state */
234     const ZSTD_matchState_t* dictMatchState;
235     ZSTD_compressionParameters cParams;
236     const rawSeqStore_t* ldmSeqStore;
237 };
238 
239 typedef struct {
240     ZSTD_compressedBlockState_t* prevCBlock;
241     ZSTD_compressedBlockState_t* nextCBlock;
242     ZSTD_matchState_t matchState;
243 } ZSTD_blockState_t;
244 
245 typedef struct {
246     U32 offset;
247     U32 checksum;
248 } ldmEntry_t;
249 
250 typedef struct {
251     BYTE const* split;
252     U32 hash;
253     U32 checksum;
254     ldmEntry_t* bucket;
255 } ldmMatchCandidate_t;
256 
257 #define LDM_BATCH_SIZE 64
258 
259 typedef struct {
260     ZSTD_window_t window;   /* State for the window round buffer management */
261     ldmEntry_t* hashTable;
262     U32 loadedDictEnd;
263     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
264     size_t splitIndices[LDM_BATCH_SIZE];
265     ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
266 } ldmState_t;
267 
268 typedef struct {
269     ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
270     U32 hashLog;            /* Log size of hashTable */
271     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
272     U32 minMatchLength;     /* Minimum match length */
273     U32 hashRateLog;       /* Log number of entries to skip */
274     U32 windowLog;          /* Window log for the LDM */
275 } ldmParams_t;
276 
277 typedef struct {
278     int collectSequences;
279     ZSTD_Sequence* seqStart;
280     size_t seqIndex;
281     size_t maxSequences;
282 } SeqCollector;
283 
284 struct ZSTD_CCtx_params_s {
285     ZSTD_format_e format;
286     ZSTD_compressionParameters cParams;
287     ZSTD_frameParameters fParams;
288 
289     int compressionLevel;
290     int forceWindow;           /* force back-references to respect limit of
291                                 * 1<<wLog, even for dictionary */
292     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
293                                 * No target when targetCBlockSize == 0.
294                                 * There is no guarantee on compressed block size */
295     int srcSizeHint;           /* User's best guess of source size.
296                                 * Hint is not valid when srcSizeHint == 0.
297                                 * There is no guarantee that hint is close to actual source size */
298 
299     ZSTD_dictAttachPref_e attachDictPref;
300     ZSTD_paramSwitch_e literalCompressionMode;
301 
302     /* Multithreading: used to pass parameters to mtctx */
303     int nbWorkers;
304     size_t jobSize;
305     int overlapLog;
306     int rsyncable;
307 
308     /* Long distance matching parameters */
309     ldmParams_t ldmParams;
310 
311     /* Dedicated dict search algorithm trigger */
312     int enableDedicatedDictSearch;
313 
314     /* Input/output buffer modes */
315     ZSTD_bufferMode_e inBufferMode;
316     ZSTD_bufferMode_e outBufferMode;
317 
318     /* Sequence compression API */
319     ZSTD_sequenceFormat_e blockDelimiters;
320     int validateSequences;
321 
322     /* Block splitting */
323     ZSTD_paramSwitch_e useBlockSplitter;
324 
325     /* Param for deciding whether to use row-based matchfinder */
326     ZSTD_paramSwitch_e useRowMatchFinder;
327 
328     /* Always load a dictionary in ext-dict mode (not prefix mode)? */
329     int deterministicRefPrefix;
330 
331     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
332     ZSTD_customMem customMem;
333 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
334 
/* Workspace sizes, in bytes : (MaxSeq + 2) unsigned values for sequences compression,
 * added to HUF_WORKSPACE_SIZE for the whole entropy workspace. */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE            (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
337 
/**
 * Tells whether a compression goes straight from the user-provided source
 * buffer to the user-provided destination buffer (ZSTDb_not_buffered),
 * or whether the context has to buffer the input/output (ZSTDb_buffered).
 */
typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
347 
348 /**
349  * Struct that contains all elements of block splitter that should be allocated
350  * in a wksp.
351  */
352 #define ZSTD_MAX_NB_BLOCK_SPLITS 196
353 typedef struct {
354     seqStore_t fullSeqStoreChunk;
355     seqStore_t firstHalfSeqStore;
356     seqStore_t secondHalfSeqStore;
357     seqStore_t currSeqStore;
358     seqStore_t nextSeqStore;
359 
360     U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
361     ZSTD_entropyCTablesMetadata_t entropyMetadata;
362 } ZSTD_blockSplitCtx;
363 
364 struct ZSTD_CCtx_s {
365     ZSTD_compressionStage_e stage;
366     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
367     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
368     ZSTD_CCtx_params requestedParams;
369     ZSTD_CCtx_params appliedParams;
370     ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
371     U32   dictID;
372     size_t dictContentSize;
373 
374     ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
375     size_t blockSize;
376     unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
377     unsigned long long consumedSrcSize;
378     unsigned long long producedCSize;
379     XXH64_state_t xxhState;
380     ZSTD_customMem customMem;
381     ZSTD_threadPool* pool;
382     size_t staticSize;
383     SeqCollector seqCollector;
384     int isFirstBlock;
385     int initialized;
386 
387     seqStore_t seqStore;      /* sequences storage ptrs */
388     ldmState_t ldmState;      /* long distance matching state */
389     rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
390     size_t maxNbLdmSequences;
391     rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
392     ZSTD_blockState_t blockState;
393     U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
394 
395     /* Whether we are streaming or not */
396     ZSTD_buffered_policy_e bufferedPolicy;
397 
398     /* streaming */
399     char*  inBuff;
400     size_t inBuffSize;
401     size_t inToCompress;
402     size_t inBuffPos;
403     size_t inBuffTarget;
404     char*  outBuff;
405     size_t outBuffSize;
406     size_t outBuffContentSize;
407     size_t outBuffFlushedSize;
408     ZSTD_cStreamStage streamStage;
409     U32    frameEnded;
410 
411     /* Stable in/out buffer verification */
412     ZSTD_inBuffer expectedInBuffer;
413     size_t expectedOutBufferSize;
414 
415     /* Dictionary */
416     ZSTD_localDict localDict;
417     const ZSTD_CDict* cdict;
418     ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
419 
420     /* Multi-threading */
421 #ifdef ZSTD_MULTITHREAD
422     ZSTDMT_CCtx* mtctx;
423 #endif
424 
425     /* Tracing */
426 #if ZSTD_TRACE
427     ZSTD_TraceCtx traceCtx;
428 #endif
429 
430     /* Workspace for block splitter */
431     ZSTD_blockSplitCtx blockSplitCtx;
432 };
433 
typedef enum {
    ZSTD_dtlm_fast,
    ZSTD_dtlm_full
} ZSTD_dictTableLoadMethod_e;

/* Dictionary mode, taken as a parameter by ZSTD_selectBlockCompressor(). */
typedef enum {
    ZSTD_noDict              = 0,
    ZSTD_extDict             = 1,
    ZSTD_dictMatchState      = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;
442 
typedef enum {
    /* Compression with ZSTD_noDict or ZSTD_extDict.
     * In this mode we use both the srcSize and the dictSize
     * when selecting and adjusting parameters.
     */
    ZSTD_cpm_noAttachDict = 0,

    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
     * In this mode we only take the srcSize into account
     * when selecting and adjusting parameters.
     */
    ZSTD_cpm_attachDict = 1,

    /* Creating a CDict.
     * In this mode we take both the source size and the dictionary size
     * into account when selecting and adjusting the parameters.
     */
    ZSTD_cpm_createCDict = 2,

    /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
     * We don't know what these parameters are for. We default to the legacy
     * behavior of taking both the source size and the dict size into account
     * when selecting and adjusting parameters.
     */
    ZSTD_cpm_unknown = 3
} ZSTD_cParamMode_e;
462 
463 typedef size_t (*ZSTD_blockCompressor) (
464         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
465         void const* src, size_t srcSize);
466 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
467 
468 
ZSTD_LLcode(U32 litLength)469 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
470 {
471     static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
472                                        8,  9, 10, 11, 12, 13, 14, 15,
473                                       16, 16, 17, 17, 18, 18, 19, 19,
474                                       20, 20, 20, 20, 21, 21, 21, 21,
475                                       22, 22, 22, 22, 22, 22, 22, 22,
476                                       23, 23, 23, 23, 23, 23, 23, 23,
477                                       24, 24, 24, 24, 24, 24, 24, 24,
478                                       24, 24, 24, 24, 24, 24, 24, 24 };
479     static const U32 LL_deltaCode = 19;
480     return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
481 }
482 
483 /* ZSTD_MLcode() :
484  * note : mlBase = matchLength - MINMATCH;
485  *        because it's the format it's stored in seqStore->sequences */
ZSTD_MLcode(U32 mlBase)486 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
487 {
488     static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
489                                       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
490                                       32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
491                                       38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
492                                       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
493                                       41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
494                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
495                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
496     static const U32 ML_deltaCode = 36;
497     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
498 }
499 
500 typedef struct repcodes_s {
501     U32 rep[3];
502 } repcodes_t;
503 
ZSTD_updateRep(U32 const rep[3],U32 const offset,U32 const ll0)504 MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
505 {
506     repcodes_t newReps;
507     if (offset >= ZSTD_REP_NUM) {  /* full offset */
508         newReps.rep[2] = rep[1];
509         newReps.rep[1] = rep[0];
510         newReps.rep[0] = offset - ZSTD_REP_MOVE;
511     } else {   /* repcode */
512         U32 const repCode = offset + ll0;
513         if (repCode > 0) {  /* note : if repCode==0, no change */
514             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
515             newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
516             newReps.rep[1] = rep[0];
517             newReps.rep[0] = currentOffset;
518         } else {   /* repCode == 0 */
519             ZSTD_memcpy(&newReps, rep, sizeof(newReps));
520         }
521     }
522     return newReps;
523 }
524 
525 /* ZSTD_cParam_withinBounds:
526  * @return 1 if value is within cParam bounds,
527  * 0 otherwise */
ZSTD_cParam_withinBounds(ZSTD_cParameter cParam,int value)528 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
529 {
530     ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
531     if (ZSTD_isError(bounds.error)) return 0;
532     if (value < bounds.lowerBound) return 0;
533     if (value > bounds.upperBound) return 0;
534     return 1;
535 }
536 
537 /* ZSTD_noCompressBlock() :
538  * Writes uncompressed block to dst buffer from given src.
539  * Returns the size of the block */
ZSTD_noCompressBlock(void * dst,size_t dstCapacity,const void * src,size_t srcSize,U32 lastBlock)540 MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
541 {
542     U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
543     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
544                     dstSize_tooSmall, "dst buf too small for uncompressed block");
545     MEM_writeLE24(dst, cBlockHeader24);
546     ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
547     return ZSTD_blockHeaderSize + srcSize;
548 }
549 
ZSTD_rleCompressBlock(void * dst,size_t dstCapacity,BYTE src,size_t srcSize,U32 lastBlock)550 MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
551 {
552     BYTE* const op = (BYTE*)dst;
553     U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
554     RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
555     MEM_writeLE24(op, cBlockHeader);
556     op[3] = src;
557     return 4;
558 }
559 
560 
561 /* ZSTD_minGain() :
562  * minimum compression required
563  * to generate a compress block or a compressed literals section.
564  * note : use same formula for both situations */
ZSTD_minGain(size_t srcSize,ZSTD_strategy strat)565 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
566 {
567     U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
568     ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
569     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
570     return (srcSize >> minlog) + 2;
571 }
572 
ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params * cctxParams)573 MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
574 {
575     switch (cctxParams->literalCompressionMode) {
576     case ZSTD_ps_enable:
577         return 0;
578     case ZSTD_ps_disable:
579         return 1;
580     default:
581         assert(0 /* impossible: pre-validated */);
582         ZSTD_FALLTHROUGH;
583     case ZSTD_ps_auto:
584         return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
585     }
586 }
587 
588 /*! ZSTD_safecopyLiterals() :
589  *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
590  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
591  *  large copies.
592  */
ZSTD_safecopyLiterals(BYTE * op,BYTE const * ip,BYTE const * const iend,BYTE const * ilimit_w)593 static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
594     assert(iend > ilimit_w);
595     if (ip <= ilimit_w) {
596         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
597         op += ilimit_w - ip;
598         ip = ilimit_w;
599     }
600     while (ip < iend) *op++ = *ip++;
601 }
602 
603 /*! ZSTD_storeSeq() :
604  *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
605  *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
606  *  `mlBase` : matchLength - MINMATCH
607  *  Allowed to overread literals up to litLimit.
608 */
609 HINT_INLINE UNUSED_ATTR
ZSTD_storeSeq(seqStore_t * seqStorePtr,size_t litLength,const BYTE * literals,const BYTE * litLimit,U32 offCode,size_t mlBase)610 void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
611 {
612     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
613     BYTE const* const litEnd = literals + litLength;
614 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
615     static const BYTE* g_start = NULL;
616     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
617     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
618         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
619                pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
620     }
621 #endif
622     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
623     /* copy Literals */
624     assert(seqStorePtr->maxNbLit <= 128 KB);
625     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
626     assert(literals + litLength <= litLimit);
627     if (litEnd <= litLimit_w) {
628         /* Common case we can use wildcopy.
629 	 * First copy 16 bytes, because literals are likely short.
630 	 */
631         assert(WILDCOPY_OVERLENGTH >= 16);
632         ZSTD_copy16(seqStorePtr->lit, literals);
633         if (litLength > 16) {
634             ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
635         }
636     } else {
637         ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
638     }
639     seqStorePtr->lit += litLength;
640 
641     /* literal Length */
642     if (litLength>0xFFFF) {
643         assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
644         seqStorePtr->longLengthType = ZSTD_llt_literalLength;
645         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
646     }
647     seqStorePtr->sequences[0].litLength = (U16)litLength;
648 
649     /* match offset */
650     seqStorePtr->sequences[0].offset = offCode + 1;
651 
652     /* match Length */
653     if (mlBase>0xFFFF) {
654         assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
655         seqStorePtr->longLengthType = ZSTD_llt_matchLength;
656         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
657     }
658     seqStorePtr->sequences[0].matchLength = (U16)mlBase;
659 
660     seqStorePtr->sequences++;
661 }
662 
663 
664 /*-*************************************
665 *  Match length counter
666 ***************************************/
ZSTD_NbCommonBytes(size_t val)667 static unsigned ZSTD_NbCommonBytes (size_t val)
668 {
669     if (MEM_isLittleEndian()) {
670         if (MEM_64bits()) {
671 #       if defined(_MSC_VER) && defined(_WIN64)
672 #           if STATIC_BMI2
673                 return _tzcnt_u64(val) >> 3;
674 #           else
675                 if (val != 0) {
676                     unsigned long r;
677                     _BitScanForward64(&r, (U64)val);
678                     return (unsigned)(r >> 3);
679                 } else {
680                     /* Should not reach this code path */
681                     __assume(0);
682                 }
683 #           endif
684 #       elif defined(__GNUC__) && (__GNUC__ >= 4)
685             return (__builtin_ctzll((U64)val) >> 3);
686 #       else
687             static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
688                                                      0, 3, 1, 3, 1, 4, 2, 7,
689                                                      0, 2, 3, 6, 1, 5, 3, 5,
690                                                      1, 3, 4, 4, 2, 5, 6, 7,
691                                                      7, 0, 1, 2, 3, 3, 4, 6,
692                                                      2, 6, 5, 5, 3, 4, 5, 6,
693                                                      7, 1, 2, 4, 6, 4, 4, 5,
694                                                      7, 2, 6, 5, 7, 6, 7, 7 };
695             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
696 #       endif
697         } else { /* 32 bits */
698 #       if defined(_MSC_VER)
699             if (val != 0) {
700                 unsigned long r;
701                 _BitScanForward(&r, (U32)val);
702                 return (unsigned)(r >> 3);
703             } else {
704                 /* Should not reach this code path */
705                 __assume(0);
706             }
707 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
708             return (__builtin_ctz((U32)val) >> 3);
709 #       else
710             static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
711                                                      3, 2, 2, 1, 3, 2, 0, 1,
712                                                      3, 3, 1, 2, 2, 2, 2, 0,
713                                                      3, 1, 2, 0, 1, 0, 1, 1 };
714             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
715 #       endif
716         }
717     } else {  /* Big Endian CPU */
718         if (MEM_64bits()) {
719 #       if defined(_MSC_VER) && defined(_WIN64)
720 #           if STATIC_BMI2
721 			    return _lzcnt_u64(val) >> 3;
722 #           else
723                 if (val != 0) {
724                     unsigned long r;
725                     _BitScanReverse64(&r, (U64)val);
726                     return (unsigned)(r >> 3);
727                 } else {
728                     /* Should not reach this code path */
729                     __assume(0);
730                 }
731 #           endif
732 #       elif defined(__GNUC__) && (__GNUC__ >= 4)
733             return (__builtin_clzll(val) >> 3);
734 #       else
735             unsigned r;
736             const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
737             if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
738             if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
739             r += (!val);
740             return r;
741 #       endif
742         } else { /* 32 bits */
743 #       if defined(_MSC_VER)
744             if (val != 0) {
745                 unsigned long r;
746                 _BitScanReverse(&r, (unsigned long)val);
747                 return (unsigned)(r >> 3);
748             } else {
749                 /* Should not reach this code path */
750                 __assume(0);
751             }
752 #       elif defined(__GNUC__) && (__GNUC__ >= 3)
753             return (__builtin_clz((U32)val) >> 3);
754 #       else
755             unsigned r;
756             if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
757             r += (!val);
758             return r;
759 #       endif
760     }   }
761 }
762 
763 
ZSTD_count(const BYTE * pIn,const BYTE * pMatch,const BYTE * const pInLimit)764 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
765 {
766     const BYTE* const pStart = pIn;
767     const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
768 
769     if (pIn < pInLoopLimit) {
770         { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
771           if (diff) return ZSTD_NbCommonBytes(diff); }
772         pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
773         while (pIn < pInLoopLimit) {
774             size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
775             if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
776             pIn += ZSTD_NbCommonBytes(diff);
777             return (size_t)(pIn - pStart);
778     }   }
779     if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
780     if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
781     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
782     return (size_t)(pIn - pStart);
783 }
784 
785 /** ZSTD_count_2segments() :
786  *  can count match length with `ip` & `match` in 2 different segments.
787  *  convention : on reaching mEnd, match count continue starting from iStart
788  */
789 MEM_STATIC size_t
ZSTD_count_2segments(const BYTE * ip,const BYTE * match,const BYTE * iEnd,const BYTE * mEnd,const BYTE * iStart)790 ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
791                      const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
792 {
793     const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
794     size_t const matchLength = ZSTD_count(ip, match, vEnd);
795     if (match + matchLength != mEnd) return matchLength;
796     DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
797     DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
798     DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
799     DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
800     DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
801     return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
802 }
803 
804 
805 /*-*************************************
806  *  Hashes
807  ***************************************/
808 static const U32 prime3bytes = 506832829U;
ZSTD_hash3(U32 u,U32 h)809 static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
ZSTD_hash3Ptr(const void * ptr,U32 h)810 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
811 
/* 4-bytes hash : multiplicative hash of a 32-bit value, keeping the top @h bits.
 * @h must be non-zero. The pointer variant loads its input with MEM_read32(). */
static const U32 prime4bytes = 2654435761U;
static U32 ZSTD_hash4(U32 u, U32 h)
{
    U32 const mixed = u * prime4bytes;
    return mixed >> (32-h);
}
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h)
{
    U32 const value = MEM_read32(ptr);
    return ZSTD_hash4(value, h);
}
815 
/* 5-bytes hash : only the low 40 bits of the input contribute.
 * @h is the number of bits of the resulting hash ; it must be non-zero. */
static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h)
{
    U64 const low5Bytes = u << (64-40);   /* shifts the top 24 bits out */
    U64 const mixed = low5Bytes * prime5bytes;
    return (size_t)(mixed >> (64-h));
}
static size_t ZSTD_hash5Ptr(const void* p, U32 h)
{
    U64 const value = MEM_readLE64(p);
    return ZSTD_hash5(value, h);
}
819 
/* 6-bytes hash : only the low 48 bits of the input contribute.
 * @h is the number of bits of the resulting hash ; it must be non-zero. */
static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h)
{
    U64 const low6Bytes = u << (64-48);   /* shifts the top 16 bits out */
    U64 const mixed = low6Bytes * prime6bytes;
    return (size_t)(mixed >> (64-h));
}
static size_t ZSTD_hash6Ptr(const void* p, U32 h)
{
    U64 const value = MEM_readLE64(p);
    return ZSTD_hash6(value, h);
}
823 
/* 7-bytes hash : only the low 56 bits of the input contribute.
 * @h is the number of bits of the resulting hash ; it must be non-zero. */
static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h)
{
    U64 const low7Bytes = u << (64-56);   /* shifts the top 8 bits out */
    U64 const mixed = low7Bytes * prime7bytes;
    return (size_t)(mixed >> (64-h));
}
static size_t ZSTD_hash7Ptr(const void* p, U32 h)
{
    U64 const value = MEM_readLE64(p);
    return ZSTD_hash7(value, h);
}
827 
/* 8-bytes hash : all 64 bits of the input contribute.
 * @h is the number of bits of the resulting hash ; it must be non-zero.
 * prime8bytes is also the multiplier of the rolling hash defined further below. */
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h)
{
    U64 const mixed = u * prime8bytes;
    return (size_t)(mixed >> (64-h));
}
static size_t ZSTD_hash8Ptr(const void* p, U32 h)
{
    U64 const value = MEM_readLE64(p);
    return ZSTD_hash8(value, h);
}
831 
832 MEM_STATIC FORCE_INLINE_ATTR
ZSTD_hashPtr(const void * p,U32 hBits,U32 mls)833 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
834 {
835     switch(mls)
836     {
837     default:
838     case 4: return ZSTD_hash4Ptr(p, hBits);
839     case 5: return ZSTD_hash5Ptr(p, hBits);
840     case 6: return ZSTD_hash6Ptr(p, hBits);
841     case 7: return ZSTD_hash7Ptr(p, hBits);
842     case 8: return ZSTD_hash8Ptr(p, hBits);
843     }
844 }
845 
846 /** ZSTD_ipow() :
847  * Return base^exponent.
848  */
ZSTD_ipow(U64 base,U64 exponent)849 static U64 ZSTD_ipow(U64 base, U64 exponent)
850 {
851     U64 power = 1;
852     while (exponent) {
853       if (exponent & 1) power *= base;
854       exponent >>= 1;
855       base *= base;
856     }
857     return power;
858 }
859 
860 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
861 
862 /** ZSTD_rollingHash_append() :
863  * Add the buffer to the hash value.
864  */
ZSTD_rollingHash_append(U64 hash,void const * buf,size_t size)865 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
866 {
867     BYTE const* istart = (BYTE const*)buf;
868     size_t pos;
869     for (pos = 0; pos < size; ++pos) {
870         hash *= prime8bytes;
871         hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
872     }
873     return hash;
874 }
875 
876 /** ZSTD_rollingHash_compute() :
877  * Compute the rolling hash value of the buffer.
878  */
ZSTD_rollingHash_compute(void const * buf,size_t size)879 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
880 {
881     return ZSTD_rollingHash_append(0, buf, size);
882 }
883 
884 /** ZSTD_rollingHash_primePower() :
885  * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
886  * over a window of length bytes.
887  */
ZSTD_rollingHash_primePower(U32 length)888 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
889 {
890     return ZSTD_ipow(prime8bytes, length - 1);
891 }
892 
893 /** ZSTD_rollingHash_rotate() :
894  * Rotate the rolling hash by one byte.
895  */
ZSTD_rollingHash_rotate(U64 hash,BYTE toRemove,BYTE toAdd,U64 primePower)896 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
897 {
898     hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
899     hash *= prime8bytes;
900     hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
901     return hash;
902 }
903 
904 /*-*************************************
905 *  Round buffer management
906 ***************************************/
#if ZSTD_WINDOWLOG_MAX_64 > 31
#  error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif

/* Largest value the current index is allowed to reach :
 * ZSTD_window_needOverflowCorrection() reports an overflow risk beyond it. */
#define ZSTD_CURRENT_MAX  ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))

/* Largest chunk that can be ingested before overflow correction must be checked again :
 * distance between the highest U32 index and ZSTD_CURRENT_MAX. */
#define ZSTD_CHUNKSIZE_MAX  (((U32)-1) - ZSTD_CURRENT_MAX)
916 
917 /**
918  * ZSTD_window_clear():
919  * Clears the window containing the history by simply setting it to empty.
920  */
ZSTD_window_clear(ZSTD_window_t * window)921 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
922 {
923     size_t const endT = (size_t)(window->nextSrc - window->base);
924     U32 const end = (U32)endT;
925 
926     window->lowLimit = end;
927     window->dictLimit = end;
928 }
929 
ZSTD_window_isEmpty(ZSTD_window_t const window)930 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
931 {
932     return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
933            window.lowLimit == ZSTD_WINDOW_START_INDEX &&
934            (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
935 }
936 
937 /**
938  * ZSTD_window_hasExtDict():
939  * Returns non-zero if the window has a non-empty extDict.
940  */
ZSTD_window_hasExtDict(ZSTD_window_t const window)941 MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
942 {
943     return window.lowLimit < window.dictLimit;
944 }
945 
946 /**
947  * ZSTD_matchState_dictMode():
948  * Inspects the provided matchState and figures out what dictMode should be
949  * passed to the compressor.
950  */
ZSTD_matchState_dictMode(const ZSTD_matchState_t * ms)951 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
952 {
953     return ZSTD_window_hasExtDict(ms->window) ?
954         ZSTD_extDict :
955         ms->dictMatchState != NULL ?
956             (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
957             ZSTD_noDict;
958 }
959 
/* ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY :
 * When non-zero, zstd runs the overflow correction code much more frequently.
 * This is very inefficient, and is only meant for tests and fuzzers.
 * Unless set by the build, it defaults to 1 when
 * FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is defined, and to 0 otherwise.
 */
#if !defined(ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY)
#  if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION)
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif
971 
972 /**
973  * ZSTD_window_canOverflowCorrect():
974  * Returns non-zero if the indices are large enough for overflow correction
975  * to work correctly without impacting compression ratio.
976  */
ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src)977 MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
978                                               U32 cycleLog,
979                                               U32 maxDist,
980                                               U32 loadedDictEnd,
981                                               void const* src)
982 {
983     U32 const cycleSize = 1u << cycleLog;
984     U32 const curr = (U32)((BYTE const*)src - window.base);
985     U32 const minIndexToOverflowCorrect = cycleSize
986                                         + MAX(maxDist, cycleSize)
987                                         + ZSTD_WINDOW_START_INDEX;
988 
989     /* Adjust the min index to backoff the overflow correction frequency,
990      * so we don't waste too much CPU in overflow correction. If this
991      * computation overflows we don't really care, we just need to make
992      * sure it is at least minIndexToOverflowCorrect.
993      */
994     U32 const adjustment = window.nbOverflowCorrections + 1;
995     U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
996                                   minIndexToOverflowCorrect);
997     U32 const indexLargeEnough = curr > adjustedIndex;
998 
999     /* Only overflow correct early if the dictionary is invalidated already,
1000      * so we don't hurt compression ratio.
1001      */
1002     U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
1003 
1004     return indexLargeEnough && dictionaryInvalidated;
1005 }
1006 
1007 /**
1008  * ZSTD_window_needOverflowCorrection():
1009  * Returns non-zero if the indices are getting too large and need overflow
1010  * protection.
1011  */
ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,U32 cycleLog,U32 maxDist,U32 loadedDictEnd,void const * src,void const * srcEnd)1012 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1013                                                   U32 cycleLog,
1014                                                   U32 maxDist,
1015                                                   U32 loadedDictEnd,
1016                                                   void const* src,
1017                                                   void const* srcEnd)
1018 {
1019     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
1020     if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1021         if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
1022             return 1;
1023         }
1024     }
1025     return curr > ZSTD_CURRENT_MAX;
1026 }
1027 
1028 /**
1029  * ZSTD_window_correctOverflow():
1030  * Reduces the indices to protect from index overflow.
1031  * Returns the correction made to the indices, which must be applied to every
1032  * stored index.
1033  *
1034  * The least significant cycleLog bits of the indices must remain the same,
1035  * which may be 0. Every index up to maxDist in the past must be valid.
1036  */
ZSTD_window_correctOverflow(ZSTD_window_t * window,U32 cycleLog,U32 maxDist,void const * src)1037 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1038                                            U32 maxDist, void const* src)
1039 {
1040     /* preemptive overflow correction:
1041      * 1. correction is large enough:
1042      *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
1043      *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
1044      *
1045      *    current - newCurrent
1046      *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
1047      *    > (3<<29) - (1<<chainLog)
1048      *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
1049      *    > 1<<29
1050      *
1051      * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
1052      *    After correction, current is less than (1<<chainLog + 1<<windowLog).
1053      *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
1054      *    In 32-bit mode we are safe, because (chainLog <= 29), so
1055      *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
1056      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
1057      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
1058      */
1059     U32 const cycleSize = 1u << cycleLog;
1060     U32 const cycleMask = cycleSize - 1;
1061     U32 const curr = (U32)((BYTE const*)src - window->base);
1062     U32 const currentCycle = curr & cycleMask;
1063     /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
1064     U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1065                                      ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1066                                      : 0;
1067     U32 const newCurrent = currentCycle
1068                          + currentCycleCorrection
1069                          + MAX(maxDist, cycleSize);
1070     U32 const correction = curr - newCurrent;
1071     /* maxDist must be a power of two so that:
1072      *   (newCurrent & cycleMask) == (curr & cycleMask)
1073      * This is required to not corrupt the chains / binary tree.
1074      */
1075     assert((maxDist & (maxDist - 1)) == 0);
1076     assert((curr & cycleMask) == (newCurrent & cycleMask));
1077     assert(curr > newCurrent);
1078     if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1079         /* Loose bound, should be around 1<<29 (see above) */
1080         assert(correction > 1<<28);
1081     }
1082 
1083     window->base += correction;
1084     window->dictBase += correction;
1085     if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1086         window->lowLimit = ZSTD_WINDOW_START_INDEX;
1087     } else {
1088         window->lowLimit -= correction;
1089     }
1090     if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1091         window->dictLimit = ZSTD_WINDOW_START_INDEX;
1092     } else {
1093         window->dictLimit -= correction;
1094     }
1095 
1096     /* Ensure we can still reference the full window. */
1097     assert(newCurrent >= maxDist);
1098     assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
1099     /* Ensure that lowLimit and dictLimit didn't underflow. */
1100     assert(window->lowLimit <= newCurrent);
1101     assert(window->dictLimit <= newCurrent);
1102 
1103     ++window->nbOverflowCorrections;
1104 
1105     DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
1106              window->lowLimit);
1107     return correction;
1108 }
1109 
1110 /**
1111  * ZSTD_window_enforceMaxDist():
1112  * Updates lowLimit so that:
1113  *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
1114  *
1115  * It ensures index is valid as long as index >= lowLimit.
1116  * This must be called before a block compression call.
1117  *
1118  * loadedDictEnd is only defined if a dictionary is in use for current compression.
1119  * As the name implies, loadedDictEnd represents the index at end of dictionary.
1120  * The value lies within context's referential, it can be directly compared to blockEndIdx.
1121  *
1122  * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
1123  * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
1124  * This is because dictionaries are allowed to be referenced fully
1125  * as long as the last byte of the dictionary is in the window.
1126  * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
1127  *
1128  * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
1129  * In dictMatchState mode, lowLimit and dictLimit are the same,
1130  * and the dictionary is below them.
1131  * forceWindow and dictMatchState are therefore incompatible.
1132  */
1133 MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t * window,const void * blockEnd,U32 maxDist,U32 * loadedDictEndPtr,const ZSTD_matchState_t ** dictMatchStatePtr)1134 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
1135                      const void* blockEnd,
1136                            U32   maxDist,
1137                            U32*  loadedDictEndPtr,
1138                      const ZSTD_matchState_t** dictMatchStatePtr)
1139 {
1140     U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1141     U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
1142     DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1143                 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1144 
1145     /* - When there is no dictionary : loadedDictEnd == 0.
1146          In which case, the test (blockEndIdx > maxDist) is merely to avoid
1147          overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
1148        - When there is a standard dictionary :
1149          Index referential is copied from the dictionary,
1150          which means it starts from 0.
1151          In which case, loadedDictEnd == dictSize,
1152          and it makes sense to compare `blockEndIdx > maxDist + dictSize`
1153          since `blockEndIdx` also starts from zero.
1154        - When there is an attached dictionary :
1155          loadedDictEnd is expressed within the referential of the context,
1156          so it can be directly compared against blockEndIdx.
1157     */
1158     if (blockEndIdx > maxDist + loadedDictEnd) {
1159         U32 const newLowLimit = blockEndIdx - maxDist;
1160         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
1161         if (window->dictLimit < window->lowLimit) {
1162             DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
1163                         (unsigned)window->dictLimit, (unsigned)window->lowLimit);
1164             window->dictLimit = window->lowLimit;
1165         }
1166         /* On reaching window size, dictionaries are invalidated */
1167         if (loadedDictEndPtr) *loadedDictEndPtr = 0;
1168         if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
1169     }
1170 }
1171 
1172 /* Similar to ZSTD_window_enforceMaxDist(),
1173  * but only invalidates dictionary
1174  * when input progresses beyond window size.
1175  * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
1176  *              loadedDictEnd uses same referential as window->base
1177  *              maxDist is the window size */
1178 MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t * window,const void * blockEnd,U32 maxDist,U32 * loadedDictEndPtr,const ZSTD_matchState_t ** dictMatchStatePtr)1179 ZSTD_checkDictValidity(const ZSTD_window_t* window,
1180                        const void* blockEnd,
1181                              U32   maxDist,
1182                              U32*  loadedDictEndPtr,
1183                        const ZSTD_matchState_t** dictMatchStatePtr)
1184 {
1185     assert(loadedDictEndPtr != NULL);
1186     assert(dictMatchStatePtr != NULL);
1187     {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1188         U32 const loadedDictEnd = *loadedDictEndPtr;
1189         DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1190                     (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1191         assert(blockEndIdx >= loadedDictEnd);
1192 
1193         if (blockEndIdx > loadedDictEnd + maxDist) {
1194             /* On reaching window size, dictionaries are invalidated.
1195              * For simplification, if window size is reached anywhere within next block,
1196              * the dictionary is invalidated for the full block.
1197              */
1198             DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
1199             *loadedDictEndPtr = 0;
1200             *dictMatchStatePtr = NULL;
1201         } else {
1202             if (*loadedDictEndPtr != 0) {
1203                 DEBUGLOG(6, "dictionary considered valid for current block");
1204     }   }   }
1205 }
1206 
ZSTD_window_init(ZSTD_window_t * window)1207 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1208     ZSTD_memset(window, 0, sizeof(*window));
1209     window->base = (BYTE const*)" ";
1210     window->dictBase = (BYTE const*)" ";
1211     ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
1212     window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
1213     window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
1214     window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
1215     window->nbOverflowCorrections = 0;
1216 }
1217 
1218 /**
1219  * ZSTD_window_update():
1220  * Updates the window by appending [src, src + srcSize) to the window.
1221  * If it is not contiguous, the current prefix becomes the extDict, and we
1222  * forget about the extDict. Handles overlap of the prefix and extDict.
1223  * Returns non-zero if the segment is contiguous.
1224  */
ZSTD_window_update(ZSTD_window_t * window,void const * src,size_t srcSize,int forceNonContiguous)1225 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
1226                                   void const* src, size_t srcSize,
1227                                   int forceNonContiguous)
1228 {
1229     BYTE const* const ip = (BYTE const*)src;
1230     U32 contiguous = 1;
1231     DEBUGLOG(5, "ZSTD_window_update");
1232     if (srcSize == 0)
1233         return contiguous;
1234     assert(window->base != NULL);
1235     assert(window->dictBase != NULL);
1236     /* Check if blocks follow each other */
1237     if (src != window->nextSrc || forceNonContiguous) {
1238         /* not contiguous */
1239         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
1240         DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
1241         window->lowLimit = window->dictLimit;
1242         assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
1243         window->dictLimit = (U32)distanceFromBase;
1244         window->dictBase = window->base;
1245         window->base = ip - distanceFromBase;
1246         /* ms->nextToUpdate = window->dictLimit; */
1247         if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
1248         contiguous = 0;
1249     }
1250     window->nextSrc = ip + srcSize;
1251     /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
1252     if ( (ip+srcSize > window->dictBase + window->lowLimit)
1253        & (ip < window->dictBase + window->dictLimit)) {
1254         ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
1255         U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
1256         window->lowLimit = lowLimitMax;
1257         DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
1258     }
1259     return contiguous;
1260 }
1261 
1262 /**
1263  * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1264  */
ZSTD_getLowestMatchIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1265 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1266 {
1267     U32 const maxDistance = 1U << windowLog;
1268     U32 const lowestValid = ms->window.lowLimit;
1269     U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1270     U32 const isDictionary = (ms->loadedDictEnd != 0);
1271     /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
1272      * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1273      * valid for the entire block. So this check is sufficient to find the lowest valid match index.
1274      */
1275     U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1276     return matchLowest;
1277 }
1278 
1279 /**
1280  * Returns the lowest allowed match index in the prefix.
1281  */
ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t * ms,U32 curr,unsigned windowLog)1282 MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
1283 {
1284     U32    const maxDistance = 1U << windowLog;
1285     U32    const lowestValid = ms->window.dictLimit;
1286     U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1287     U32    const isDictionary = (ms->loadedDictEnd != 0);
1288     /* When computing the lowest prefix index we need to take the dictionary into account to handle
1289      * the edge case where the dictionary and the source are contiguous in memory.
1290      */
1291     U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
1292     return matchLowest;
1293 }
1294 
1295 
1296 
1297 /* debug functions */
1298 #if (DEBUGLEVEL>=2)
1299 
ZSTD_fWeight(U32 rawStat)1300 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
1301 {
1302     U32 const fp_accuracy = 8;
1303     U32 const fp_multiplier = (1 << fp_accuracy);
1304     U32 const newStat = rawStat + 1;
1305     U32 const hb = ZSTD_highbit32(newStat);
1306     U32 const BWeight = hb * fp_multiplier;
1307     U32 const FWeight = (newStat << fp_accuracy) >> hb;
1308     U32 const weight = BWeight + FWeight;
1309     assert(hb + fp_accuracy < 31);
1310     return (double)weight / fp_multiplier;
1311 }
1312 
1313 /* display a table content,
1314  * listing each element, its frequency, and its predicted bit cost */
ZSTD_debugTable(const U32 * table,U32 max)1315 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1316 {
1317     unsigned u, sum;
1318     for (u=0, sum=0; u<=max; u++) sum += table[u];
1319     DEBUGLOG(2, "total nb elts: %u", sum);
1320     for (u=0; u<=max; u++) {
1321         DEBUGLOG(2, "%2u: %5u  (%.2f)",
1322                 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1323     }
1324 }
1325 
1326 #endif
1327 
1328 
1329 #if defined (__cplusplus)
1330 }
1331 #endif
1332 
1333 /* ===============================================================
1334  * Shared internal declarations
1335  * These prototypes may be called from sources not in lib/compress
1336  * =============================================================== */
1337 
1338 /* ZSTD_loadCEntropy() :
1339  * dict : must point at beginning of a valid zstd dictionary.
1340  * return : size of dictionary header (size of magic number + dict ID + entropy tables)
1341  * assumptions : magic number supposed already checked
1342  *               and dictSize >= 8 */
1343 size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
1344                          const void* const dict, size_t dictSize);
1345 
1346 void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1347 
1348 /* ==============================================================
1349  * Private declarations
1350  * These prototypes shall only be called from within lib/compress
1351  * ============================================================== */
1352 
/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 is taken literally, as a source size of 0 bytes.
 */
1358 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
1359         const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
1360 
1361 /*! ZSTD_initCStream_internal() :
1362  *  Private use only. Init streaming operation.
1363  *  expects params to be valid.
1364  *  must receive dict, or cdict, or none, but not both.
1365  *  @return : 0, or an error code */
1366 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1367                      const void* dict, size_t dictSize,
1368                      const ZSTD_CDict* cdict,
1369                      const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1370 
1371 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
1372 
/*! ZSTD_getCParamsFromCDict() :
 *  @return : the compression parameters of `cdict`. */
1375 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1376 
1377 /* ZSTD_compressBegin_advanced_internal() :
1378  * Private use only. To be called from zstdmt_compress.c. */
1379 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1380                                     const void* dict, size_t dictSize,
1381                                     ZSTD_dictContentType_e dictContentType,
1382                                     ZSTD_dictTableLoadMethod_e dtlm,
1383                                     const ZSTD_CDict* cdict,
1384                                     const ZSTD_CCtx_params* params,
1385                                     unsigned long long pledgedSrcSize);
1386 
1387 /* ZSTD_compress_advanced_internal() :
1388  * Private use only. To be called from zstdmt_compress.c. */
1389 size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1390                                        void* dst, size_t dstCapacity,
1391                                  const void* src, size_t srcSize,
1392                                  const void* dict,size_t dictSize,
1393                                  const ZSTD_CCtx_params* params);
1394 
1395 
1396 /* ZSTD_writeLastEmptyBlock() :
1397  * output an empty Block with end-of-frame mark to complete a frame
1398  * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
1399  *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
1400  */
1401 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1402 
1403 
/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * `seq` points to `nbSeq` sequences, which must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: the sequences are not verified! Invalid sequences can cause
 * out-of-bounds memory access and data corruption.
 */
1414 size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1415 
1416 /** ZSTD_cycleLog() :
1417  *  condition for correct operation : hashLog > 1 */
1418 U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1419 
1420 /** ZSTD_CCtx_trace() :
1421  *  Trace the end of a compression call.
1422  */
1423 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1424 
1425 #endif /* ZSTD_COMPRESS_H */
1426