1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* This header contains definitions
12  * that shall **only** be used by modules within lib/compress.
13  */
14 
15 #ifndef ZSTD_COMPRESS_H
16 #define ZSTD_COMPRESS_H
17 
18 /*-*************************************
19 *  Dependencies
20 ***************************************/
21 #include "../common/zstd_internal.h"
22 #include "zstd_cwksp.h"
23 #ifdef ZSTD_MULTITHREAD
24 #  include "zstdmt_compress.h"
25 #endif
26 #include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
27 #include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */
28 
29 /*-*************************************
30 *  Constants
31 ***************************************/
32 #define kSearchStrength      8
33 #define HASH_READ_SIZE       8
34 #define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
35                                        It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
36                                        It's not a big deal though : candidate will just be sorted again.
37                                        Additionally, candidate position 1 will be lost.
38                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
39                                        The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
40                                        This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
41 
42 
43 /*-*************************************
44 *  Context memory management
45 ***************************************/
46 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
47 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
48 
49 typedef struct ZSTD_prefixDict_s {
50     const void* dict;
51     size_t dictSize;
52     ZSTD_dictContentType_e dictContentType;
53 } ZSTD_prefixDict;
54 
55 typedef struct {
56     void* dictBuffer;
57     void const* dict;
58     size_t dictSize;
59     ZSTD_dictContentType_e dictContentType;
60     ZSTD_CDict* cdict;
61 } ZSTD_localDict;
62 
63 typedef struct {
64     HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
65     HUF_repeat repeatMode;
66 } ZSTD_hufCTables_t;
67 
68 typedef struct {
69     FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
70     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
71     FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
72     FSE_repeat offcode_repeatMode;
73     FSE_repeat matchlength_repeatMode;
74     FSE_repeat litlength_repeatMode;
75 } ZSTD_fseCTables_t;
76 
77 typedef struct {
78     ZSTD_hufCTables_t huf;
79     ZSTD_fseCTables_t fse;
80 } ZSTD_entropyCTables_t;
81 
82 /***********************************************
83 *  Sequences *
84 ***********************************************/
85 typedef struct SeqDef_s {
86     U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
87     U16 litLength;
88     U16 mlBase;    /* mlBase == matchLength - MINMATCH */
89 } SeqDef;
90 
91 /* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */
92 typedef enum {
93     ZSTD_llt_none = 0,             /* no longLengthType */
94     ZSTD_llt_literalLength = 1,    /* represents a long literal */
95     ZSTD_llt_matchLength = 2       /* represents a long match */
96 } ZSTD_longLengthType_e;
97 
98 typedef struct {
99     SeqDef* sequencesStart;
100     SeqDef* sequences;      /* ptr to end of sequences */
101     BYTE*  litStart;
102     BYTE*  lit;             /* ptr to end of literals */
103     BYTE*  llCode;
104     BYTE*  mlCode;
105     BYTE*  ofCode;
106     size_t maxNbSeq;
107     size_t maxNbLit;
108 
109     /* longLengthPos and longLengthType allow us to represent a single litLength or matchLength
110      * in the seqStore whose value does not fit in a U16 (if such a sequence exists). To do so,
111      * 0x10000 is added back to the stored litLength or matchLength when the sequence is read.
112      */
113     ZSTD_longLengthType_e longLengthType;
114     U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
115 } SeqStore_t;
116 
117 typedef struct {
118     U32 litLength;
119     U32 matchLength;
120 } ZSTD_SequenceLength;
121 
122 /**
123  * Returns the ZSTD_SequenceLength for the given sequence. It handles the decoding of long sequences
124  * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
125  */
126 MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq)
127 {
128     ZSTD_SequenceLength seqLen;
129     seqLen.litLength = seq->litLength;
130     seqLen.matchLength = seq->mlBase + MINMATCH;
131     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
132         if (seqStore->longLengthType == ZSTD_llt_literalLength) {
133             seqLen.litLength += 0x10000;
134         }
135         if (seqStore->longLengthType == ZSTD_llt_matchLength) {
136             seqLen.matchLength += 0x10000;
137         }
138     }
139     return seqLen;
140 }
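
/* Example (illustrative only; this helper is not part of zstd and its name is
 * arbitrary): round-trip of a literal length too large for the U16 field.
 * The stored value is litLength modulo 0x10000, and the sequence is marked via
 * longLengthType/longLengthPos so that ZSTD_getSequenceLength() adds 0x10000 back. */
MEM_STATIC void ZSTD_example_longLitLength_roundTrip(void)
{
    SeqDef seq;
    SeqStore_t store;
    ZSTD_SequenceLength decoded;
    size_t const trueLitLength = 70000;      /* > 0xFFFF, does not fit in a U16 */
    ZSTD_memset(&store, 0, sizeof(store));
    seq.litLength = (U16)trueLitLength;      /* stores 70000 - 0x10000 == 4464 */
    seq.mlBase = 10;                         /* i.e. matchLength == 10 + MINMATCH */
    seq.offBase = ZSTD_REP_NUM + 1;          /* arbitrary "real offset" offBase */
    store.sequencesStart = &seq;
    store.longLengthType = ZSTD_llt_literalLength;
    store.longLengthPos = 0;                 /* index of the marked sequence */
    decoded = ZSTD_getSequenceLength(&store, &seq);
    assert(decoded.litLength == trueLitLength);
    assert(decoded.matchLength == 10 + MINMATCH);
    (void)decoded;
}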
141 
142 const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
143 int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
144 
145 
146 /***********************************************
147 *  Entropy buffer statistics structs and funcs *
148 ***********************************************/
149 /** ZSTD_hufCTablesMetadata_t :
150  *  Stores Literals Block Type for a super-block in hType, and
151  *  huffman tree description in hufDesBuffer.
152  *  hufDesSize refers to the size of huffman tree description in bytes.
153  *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
154 typedef struct {
155     SymbolEncodingType_e hType;
156     BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
157     size_t hufDesSize;
158 } ZSTD_hufCTablesMetadata_t;
159 
160 /** ZSTD_fseCTablesMetadata_t :
161  *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
162  *  fse tables in fseTablesBuffer.
163  *  fseTablesSize refers to the size of fse tables in bytes.
164  *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
165 typedef struct {
166     SymbolEncodingType_e llType;
167     SymbolEncodingType_e ofType;
168     SymbolEncodingType_e mlType;
169     BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
170     size_t fseTablesSize;
171     size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
172 } ZSTD_fseCTablesMetadata_t;
173 
174 typedef struct {
175     ZSTD_hufCTablesMetadata_t hufMetadata;
176     ZSTD_fseCTablesMetadata_t fseMetadata;
177 } ZSTD_entropyCTablesMetadata_t;
178 
179 /** ZSTD_buildBlockEntropyStats() :
180  *  Builds entropy statistics for the block.
181  *  @return : 0 on success or error code */
182 size_t ZSTD_buildBlockEntropyStats(
183                     const SeqStore_t* seqStorePtr,
184                     const ZSTD_entropyCTables_t* prevEntropy,
185                           ZSTD_entropyCTables_t* nextEntropy,
186                     const ZSTD_CCtx_params* cctxParams,
187                           ZSTD_entropyCTablesMetadata_t* entropyMetadata,
188                           void* workspace, size_t wkspSize);
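
/* Call-site sketch (illustrative only, not compiled here): within a compression
 * step, `zc` stands for the active ZSTD_CCtx and `entropyMetadata` for a local
 * ZSTD_entropyCTablesMetadata_t. Statistics are built from the current seqStore,
 * comparing against the previous block's tables to decide whether they can be reused. */
#if 0
    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
                            &zc->blockState.prevCBlock->entropy,
                            &zc->blockState.nextCBlock->entropy,
                            &zc->appliedParams,
                            &entropyMetadata,
                            zc->tmpWorkspace, zc->tmpWkspSize), "");
#endif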
189 
190 /*********************************
191 *  Compression internals structs *
192 *********************************/
193 
194 typedef struct {
195     U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
196     U32 len;            /* Raw length of match */
197 } ZSTD_match_t;
198 
199 typedef struct {
200     U32 offset;         /* Offset of sequence */
201     U32 litLength;      /* Length of literals prior to match */
202     U32 matchLength;    /* Raw length of match */
203 } rawSeq;
204 
205 typedef struct {
206   rawSeq* seq;          /* The start of the sequences */
207   size_t pos;           /* The index in seq where reading stopped. pos <= size. */
208   size_t posInSequence; /* The position within the sequence at seq[pos] where reading
209                            stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
210   size_t size;          /* The number of sequences. <= capacity. */
211   size_t capacity;      /* The capacity starting from `seq` pointer */
212 } RawSeqStore_t;
213 
214 UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
215 
216 typedef struct {
217     int price;  /* price from beginning of segment to this position */
218     U32 off;    /* offset of previous match */
219     U32 mlen;   /* length of previous match */
220     U32 litlen; /* nb of literals since previous match */
221     U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
222 } ZSTD_optimal_t;
223 
224 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
225 
226 #define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
227 typedef struct {
228     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
229     unsigned* litFreq;           /* table of literals statistics, of size 256 */
230     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
231     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
232     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
233     ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
234     ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
235 
236     U32  litSum;                 /* nb of literals */
237     U32  litLengthSum;           /* nb of litLength codes */
238     U32  matchLengthSum;         /* nb of matchLength codes */
239     U32  offCodeSum;             /* nb of offset codes */
240     U32  litSumBasePrice;        /* to compare to log2(litfreq) */
241     U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
242     U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
243     U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
244     ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
245     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
246     ZSTD_ParamSwitch_e literalCompressionMode;
247 } optState_t;
248 
249 typedef struct {
250   ZSTD_entropyCTables_t entropy;
251   U32 rep[ZSTD_REP_NUM];
252 } ZSTD_compressedBlockState_t;
253 
254 typedef struct {
255     BYTE const* nextSrc;       /* next block here to continue on current prefix */
256     BYTE const* base;          /* All regular indexes relative to this position */
257     BYTE const* dictBase;      /* extDict indexes relative to this position */
258     U32 dictLimit;             /* below that point, need extDict */
259     U32 lowLimit;              /* below that point, no more valid data */
260     U32 nbOverflowCorrections; /* Number of times overflow correction has run since
261                                 * ZSTD_window_init(). Useful for debugging coredumps
262                                 * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
263                                 */
264 } ZSTD_window_t;
265 
266 #define ZSTD_WINDOW_START_INDEX 2
267 
268 typedef struct ZSTD_MatchState_t ZSTD_MatchState_t;
269 
270 #define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
271 
272 struct ZSTD_MatchState_t {
273     ZSTD_window_t window;   /* State for window round buffer management */
274     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
275                              * When loadedDictEnd != 0, a dictionary is in use, and still valid.
276                              * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
277                              * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
278                              * When dict referential is copied into active context (i.e. not attached),
279                              * loadedDictEnd == dictSize, since referential starts from zero.
280                              */
281     U32 nextToUpdate;       /* index from which to continue table update */
282     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
283 
284     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
285     BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
286     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
287     U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
288     U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */
289 
290     U32* hashTable;
291     U32* hashTable3;
292     U32* chainTable;
293 
294     int forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
295 
296     int dedicatedDictSearch;  /* Indicates whether this matchState is using the
297                                * dedicated dictionary search structure.
298                                */
299     optState_t opt;         /* optimal parser state */
300     const ZSTD_MatchState_t* dictMatchState;
301     ZSTD_compressionParameters cParams;
302     const RawSeqStore_t* ldmSeqStore;
303 
304     /* Controls prefetching in some dictMatchState matchfinders.
305      * This behavior is controlled from the cctx ms.
306      * This parameter has no effect in the cdict ms. */
307     int prefetchCDictTables;
308 
309     /* When == 0, lazy match finders insert every position.
310      * When != 0, lazy match finders only insert positions they search.
311      * This allows them to skip much faster over incompressible data,
312      * at a small cost to compression ratio.
313      */
314     int lazySkipping;
315 };
316 
317 typedef struct {
318     ZSTD_compressedBlockState_t* prevCBlock;
319     ZSTD_compressedBlockState_t* nextCBlock;
320     ZSTD_MatchState_t matchState;
321 } ZSTD_blockState_t;
322 
323 typedef struct {
324     U32 offset;
325     U32 checksum;
326 } ldmEntry_t;
327 
328 typedef struct {
329     BYTE const* split;
330     U32 hash;
331     U32 checksum;
332     ldmEntry_t* bucket;
333 } ldmMatchCandidate_t;
334 
335 #define LDM_BATCH_SIZE 64
336 
337 typedef struct {
338     ZSTD_window_t window;   /* State for the window round buffer management */
339     ldmEntry_t* hashTable;
340     U32 loadedDictEnd;
341     BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
342     size_t splitIndices[LDM_BATCH_SIZE];
343     ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
344 } ldmState_t;
345 
346 typedef struct {
347     ZSTD_ParamSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
348     U32 hashLog;            /* Log size of hashTable */
349     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
350     U32 minMatchLength;     /* Minimum match length */
351     U32 hashRateLog;       /* Log number of entries to skip */
352     U32 windowLog;          /* Window log for the LDM */
353 } ldmParams_t;
354 
355 typedef struct {
356     int collectSequences;
357     ZSTD_Sequence* seqStart;
358     size_t seqIndex;
359     size_t maxSequences;
360 } SeqCollector;
361 
362 struct ZSTD_CCtx_params_s {
363     ZSTD_format_e format;
364     ZSTD_compressionParameters cParams;
365     ZSTD_frameParameters fParams;
366 
367     int compressionLevel;
368     int forceWindow;           /* force back-references to respect limit of
369                                 * 1<<wLog, even for dictionary */
370     size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
371                                 * No target when targetCBlockSize == 0.
372                                 * There is no guarantee on compressed block size */
373     int srcSizeHint;           /* User's best guess of source size.
374                                 * Hint is not valid when srcSizeHint == 0.
375                                 * There is no guarantee that hint is close to actual source size */
376 
377     ZSTD_dictAttachPref_e attachDictPref;
378     ZSTD_ParamSwitch_e literalCompressionMode;
379 
380     /* Multithreading: used to pass parameters to mtctx */
381     int nbWorkers;
382     size_t jobSize;
383     int overlapLog;
384     int rsyncable;
385 
386     /* Long distance matching parameters */
387     ldmParams_t ldmParams;
388 
389     /* Dedicated dict search algorithm trigger */
390     int enableDedicatedDictSearch;
391 
392     /* Input/output buffer modes */
393     ZSTD_bufferMode_e inBufferMode;
394     ZSTD_bufferMode_e outBufferMode;
395 
396     /* Sequence compression API */
397     ZSTD_SequenceFormat_e blockDelimiters;
398     int validateSequences;
399 
400     /* Block splitting
401      * @postBlockSplitter executes split analysis after sequences are produced,
402      * it's more accurate but consumes more resources.
403      * @preBlockSplitter_level splits before knowing sequences,
404      * it's more approximate but also cheaper.
405      * Valid @preBlockSplitter_level values range from 0 to 6 (included).
406      * 0 means auto, 1 means do not split,
407      * then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest).
408      * Highest @preBlockSplitter_level combines well with @postBlockSplitter.
409      */
410     ZSTD_ParamSwitch_e postBlockSplitter;
411     int preBlockSplitter_level;
412 
413     /* Adjust the max block size */
414     size_t maxBlockSize;
415 
416     /* Param for deciding whether to use row-based matchfinder */
417     ZSTD_ParamSwitch_e useRowMatchFinder;
418 
419     /* Always load a dictionary in ext-dict mode (not prefix mode)? */
420     int deterministicRefPrefix;
421 
422     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
423     ZSTD_customMem customMem;
424 
425     /* Controls prefetching in some dictMatchState matchfinders */
426     ZSTD_ParamSwitch_e prefetchCDictTables;
427 
428     /* Controls whether zstd will fall back to an internal matchfinder
429      * if the external matchfinder returns an error code. */
430     int enableMatchFinderFallback;
431 
432     /* Parameters for the external sequence producer API.
433      * Users set these parameters through ZSTD_registerSequenceProducer().
434      * It is not possible to set these parameters individually through the public API. */
435     void* extSeqProdState;
436     ZSTD_sequenceProducer_F extSeqProdFunc;
437 
438     /* Controls repcode search in external sequence parsing */
439     ZSTD_ParamSwitch_e searchForExternalRepcodes;
440 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
441 
442 #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
443 #define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
444 #define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))
445 
446 /**
447  * Indicates whether this compression proceeds directly from user-provided
448  * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
449  * whether the context needs to buffer the input/output (ZSTDb_buffered).
450  */
451 typedef enum {
452     ZSTDb_not_buffered,
453     ZSTDb_buffered
454 } ZSTD_buffered_policy_e;
455 
456 /**
457  * Struct that contains all elements of block splitter that should be allocated
458  * in a wksp.
459  */
460 #define ZSTD_MAX_NB_BLOCK_SPLITS 196
461 typedef struct {
462     SeqStore_t fullSeqStoreChunk;
463     SeqStore_t firstHalfSeqStore;
464     SeqStore_t secondHalfSeqStore;
465     SeqStore_t currSeqStore;
466     SeqStore_t nextSeqStore;
467 
468     U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
469     ZSTD_entropyCTablesMetadata_t entropyMetadata;
470 } ZSTD_blockSplitCtx;
471 
472 struct ZSTD_CCtx_s {
473     ZSTD_compressionStage_e stage;
474     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
475     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
476     ZSTD_CCtx_params requestedParams;
477     ZSTD_CCtx_params appliedParams;
478     ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
479     U32   dictID;
480     size_t dictContentSize;
481 
482     ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
483     size_t blockSizeMax;
484     unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
485     unsigned long long consumedSrcSize;
486     unsigned long long producedCSize;
487     XXH64_state_t xxhState;
488     ZSTD_customMem customMem;
489     ZSTD_threadPool* pool;
490     size_t staticSize;
491     SeqCollector seqCollector;
492     int isFirstBlock;
493     int initialized;
494 
495     SeqStore_t seqStore;      /* sequences storage ptrs */
496     ldmState_t ldmState;      /* long distance matching state */
497     rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
498     size_t maxNbLdmSequences;
499     RawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
500     ZSTD_blockState_t blockState;
501     void* tmpWorkspace;  /* used as substitute of stack space - must be aligned for S64 type */
502     size_t tmpWkspSize;
503 
504     /* Whether we are streaming or not */
505     ZSTD_buffered_policy_e bufferedPolicy;
506 
507     /* streaming */
508     char*  inBuff;
509     size_t inBuffSize;
510     size_t inToCompress;
511     size_t inBuffPos;
512     size_t inBuffTarget;
513     char*  outBuff;
514     size_t outBuffSize;
515     size_t outBuffContentSize;
516     size_t outBuffFlushedSize;
517     ZSTD_cStreamStage streamStage;
518     U32    frameEnded;
519 
520     /* Stable in/out buffer verification */
521     ZSTD_inBuffer expectedInBuffer;
522     size_t stableIn_notConsumed; /* nb bytes within stable input buffer that are said to be consumed but are not */
523     size_t expectedOutBufferSize;
524 
525     /* Dictionary */
526     ZSTD_localDict localDict;
527     const ZSTD_CDict* cdict;
528     ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
529 
530     /* Multi-threading */
531 #ifdef ZSTD_MULTITHREAD
532     ZSTDMT_CCtx* mtctx;
533 #endif
534 
535     /* Tracing */
536 #if ZSTD_TRACE
537     ZSTD_TraceCtx traceCtx;
538 #endif
539 
540     /* Workspace for block splitter */
541     ZSTD_blockSplitCtx blockSplitCtx;
542 
543     /* Buffer for output from external sequence producer */
544     ZSTD_Sequence* extSeqBuf;
545     size_t extSeqBufCapacity;
546 };
547 
548 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
549 typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
550 
551 typedef enum {
552     ZSTD_noDict = 0,
553     ZSTD_extDict = 1,
554     ZSTD_dictMatchState = 2,
555     ZSTD_dedicatedDictSearch = 3
556 } ZSTD_dictMode_e;
557 
558 typedef enum {
559     ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
560                                  * In this mode we use both the srcSize and the dictSize
561                                  * when selecting and adjusting parameters.
562                                  */
563     ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
564                                  * In this mode we only take the srcSize into account when selecting
565                                  * and adjusting parameters.
566                                  */
567     ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
568                                  * In this mode we take both the source size and the dictionary size
569                                  * into account when selecting and adjusting the parameters.
570                                  */
571     ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
572                                  * We don't know what these parameters are for. We default to the legacy
573                                  * behavior of taking both the source size and the dict size into account
574                                  * when selecting and adjusting parameters.
575                                  */
576 } ZSTD_CParamMode_e;
577 
578 typedef size_t (*ZSTD_BlockCompressor_f) (
579         ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
580         void const* src, size_t srcSize);
581 ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
582 
583 
584 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
585 {
586     static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
587                                        8,  9, 10, 11, 12, 13, 14, 15,
588                                       16, 16, 17, 17, 18, 18, 19, 19,
589                                       20, 20, 20, 20, 21, 21, 21, 21,
590                                       22, 22, 22, 22, 22, 22, 22, 22,
591                                       23, 23, 23, 23, 23, 23, 23, 23,
592                                       24, 24, 24, 24, 24, 24, 24, 24,
593                                       24, 24, 24, 24, 24, 24, 24, 24 };
594     static const U32 LL_deltaCode = 19;
595     return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
596 }
597 
598 /* ZSTD_MLcode() :
599  * note : mlBase = matchLength - MINMATCH;
600  *        because that's the format in which it's stored in seqStore->sequences */
601 MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
602 {
603     static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
604                                       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
605                                       32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
606                                       38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
607                                       40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
608                                       41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
609                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
610                                       42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
611     static const U32 ML_deltaCode = 36;
612     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
613 }
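
/* Worked examples (illustrative only; this helper is not part of zstd): short
 * lengths map through the lookup tables above, longer lengths go through
 * ZSTD_highbit32() plus a delta code. */
MEM_STATIC void ZSTD_example_lengthCodes(void)
{
    assert(ZSTD_LLcode(3)   ==  3);    /* direct table entry */
    assert(ZSTD_LLcode(100) == 25);    /* 100 > 63 : highbit32(100)==6, 6+19==25 */
    assert(ZSTD_MLcode(4)   ==  4);    /* mlBase 4, i.e. matchLength 4+MINMATCH */
    assert(ZSTD_MLcode(200) == 43);    /* 200 > 127 : highbit32(200)==7, 7+36==43 */
}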
614 
615 /* ZSTD_cParam_withinBounds:
616  * @return 1 if value is within cParam bounds,
617  * 0 otherwise */
618 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
619 {
620     ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
621     if (ZSTD_isError(bounds.error)) return 0;
622     if (value < bounds.lowerBound) return 0;
623     if (value > bounds.upperBound) return 0;
624     return 1;
625 }
626 
627 /* ZSTD_selectAddr:
628  * @return index >= lowLimit ? candidate : backup,
629  * tries to force branchless codegen. */
630 MEM_STATIC const BYTE*
631 ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup)
632 {
633 #if defined(__GNUC__) && defined(__x86_64__)
634     __asm__ (
635         "cmp %1, %2\n"
636         "cmova %3, %0\n"
637         : "+r"(candidate)
638         : "r"(index), "r"(lowLimit), "r"(backup)
639         );
640     return candidate;
641 #else
642     return index >= lowLimit ? candidate : backup;
643 #endif
644 }
645 
646 /* ZSTD_noCompressBlock() :
647  * Writes uncompressed block to dst buffer from given src.
648  * Returns the size of the block */
649 MEM_STATIC size_t
650 ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
651 {
652     U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
653     DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
654     RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
655                     dstSize_tooSmall, "dst buf too small for uncompressed block");
656     MEM_writeLE24(dst, cBlockHeader24);
657     ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
658     return ZSTD_blockHeaderSize + srcSize;
659 }
660 
661 MEM_STATIC size_t
662 ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
663 {
664     BYTE* const op = (BYTE*)dst;
665     U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
666     RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
667     MEM_writeLE24(op, cBlockHeader);
668     op[3] = src;
669     return 4;
670 }
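
/* Worked example (illustrative only; this helper is not part of zstd): the
 * 3-byte block header packs lastBlock in bit 0, the block type in bits 1-2,
 * and the (regenerated) size in bits 3 and above. */
MEM_STATIC void ZSTD_example_blockHeader(void)
{
    /* last RLE block regenerating 100 bytes */
    U32 const cBlockHeader = 1 /*lastBlock*/ + (((U32)bt_rle)<<1) + (100u << 3);
    assert(cBlockHeader == 1 + 2 + 800);
    (void)cBlockHeader;
}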
671 
672 
673 /* ZSTD_minGain() :
674  * minimum compression required
675  * to generate a compressed block or a compressed literals section.
676  * note : use same formula for both situations */
677 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
678 {
679     U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
680     ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
681     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
682     return (srcSize >> minlog) + 2;
683 }
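
/* Worked example (illustrative): for a 64 KB input at a non-btultra strategy,
 * minlog == 6, so the compressed block (or literals section) must be at least
 * (65536>>6) + 2 == 1026 bytes smaller than the input to be worth emitting. */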
684 
685 MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
686 {
687     switch (cctxParams->literalCompressionMode) {
688     case ZSTD_ps_enable:
689         return 0;
690     case ZSTD_ps_disable:
691         return 1;
692     default:
693         assert(0 /* impossible: pre-validated */);
694         ZSTD_FALLTHROUGH;
695     case ZSTD_ps_auto:
696         return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
697     }
698 }
699 
700 /*! ZSTD_safecopyLiterals() :
701  *  memcpy() function that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
702  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
703  *  large copies.
704  */
705 static void
706 ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
707 {
708     assert(iend > ilimit_w);
709     if (ip <= ilimit_w) {
710         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
711         op += ilimit_w - ip;
712         ip = ilimit_w;
713     }
714     while (ip < iend) *op++ = *ip++;
715 }
716 
717 
718 #define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
719 #define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
720 #define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
721 #define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
722 #define OFFSET_TO_OFFBASE(o)  (assert((o)>0), o + ZSTD_REP_NUM)
723 #define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
724 #define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
725 #define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
726 #define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
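
/* Round-trip example (illustrative only; this helper is not part of zstd):
 * real offsets are shifted above ZSTD_REP_NUM while repcode IDs 1-3 are kept
 * as-is, so the two sub-ranges of the offBase sum-type never collide. */
MEM_STATIC void ZSTD_example_offBase_roundTrip(void)
{
    U32 const fromOffset  = OFFSET_TO_OFFBASE(5);    /* == 5 + ZSTD_REP_NUM */
    U32 const fromRepcode = REPCODE_TO_OFFBASE(2);   /* == 2 */
    assert(OFFBASE_IS_OFFSET(fromOffset)   && OFFBASE_TO_OFFSET(fromOffset)   == 5);
    assert(OFFBASE_IS_REPCODE(fromRepcode) && OFFBASE_TO_REPCODE(fromRepcode) == 2);
    (void)fromOffset; (void)fromRepcode;
}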
727 
728 /*! ZSTD_storeSeqOnly() :
729  *  Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
730  *  Literals themselves are not copied, but @litPtr is updated.
731  *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
732  *  @matchLength : must be >= MINMATCH
733 */
734 HINT_INLINE UNUSED_ATTR void
735 ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr,
736               size_t litLength,
737               U32 offBase,
738               size_t matchLength)
739 {
740     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
741 
742     /* literal Length */
743     assert(litLength <= ZSTD_BLOCKSIZE_MAX);
744     if (UNLIKELY(litLength>0xFFFF)) {
745         assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
746         seqStorePtr->longLengthType = ZSTD_llt_literalLength;
747         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
748     }
749     seqStorePtr->sequences[0].litLength = (U16)litLength;
750 
751     /* match offset */
752     seqStorePtr->sequences[0].offBase = offBase;
753 
754     /* match Length */
755     assert(matchLength <= ZSTD_BLOCKSIZE_MAX);
756     assert(matchLength >= MINMATCH);
757     {   size_t const mlBase = matchLength - MINMATCH;
758         if (UNLIKELY(mlBase>0xFFFF)) {
759             assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
760             seqStorePtr->longLengthType = ZSTD_llt_matchLength;
761             seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
762         }
763         seqStorePtr->sequences[0].mlBase = (U16)mlBase;
764     }
765 
766     seqStorePtr->sequences++;
767 }
768 
769 /*! ZSTD_storeSeq() :
770  *  Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
771  *  @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
772  *  @matchLength : must be >= MINMATCH
773  *  Allowed to over-read literals up to litLimit.
774 */
775 HINT_INLINE UNUSED_ATTR void
776 ZSTD_storeSeq(SeqStore_t* seqStorePtr,
777               size_t litLength, const BYTE* literals, const BYTE* litLimit,
778               U32 offBase,
779               size_t matchLength)
780 {
781     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
782     BYTE const* const litEnd = literals + litLength;
783 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
784     static const BYTE* g_start = NULL;
785     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
786     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
787         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
788                pos, (U32)litLength, (U32)matchLength, (U32)offBase);
789     }
790 #endif
791     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
792     /* copy Literals */
793     assert(seqStorePtr->maxNbLit <= 128 KB);
794     assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
795     assert(literals + litLength <= litLimit);
796     if (litEnd <= litLimit_w) {
797         /* Common case we can use wildcopy.
798          * First copy 16 bytes, because literals are likely short.
799          */
800         ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16);
801         ZSTD_copy16(seqStorePtr->lit, literals);
802         if (litLength > 16) {
803             ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
804         }
805     } else {
806         ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
807     }
808     seqStorePtr->lit += litLength;
809 
810     ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength);
811 }
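
/* Typical call-site sketch (illustrative only, not compiled here): `anchor`,
 * `ip`, `iend`, `offset` and `matchLength` stand for the usual match-finder
 * locals. Literals gathered since the previous sequence live in [anchor, ip),
 * and the literal copy is allowed to over-read up to `iend`. */
#if 0
    ZSTD_storeSeq(seqStore, (size_t)(ip - anchor), anchor, iend,
                  OFFSET_TO_OFFBASE(offset), matchLength);
    ip += matchLength;
    anchor = ip;
#endif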
812 
813 /* ZSTD_updateRep() :
814  * updates in-place @rep (array of repeat offsets)
815  * @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
816  */
817 MEM_STATIC void
818 ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
819 {
820     if (OFFBASE_IS_OFFSET(offBase)) {  /* full offset */
821         rep[2] = rep[1];
822         rep[1] = rep[0];
823         rep[0] = OFFBASE_TO_OFFSET(offBase);
824     } else {   /* repcode */
825         U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0;
826         if (repCode > 0) {  /* note : if repCode==0, no change */
827             U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
828             rep[2] = (repCode >= 2) ? rep[1] : rep[2];
829             rep[1] = rep[0];
830             rep[0] = currentOffset;
831         } else {   /* repCode == 0 */
832             /* nothing to do */
833         }
834     }
835 }
836 
837 typedef struct repcodes_s {
838     U32 rep[3];
839 } Repcodes_t;
840 
841 MEM_STATIC Repcodes_t
842 ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
843 {
844     Repcodes_t newReps;
845     ZSTD_memcpy(&newReps, rep, sizeof(newReps));
846     ZSTD_updateRep(newReps.rep, offBase, ll0);
847     return newReps;
848 }
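
/* Worked examples (illustrative only; this helper is not part of zstd):
 * starting from rep = {8, 4, 2}, a real offset pushes the history down, while
 * repcodes reorder it; repcode 1 with ll0==0 is the "no change" case. */
MEM_STATIC void ZSTD_example_updateRep(void)
{
    U32 const start[ZSTD_REP_NUM] = { 8, 4, 2 };
    {   /* real offset 16 : history becomes {16, 8, 4} */
        Repcodes_t const r = ZSTD_newRep(start, OFFSET_TO_OFFBASE(16), 0);
        assert(r.rep[0] == 16 && r.rep[1] == 8 && r.rep[2] == 4); (void)r;
    }
    {   /* repcode 1, ll0==0 : history unchanged */
        Repcodes_t const r = ZSTD_newRep(start, REPCODE1_TO_OFFBASE, 0);
        assert(r.rep[0] == 8 && r.rep[1] == 4 && r.rep[2] == 2); (void)r;
    }
    {   /* repcode 1, ll0==1 : actually selects rep[1], history becomes {4, 8, 2} */
        Repcodes_t const r = ZSTD_newRep(start, REPCODE1_TO_OFFBASE, 1);
        assert(r.rep[0] == 4 && r.rep[1] == 8 && r.rep[2] == 2); (void)r;
    }
}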
849 
850 
851 /*-*************************************
852 *  Match length counter
853 ***************************************/
854 MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
855 {
856     const BYTE* const pStart = pIn;
857     const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
858 
859     if (pIn < pInLoopLimit) {
860         { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
861           if (diff) return ZSTD_NbCommonBytes(diff); }
862         pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
863         while (pIn < pInLoopLimit) {
864             size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
865             if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
866             pIn += ZSTD_NbCommonBytes(diff);
867             return (size_t)(pIn - pStart);
868     }   }
869     if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
870     if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
871     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
872     return (size_t)(pIn - pStart);
873 }
874 
875 /** ZSTD_count_2segments() :
876  *  can count match length with `ip` & `match` in 2 different segments.
877  *  convention : on reaching mEnd, match count continues starting from iStart
878  */
879 MEM_STATIC size_t
880 ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
881                      const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
882 {
883     const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
884     size_t const matchLength = ZSTD_count(ip, match, vEnd);
885     if (match + matchLength != mEnd) return matchLength;
886     DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
887     DEBUGLOG(7, "distance from match beginning to end dictionary = %i", (int)(mEnd - match));
888     DEBUGLOG(7, "distance from current pos to end buffer = %i", (int)(iEnd - ip));
889     DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
890     DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
891     return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
892 }
893 
894 
895 /*-*************************************
896  *  Hashes
897  ***************************************/
898 static const U32 prime3bytes = 506832829U;
899 static U32    ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s)  >> (32-h) ; }
900 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
901 MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
902 
903 static const U32 prime4bytes = 2654435761U;
904 static U32    ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
905 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
906 static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
907 
908 static const U64 prime5bytes = 889523592379ULL;
909 static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
910 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
911 static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
912 
913 static const U64 prime6bytes = 227718039650203ULL;
914 static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
915 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
916 static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
917 
918 static const U64 prime7bytes = 58295818150454627ULL;
919 static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u  << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
920 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
921 static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
922 
923 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
924 static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes)  ^ s) >> (64-h)) ; }
925 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
926 static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
927 
928 
929 MEM_STATIC FORCE_INLINE_ATTR
930 size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
931 {
932     /* Although some of these hashes do support hBits up to 64, some do not.
933      * To be on the safe side, always avoid hBits > 32. */
934     assert(hBits <= 32);
935 
936     switch(mls)
937     {
938     default:
939     case 4: return ZSTD_hash4Ptr(p, hBits);
940     case 5: return ZSTD_hash5Ptr(p, hBits);
941     case 6: return ZSTD_hash6Ptr(p, hBits);
942     case 7: return ZSTD_hash7Ptr(p, hBits);
943     case 8: return ZSTD_hash8Ptr(p, hBits);
944     }
945 }
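
/* Usage sketch (illustrative only, not compiled here): `ms`, `ip`, `hBits` and
 * `mls` stand for the usual match-finder locals. The hash selects a slot in the
 * hash table, which holds the index of an earlier position with the same hash,
 * i.e. a candidate match; the current position is then registered in its place. */
#if 0
    size_t const h = ZSTD_hashPtr(ip, hBits, mls);
    U32 const matchIndex = ms->hashTable[h];            /* candidate from an earlier position */
    ms->hashTable[h] = (U32)(ip - ms->window.base);     /* register the current position */
#endif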
946 
947 MEM_STATIC FORCE_INLINE_ATTR
948 size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
949     /* Although some of these hashes do support hBits up to 64, some do not.
950      * To be on the safe side, always avoid hBits > 32. */
951     assert(hBits <= 32);
952 
953     switch(mls)
954     {
955         default:
956         case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt);
957         case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt);
958         case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt);
959         case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt);
960         case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt);
961     }
962 }
963 
964 
965 /** ZSTD_ipow() :
966  * Return base^exponent.
967  */
968 static U64 ZSTD_ipow(U64 base, U64 exponent)
969 {
970     U64 power = 1;
971     while (exponent) {
972       if (exponent & 1) power *= base;
973       exponent >>= 1;
974       base *= base;
975     }
976     return power;
977 }
978 
979 #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
980 
981 /** ZSTD_rollingHash_append() :
982  * Add the buffer to the hash value.
983  */
984 static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
985 {
986     BYTE const* istart = (BYTE const*)buf;
987     size_t pos;
988     for (pos = 0; pos < size; ++pos) {
989         hash *= prime8bytes;
990         hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
991     }
992     return hash;
993 }
994 
995 /** ZSTD_rollingHash_compute() :
996  * Compute the rolling hash value of the buffer.
997  */
998 MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
999 {
1000     return ZSTD_rollingHash_append(0, buf, size);
1001 }
1002 
1003 /** ZSTD_rollingHash_primePower() :
1004  * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
1005  * over a window of length bytes.
1006  */
1007 MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
1008 {
1009     return ZSTD_ipow(prime8bytes, length - 1);
1010 }
1011 
1012 /** ZSTD_rollingHash_rotate() :
1013  * Rotate the rolling hash by one byte.
1014  */
1015 MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
1016 {
1017     hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
1018     hash *= prime8bytes;
1019     hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
1020     return hash;
1021 }
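
/* Property sketch (illustrative only; this helper is not part of zstd): rotating
 * the hash of buf[0..N) by removing buf[0] and adding buf[N] yields the hash of
 * buf[1..N+1), which is exactly how a window of N bytes is slid one byte forward. */
MEM_STATIC void ZSTD_example_rollingHash(void)
{
    BYTE const buf[9] = { 10, 20, 30, 40, 50, 60, 70, 80, 90 };
    U32 const windowSize = 8;
    U64 const primePower = ZSTD_rollingHash_primePower(windowSize);
    U64 const hash0 = ZSTD_rollingHash_compute(buf, windowSize);        /* hash of buf[0..8) */
    U64 const hash1 = ZSTD_rollingHash_compute(buf + 1, windowSize);    /* hash of buf[1..9) */
    assert(ZSTD_rollingHash_rotate(hash0, buf[0], buf[windowSize], primePower) == hash1);
    (void)hash0; (void)hash1; (void)primePower;
}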
1022 
1023 /*-*************************************
1024 *  Round buffer management
1025 ***************************************/
1026 /* Max @current value allowed:
1027  * In 32-bit mode: we want to avoid crossing the 2 GB limit,
1028  *                 reducing risks of side effects in case of signed operations on indexes.
1029  * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB)
1030  *                 doesn't overflow U32 index capacity (4 GB) */
1031 #define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB)
1032 /* Maximum chunk size before overflow correction needs to be called again */
1033 #define ZSTD_CHUNKSIZE_MAX                                                     \
1034     ( ((U32)-1)                  /* Maximum ending current index */            \
1035     - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
1036 
1037 /**
1038  * ZSTD_window_clear():
1039  * Clears the window containing the history by simply setting it to empty.
1040  */
1041 MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
1042 {
1043     size_t const endT = (size_t)(window->nextSrc - window->base);
1044     U32 const end = (U32)endT;
1045 
1046     window->lowLimit = end;
1047     window->dictLimit = end;
1048 }
1049 
1050 MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
1051 {
1052     return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
1053            window.lowLimit == ZSTD_WINDOW_START_INDEX &&
1054            (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
1055 }
1056 
1057 /**
1058  * ZSTD_window_hasExtDict():
1059  * Returns non-zero if the window has a non-empty extDict.
1060  */
1061 MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
1062 {
1063     return window.lowLimit < window.dictLimit;
1064 }
1065 
1066 /**
1067  * ZSTD_matchState_dictMode():
1068  * Inspects the provided matchState and figures out what dictMode should be
1069  * passed to the compressor.
1070  */
1071 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms)
1072 {
1073     return ZSTD_window_hasExtDict(ms->window) ?
1074         ZSTD_extDict :
1075         ms->dictMatchState != NULL ?
1076             (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
1077             ZSTD_noDict;
1078 }
1079 
1080 /* Defining this macro to non-zero tells zstd to run the overflow correction
1081  * code much more frequently. This is very inefficient, and should only be
1082  * used for tests and fuzzers.
1083  */
1084 #ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
1085 #  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1086 #    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
1087 #  else
1088 #    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
1089 #  endif
1090 #endif
1091 
1092 /**
1093  * ZSTD_window_canOverflowCorrect():
1094  * Returns non-zero if the indices are large enough for overflow correction
1095  * to work correctly without impacting compression ratio.
1096  */
1097 MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
1098                                               U32 cycleLog,
1099                                               U32 maxDist,
1100                                               U32 loadedDictEnd,
1101                                               void const* src)
1102 {
1103     U32 const cycleSize = 1u << cycleLog;
1104     U32 const curr = (U32)((BYTE const*)src - window.base);
1105     U32 const minIndexToOverflowCorrect = cycleSize
1106                                         + MAX(maxDist, cycleSize)
1107                                         + ZSTD_WINDOW_START_INDEX;
1108 
1109     /* Adjust the min index to backoff the overflow correction frequency,
1110      * so we don't waste too much CPU in overflow correction. If this
1111      * computation overflows we don't really care, we just need to make
1112      * sure it is at least minIndexToOverflowCorrect.
1113      */
1114     U32 const adjustment = window.nbOverflowCorrections + 1;
1115     U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
1116                                   minIndexToOverflowCorrect);
1117     U32 const indexLargeEnough = curr > adjustedIndex;
1118 
1119     /* Only overflow correct early if the dictionary is invalidated already,
1120      * so we don't hurt compression ratio.
1121      */
1122     U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
1123 
1124     return indexLargeEnough && dictionaryInvalidated;
1125 }
1126 
1127 /**
1128  * ZSTD_window_needOverflowCorrection():
1129  * Returns non-zero if the indices are getting too large and need overflow
1130  * protection.
1131  */
1132 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1133                                                   U32 cycleLog,
1134                                                   U32 maxDist,
1135                                                   U32 loadedDictEnd,
1136                                                   void const* src,
1137                                                   void const* srcEnd)
1138 {
1139     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
1140     if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1141         if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
1142             return 1;
1143         }
1144     }
1145     return curr > ZSTD_CURRENT_MAX;
1146 }
1147 
1148 /**
1149  * ZSTD_window_correctOverflow():
1150  * Reduces the indices to protect from index overflow.
1151  * Returns the correction made to the indices, which must be applied to every
1152  * stored index.
1153  *
1154  * The least significant cycleLog bits of the indices must remain the same,
1155  * which may be 0. Every index up to maxDist in the past must be valid.
1156  */
1157 MEM_STATIC
1158 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1159 U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1160                                            U32 maxDist, void const* src)
1161 {
1162     /* preemptive overflow correction:
1163      * 1. correction is large enough:
1164      *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
1165      *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
1166      *
1167      *    current - newCurrent
1168      *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
1169      *    > (3<<29) - (1<<chainLog)
1170      *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
1171      *    > 1<<29
1172      *
1173      * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
1174      *    After correction, current is less than (1<<chainLog + 1<<windowLog).
1175      *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
1176      *    In 32-bit mode we are safe, because (chainLog <= 29), so
1177      *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
1178      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
1179      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
1180      */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}

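/* Illustrative caller pattern (an editorial sketch, kept disabled; the real
 * call sites live in zstd_compress.c). It shows how the two functions above
 * are meant to be paired: the returned correction must afterwards be
 * subtracted from every index the caller still holds (hash tables, chain
 * tables, repcodes, nextToUpdate, ...). `ms` is a hypothetical
 * ZSTD_MatchState_t* used only for this sketch. */
#if 0
    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist,
                                           ms->loadedDictEnd, src, srcEnd)) {
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, src);
        /* ... reduce all stored indices by `correction` here ... */
    }
#endif
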
/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_MatchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_MatchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             *
             * We also have to invalidate the dictionary if ZSTD_window_update() has detected
             * non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
             * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
             * dictMatchState, so setting it to NULL is not a problem.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}

/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the previous extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
U32 ZSTD_window_update(ZSTD_window_t* window,
                 const void* src, size_t srcSize,
                       int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase);
        U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        assert(highInputIdx < UINT_MAX);
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}

/**
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}

/**
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
{
    U32    const maxDistance = 1U << windowLog;
    U32    const lowestValid = ms->window.dictLimit;
    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32    const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}

/* index_safety_check:
 * intentional underflow : ensure repIndex isn't overlapping dict + prefix
 * @return 1 if values are not overlapping,
 * 0 otherwise */
MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) {
    return ((U32)((prefixLowestIndex-1)  - repIndex) >= 3);
}
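
/* Illustrative values (added for clarity, not from the original source):
 * with prefixLowestIndex == 100, repIndex 97..99 straddles the dict/prefix
 * boundary and the check returns 0; repIndex <= 96 (fully in the ext-dict)
 * or repIndex >= 100 (fully in the prefix) returns 1, the latter via the
 * intentional unsigned underflow mentioned above. */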


/* debug functions */
#if (DEBUGLEVEL>=2)

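/* Note added for clarity (an editorial gloss, not in the original source):
 * ZSTD_fWeight(rawStat) approximates log2(rawStat+1) in fixed point with
 * fp_accuracy fractional bits (up to a roughly constant offset that cancels
 * when two weights are subtracted). ZSTD_debugTable() below uses
 * fWeight(sum) - fWeight(count) as the estimated bit cost of each symbol. */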
MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif

/* Short Cache */

/* Normally, zstd matchfinders follow this flow:
 *     1. Compute hash at ip
 *     2. Load index from hashTable[hash]
 *     3. Check if *ip == *(base + index)
 * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
 *
 * Short cache is an optimization which allows us to avoid step 3 most of the time
 * when the data doesn't actually match. With short cache, the flow becomes:
 *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
 *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
 *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
 *
 * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
 * dictMatchState matchfinders.
 */
#define ZSTD_SHORT_CACHE_TAG_BITS 8
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)

/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.
 * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
    size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
    U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
    assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0);
    hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag;
}

/* Helper function for short cache matchfinders.
 * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
    U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
    U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
    return tag1 == tag2;
}
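
/* Illustrative lookup sketch (added for clarity; an assumption, not part of the
 * original header, so it is kept disabled). It shows how a dictMatchState
 * matchfinder could combine the helpers above: `dictHashTable` holds tagged
 * entries written by ZSTD_writeTaggedIndex(), and `hashAndTag` is a hash whose
 * low ZSTD_SHORT_CACHE_TAG_BITS bits are the independent tag. The expensive
 * *(base + index) comparison is only attempted when the tags agree. */
#if 0
MEM_STATIC U32 ZSTD_taggedLookup_sketch(const U32* dictHashTable, size_t hashAndTag)
{
    size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
    U32 const packed = dictHashTable[hash];
    if (!ZSTD_comparePackedTags(packed, hashAndTag))
        return 0;   /* tags differ: almost certainly not a match, skip the byte check */
    return packed >> ZSTD_SHORT_CACHE_TAG_BITS;   /* candidate index, still needs the *(base+index) check */
}
#endif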

/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number supposed already checked
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

typedef struct {
    U32 idx;            /* Index in array of ZSTD_Sequence */
    U32 posInSequence;  /* Position within sequence at idx */
    size_t posInSrc;    /* Number of bytes given by sequences provided so far */
} ZSTD_SequencePosition;

/* for benchmark */
size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
                        const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                        int const repcodeResolution);

typedef struct {
    size_t nbSequences;
    size_t blockSize;
    size_t litSize;
} BlockSummary;

BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(SeqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict,size_t dictSize,
                                 const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);

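/* Illustrative use (an editorial sketch under assumptions, kept disabled):
 * `op` and `oend` are hypothetical output cursor/limit pointers. The result is
 * either the number of bytes written or an error code, so it is checked with
 * ZSTD_isError() before advancing the cursor. */
#if 0
    {   size_t const lastBlockSize = ZSTD_writeLastEmptyBlock(op, (size_t)(oend - op));
        if (ZSTD_isError(lastBlockSize)) return lastBlockSize;  /* dstCapacity too small */
        op += lastBlockSize;   /* == ZSTD_blockHeaderSize on success */
    }
#endif
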

/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/** ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
    return params->extSeqProdFunc != NULL;
}

/* ===============================================================
 * Deprecated definitions that are still used internally to avoid
 * deprecation warnings. These functions are exactly equivalent to
 * their public variants, but avoid the deprecation warnings.
 * =============================================================== */

size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);

size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
                                    void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize);

size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize);

size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);


#endif /* ZSTD_COMPRESS_H */