/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */

#ifndef ZSTD_COMPRESS_H
#define ZSTD_COMPRESS_H

/*-*************************************
*  Dependencies
***************************************/
#include "../common/zstd_internal.h"
#include "zstd_cwksp.h"
#ifdef ZSTD_MULTITHREAD
#  include "zstdmt_compress.h"
#endif

#if defined (__cplusplus)
extern "C" {
#endif

/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef struct ZSTD_prefixDict_s {
    const void* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;

typedef struct {
    void* dictBuffer;
    void const* dict;
    size_t dictSize;
    ZSTD_dictContentType_e dictContentType;
    ZSTD_CDict* cdict;
} ZSTD_localDict;

typedef struct {
    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
    HUF_repeat repeatMode;
} ZSTD_hufCTables_t;

typedef struct {
    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
    FSE_repeat offcode_repeatMode;
    FSE_repeat matchlength_repeatMode;
    FSE_repeat litlength_repeatMode;
} ZSTD_fseCTables_t;

typedef struct {
    ZSTD_hufCTables_t huf;
    ZSTD_fseCTables_t fse;
} ZSTD_entropyCTables_t;

/***********************************************
* Entropy buffer statistics structs and funcs *
***********************************************/
/** ZSTD_hufCTablesMetadata_t :
 *  Stores Literals Block Type for a super-block in hType, and
 *  huffman tree description in hufDesBuffer.
 *  hufDesSize refers to the size of huffman tree description in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
typedef struct {
    symbolEncodingType_e hType;
    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
    size_t hufDesSize;
} ZSTD_hufCTablesMetadata_t;

/** ZSTD_fseCTablesMetadata_t :
 *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
 *  fse tables in fseTablesBuffer.
 *  fseTablesSize refers to the size of fse tables in bytes.
 *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
typedef struct {
    symbolEncodingType_e llType;
    symbolEncodingType_e ofType;
    symbolEncodingType_e mlType;
    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
    size_t fseTablesSize;
    size_t lastCountSize; /* This is to account for a bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_fseCTablesMetadata_t;

typedef struct {
    ZSTD_hufCTablesMetadata_t hufMetadata;
    ZSTD_fseCTablesMetadata_t fseMetadata;
} ZSTD_entropyCTablesMetadata_t;

/** ZSTD_buildBlockEntropyStats() :
 *  Builds entropy for the block.
 *  @return : 0 on success or error code */
size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
                             const ZSTD_entropyCTables_t* prevEntropy,
                                   ZSTD_entropyCTables_t* nextEntropy,
                             const ZSTD_CCtx_params* cctxParams,
                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                   void* workspace, size_t wkspSize);

/*********************************
*  Compression internals structs *
*********************************/

typedef struct {
    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
    U32 len;            /* Raw length of match */
} ZSTD_match_t;

typedef struct {
    U32 offset;         /* Offset of sequence */
    U32 litLength;      /* Length of literals prior to match */
    U32 matchLength;    /* Raw length of match */
} rawSeq;

typedef struct {
    rawSeq* seq;          /* The start of the sequences */
    size_t pos;           /* The index in seq where reading stopped. pos <= size. */
    size_t posInSequence; /* The position within the sequence at seq[pos] where reading
                             stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
    size_t size;          /* The number of sequences. <= capacity. */
    size_t capacity;      /* The capacity starting from `seq` pointer */
} rawSeqStore_t;

UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};

typedef struct {
    int price;
    U32 off;
    U32 mlen;
    U32 litlen;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_optimal_t;

typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

typedef struct {
    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
    unsigned* litFreq;           /* table of literals statistics, of size 256 */
    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

    U32  litSum;                 /* nb of literals */
    U32  litLengthSum;           /* nb of litLength codes */
    U32  matchLengthSum;         /* nb of matchLength codes */
    U32  offCodeSum;             /* nb of offset codes */
    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
    ZSTD_paramSwitch_e literalCompressionMode;
} optState_t;

typedef struct {
    ZSTD_entropyCTables_t entropy;
    U32 rep[ZSTD_REP_NUM];
} ZSTD_compressedBlockState_t;

typedef struct {
    BYTE const* nextSrc;       /* next block here to continue on current prefix */
    BYTE const* base;          /* All regular indexes relative to this position */
    BYTE const* dictBase;      /* extDict indexes relative to this position */
    U32 dictLimit;             /* below that point, need extDict */
    U32 lowLimit;              /* below that point, no more valid data */
    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
                                * ZSTD_window_init(). Useful for debugging coredumps
                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
                                */
} ZSTD_window_t;

#define ZSTD_WINDOW_START_INDEX 2

typedef struct ZSTD_matchState_t ZSTD_matchState_t;

#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */

struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
                             * When dict referential is copied into active context (i.e. not attached),
                             * loadedDictEnd == dictSize, since referential starts from zero.
                             */
    U32 nextToUpdate;       /* index from which to continue table update */
    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */

    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */

    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;

    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */

    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                               * dedicated dictionary search structure.
                               */
    optState_t opt;         /* optimal parser state */
    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
    const rawSeqStore_t* ldmSeqStore;
};

typedef struct {
    ZSTD_compressedBlockState_t* prevCBlock;
    ZSTD_compressedBlockState_t* nextCBlock;
    ZSTD_matchState_t matchState;
} ZSTD_blockState_t;

typedef struct {
    U32 offset;
    U32 checksum;
} ldmEntry_t;

typedef struct {
    BYTE const* split;
    U32 hash;
    U32 checksum;
    ldmEntry_t* bucket;
} ldmMatchCandidate_t;

#define LDM_BATCH_SIZE 64

typedef struct {
    ZSTD_window_t window;   /* State for the window round buffer management */
    ldmEntry_t* hashTable;
    U32 loadedDictEnd;
    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
    size_t splitIndices[LDM_BATCH_SIZE];
    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
} ldmState_t;

typedef struct {
    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
    U32 hashLog;            /* Log size of hashTable */
    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
    U32 minMatchLength;     /* Minimum match length */
    U32 hashRateLog;        /* Log number of entries to skip */
    U32 windowLog;          /* Window log for the LDM */
} ldmParams_t;

typedef struct {
    int collectSequences;
    ZSTD_Sequence* seqStart;
    size_t seqIndex;
    size_t maxSequences;
} SeqCollector;

struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
    ZSTD_frameParameters fParams;

    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
                                * No target when targetCBlockSize == 0.
                                * There is no guarantee on compressed block size */
    int srcSizeHint;           /* User's best guess of source size.
                                * Hint is not valid when srcSizeHint == 0.
                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
    ZSTD_paramSwitch_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
    size_t jobSize;
    int overlapLog;
    int rsyncable;

    /* Long distance matching parameters */
    ldmParams_t ldmParams;

    /* Dedicated dict search algorithm trigger */
    int enableDedicatedDictSearch;

    /* Input/output buffer modes */
    ZSTD_bufferMode_e inBufferMode;
    ZSTD_bufferMode_e outBufferMode;

    /* Sequence compression API */
    ZSTD_sequenceFormat_e blockDelimiters;
    int validateSequences;

    /* Block splitting */
    ZSTD_paramSwitch_e useBlockSplitter;

    /* Param for deciding whether to use row-based matchfinder */
    ZSTD_paramSwitch_e useRowMatchFinder;

    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
    int deterministicRefPrefix;

    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
    ZSTD_customMem customMem;
};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */

#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)

/**
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;

/**
 * Struct that contains all elements of block splitter that should be allocated
 * in a wksp.
 */
#define ZSTD_MAX_NB_BLOCK_SPLITS 196
typedef struct {
    seqStore_t fullSeqStoreChunk;
    seqStore_t firstHalfSeqStore;
    seqStore_t secondHalfSeqStore;
    seqStore_t currSeqStore;
    seqStore_t nextSeqStore;

    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
    ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;

struct ZSTD_CCtx_s {
    ZSTD_compressionStage_e stage;
    int cParamsChanged;               /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
    int bmi2;                         /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
    ZSTD_CCtx_params requestedParams;
    ZSTD_CCtx_params appliedParams;
    ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
    U32   dictID;
    size_t dictContentSize;

    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
    unsigned long long producedCSize;
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    ZSTD_threadPool* pool;
    size_t staticSize;
    SeqCollector seqCollector;
    int isFirstBlock;
    int initialized;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
    size_t maxNbLdmSequences;
    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
    ZSTD_blockState_t blockState;
    U32* entropyWorkspace;    /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */

    /* Whether we are streaming or not */
    ZSTD_buffered_policy_e bufferedPolicy;

    /* streaming */
    char*  inBuff;
    size_t inBuffSize;
    size_t inToCompress;
    size_t inBuffPos;
    size_t inBuffTarget;
    char*  outBuff;
    size_t outBuffSize;
    size_t outBuffContentSize;
    size_t outBuffFlushedSize;
    ZSTD_cStreamStage streamStage;
    U32    frameEnded;

    /* Stable in/out buffer verification */
    ZSTD_inBuffer expectedInBuffer;
    size_t expectedOutBufferSize;

    /* Dictionary */
    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

    /* Multi-threading */
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_CCtx* mtctx;
#endif

    /* Tracing */
#if ZSTD_TRACE
    ZSTD_TraceCtx traceCtx;
#endif

    /* Workspace for block splitter */
    ZSTD_blockSplitCtx blockSplitCtx;
};

typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;

typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;

typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_cParamMode_e;

typedef size_t (*ZSTD_blockCompressor) (
        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);


MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
                                       8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 16, 17, 17, 18, 18, 19, 19,
                                      20, 20, 20, 20, 21, 21, 21, 21,
                                      22, 22, 22, 22, 22, 22, 22, 22,
                                      23, 23, 23, 23, 23, 23, 23, 23,
                                      24, 24, 24, 24, 24, 24, 24, 24,
                                      24, 24, 24, 24, 24, 24, 24, 24 };
    static const U32 LL_deltaCode = 19;
    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
}

/* ZSTD_MLcode() :
 * note : mlBase = matchLength - MINMATCH;
 *        because it's the format it's stored in seqStore->sequences */
MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
{
    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
    static const U32 ML_deltaCode = 36;
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
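
/* A minimal standalone sketch of the length-code mapping above (an
 * illustration, excluded from the build). It assumes only that
 * ZSTD_highbit32(v) returns the index of v's highest set bit, approximated
 * here with GCC/Clang's __builtin_clz. */
#if 0
#include <assert.h>
#include <stdio.h>

static unsigned highbit32(unsigned v) { return 31 - (unsigned)__builtin_clz(v); }

int main(void)
{
    /* Small values map through the tables: LL_Code[0..15] are identity codes.
     * Large values collapse logarithmically: one code per power-of-two bucket. */
    assert(highbit32(64)  + 19 == 25);   /* litLength 64  -> code 25 */
    assert(highbit32(100) + 19 == 25);   /* litLength 100 -> code 25 (same bucket) */
    assert(highbit32(128) + 19 == 26);   /* litLength 128 -> code 26 */
    assert(highbit32(128) + 36 == 43);   /* mlBase 128    -> code 43 */
    printf("length-code spot checks passed\n");
    return 0;
}
#endif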

typedef struct repcodes_s {
    U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
    repcodes_t newReps;
    if (offset >= ZSTD_REP_NUM) {  /* full offset */
        newReps.rep[2] = rep[1];
        newReps.rep[1] = rep[0];
        newReps.rep[0] = offset - ZSTD_REP_MOVE;
    } else {   /* repcode */
        U32 const repCode = offset + ll0;
        if (repCode > 0) {  /* note : if repCode==0, no change */
            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
            newReps.rep[1] = rep[0];
            newReps.rep[0] = currentOffset;
        } else {   /* repCode == 0 */
            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
        }
    }
    return newReps;
}
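
/* A standalone trace of the repcode-history update above (illustration only,
 * excluded from the build). It assumes the usual zstd constants
 * ZSTD_REP_NUM == 3 and ZSTD_REP_MOVE == ZSTD_REP_NUM - 1. */
#if 0
#include <stdio.h>
#include <string.h>

#define REP_NUM 3
#define REP_MOVE (REP_NUM - 1)

static void updateRep(unsigned rep[3], unsigned offset, unsigned ll0)
{
    unsigned newReps[3];
    if (offset >= REP_NUM) {                 /* a real offset pushes the history down */
        newReps[2] = rep[1]; newReps[1] = rep[0]; newReps[0] = offset - REP_MOVE;
    } else {
        unsigned const repCode = offset + ll0;
        if (repCode == 0) return;            /* rep1 reused: history unchanged */
        {   unsigned const cur = (repCode == REP_NUM) ? (rep[0] - 1) : rep[repCode];
            newReps[2] = (repCode >= 2) ? rep[1] : rep[2];
            newReps[1] = rep[0];
            newReps[0] = cur;                /* the used repcode moves to the front */
        }
    }
    memcpy(rep, newReps, sizeof(newReps));
}

int main(void)
{
    unsigned rep[3] = { 8, 4, 2 };
    updateRep(rep, 100 + REP_MOVE, 0);       /* real offset 100: history becomes {100, 8, 4} */
    printf("%u %u %u\n", rep[0], rep[1], rep[2]);
    updateRep(rep, 1, 0);                    /* offset field 1 (a repcode): second entry to front */
    printf("%u %u %u\n", rep[0], rep[1], rep[2]);   /* -> {8, 100, 4} */
    return 0;
}
#endif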

/* ZSTD_cParam_withinBounds:
 * @return 1 if value is within cParam bounds,
 * 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
    if (ZSTD_isError(bounds.error)) return 0;
    if (value < bounds.lowerBound) return 0;
    if (value > bounds.upperBound) return 0;
    return 1;
}

/* ZSTD_noCompressBlock() :
 * Writes uncompressed block to dst buffer from given src.
 * Returns the size of the block */
MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
{
    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
                    dstSize_tooSmall, "dst buf too small for uncompressed block");
    MEM_writeLE24(dst, cBlockHeader24);
    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
    return ZSTD_blockHeaderSize + srcSize;
}
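
/* A small standalone sketch of the 3-byte block header written above
 * (illustration only, excluded from the build), following the layout the
 * code encodes: bit 0 = lastBlock, bits 1-2 = block type (bt_raw==0,
 * bt_rle==1, bt_compressed==2), bits 3-23 = block size. */
#if 0
#include <stdio.h>

int main(void)
{
    unsigned const lastBlock = 1, btRaw = 0, srcSize = 1000;
    unsigned const header24 = lastBlock + (btRaw << 1) + (srcSize << 3);

    /* Decode it back, the way a decoder reads the little-endian field. */
    printf("last=%u type=%u size=%u\n",
           header24 & 1, (header24 >> 1) & 3, header24 >> 3);  /* last=1 type=0 size=1000 */
    return 0;
}
#endif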

MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
{
    BYTE* const op = (BYTE*)dst;
    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
    MEM_writeLE24(op, cBlockHeader);
    op[3] = src;
    return 4;
}

/* ZSTD_minGain() :
 * minimum compression gain required
 * to generate a compressed block or a compressed literals section.
 * note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    return (srcSize >> minlog) + 2;
}
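
/* A worked example of the formula above (standalone illustration, excluded
 * from the build): for a 4 KB block, strategies below ZSTD_btultra use
 * minlog = 6 and require 4096/64 + 2 = 66 bytes of savings before the
 * compressed form is kept; btultra2 (strat 9, minlog 8) requires only
 * 4096/256 + 2 = 18. */
#if 0
#include <stdio.h>

static size_t minGain(size_t srcSize, unsigned strat)
{
    unsigned const minlog = (strat >= 8) ? strat - 1 : 6;   /* 8 == ZSTD_btultra */
    return (srcSize >> minlog) + 2;
}

int main(void)
{
    printf("fast:     %zu\n", minGain(4096, 1));   /* 66 */
    printf("btultra2: %zu\n", minGain(4096, 9));   /* 18 */
    return 0;
}
#endif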

MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
{
    switch (cctxParams->literalCompressionMode) {
    case ZSTD_ps_enable:
        return 0;
    case ZSTD_ps_disable:
        return 1;
    default:
        assert(0 /* impossible: pre-validated */);
        ZSTD_FALLTHROUGH;
    case ZSTD_ps_auto:
        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
    }
}

/*! ZSTD_safecopyLiterals() :
 *  memcpy() variant that won't read more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
 *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
 *  large copies.
 */
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
    assert(iend > ilimit_w);
    if (ip <= ilimit_w) {
        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
        op += ilimit_w - ip;
        ip = ilimit_w;
    }
    while (ip < iend) *op++ = *ip++;
}

/*! ZSTD_storeSeq() :
 *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
 *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
 *  `mlBase` : matchLength - MINMATCH
 *  Allowed to overread literals up to litLimit.
 */
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
{
    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
    BYTE const* const litEnd = literals + litLength;
#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
    }
#endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
    assert(literals + litLength <= litLimit);
    if (litEnd <= litLimit_w) {
        /* Common case we can use wildcopy.
         * First copy 16 bytes, because literals are likely short.
         */
        assert(WILDCOPY_OVERLENGTH >= 16);
        ZSTD_copy16(seqStorePtr->lit, literals);
        if (litLength > 16) {
            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
        }
    } else {
        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
    }
    seqStorePtr->lit += litLength;

    /* literal Length */
    if (litLength>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
    seqStorePtr->sequences[0].offset = offCode + 1;

    /* match Length */
    if (mlBase>0xFFFF) {
        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
        seqStorePtr->longLengthType = ZSTD_llt_matchLength;
        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    }
    seqStorePtr->sequences[0].matchLength = (U16)mlBase;

    seqStorePtr->sequences++;
}


/*-*************************************
*  Match length counter
***************************************/
static unsigned ZSTD_NbCommonBytes (size_t val)
{
    if (MEM_isLittleEndian()) {
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _tzcnt_u64(val) >> 3;
#           else
                if (val != 0) {
                    unsigned long r;
                    _BitScanForward64(&r, (U64)val);
                    return (unsigned)(r >> 3);
                } else {
                    /* Should not reach this code path */
                    __assume(0);
                }
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
                                                     0, 3, 1, 3, 1, 4, 2, 7,
                                                     0, 2, 3, 6, 1, 5, 3, 5,
                                                     1, 3, 4, 4, 2, 5, 6, 7,
                                                     7, 0, 1, 2, 3, 3, 4, 6,
                                                     2, 6, 5, 5, 3, 4, 5, 6,
                                                     7, 1, 2, 4, 6, 4, 4, 5,
                                                     7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            if (val != 0) {
                unsigned long r;
                _BitScanForward(&r, (U32)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
                                                     3, 2, 2, 1, 3, 2, 0, 1,
                                                     3, 3, 1, 2, 2, 2, 2, 0,
                                                     3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    } else {  /* Big Endian CPU */
        if (MEM_64bits()) {
#       if defined(_MSC_VER) && defined(_WIN64)
#           if STATIC_BMI2
                return _lzcnt_u64(val) >> 3;
#           else
                if (val != 0) {
                    unsigned long r;
                    _BitScanReverse64(&r, (U64)val);
                    return (unsigned)(r >> 3);
                } else {
                    /* Should not reach this code path */
                    __assume(0);
                }
#           endif
#       elif defined(__GNUC__) && (__GNUC__ >= 4)
            return (__builtin_clzll(val) >> 3);
#       else
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        } else { /* 32 bits */
#       if defined(_MSC_VER)
            if (val != 0) {
                unsigned long r;
                _BitScanReverse(&r, (unsigned long)val);
                return (unsigned)(r >> 3);
            } else {
                /* Should not reach this code path */
                __assume(0);
            }
#       elif defined(__GNUC__) && (__GNUC__ >= 3)
            return (__builtin_clz((U32)val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
    }   }
}


MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
    const BYTE* const pStart = pIn;
    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);

    if (pIn < pInLoopLimit) {
        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
          if (diff) return ZSTD_NbCommonBytes(diff); }
        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
        while (pIn < pInLoopLimit) {
            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
            pIn += ZSTD_NbCommonBytes(diff);
            return (size_t)(pIn - pStart);
    }   }
    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (size_t)(pIn - pStart);
}
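
/* A standalone illustration of the technique above (excluded from the
 * build): XOR two machine words, then the count of trailing zero *bytes* of
 * the difference equals the number of equal leading bytes, on little-endian
 * hosts. Assumes GCC/Clang builtins. */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned nbCommonBytes(uint64_t diff) { return (unsigned)(__builtin_ctzll(diff) >> 3); }

int main(void)
{
    const char a[8] = "abcdXYZ";
    const char b[8] = "abcdxyz";
    uint64_t wa, wb;
    memcpy(&wa, a, 8);
    memcpy(&wb, b, 8);
    assert(wa != wb);
    /* First mismatching byte is at index 4, so 4 common bytes. */
    printf("common bytes: %u\n", nbCommonBytes(wa ^ wb));   /* 4 on little-endian */
    return 0;
}
#endif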

/** ZSTD_count_2segments() :
 *  can count match length with `ip` & `match` in 2 different segments.
 *  convention : on reaching mEnd, match count continues starting from iStart
 */
MEM_STATIC size_t
ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
    size_t const matchLength = ZSTD_count(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;
    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
}


/*-*************************************
*  Hashes
***************************************/
static const U32 prime3bytes = 506832829U;
static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */

static const U32 prime4bytes = 2654435761U;
static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }

static const U64 prime5bytes = 889523592379ULL;
static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }

static const U64 prime6bytes = 227718039650203ULL;
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }

static const U64 prime7bytes = 58295818150454627ULL;
static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }

static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }

MEM_STATIC FORCE_INLINE_ATTR
size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
{
    switch(mls)
    {
    default:
    case 4: return ZSTD_hash4Ptr(p, hBits);
    case 5: return ZSTD_hash5Ptr(p, hBits);
    case 6: return ZSTD_hash6Ptr(p, hBits);
    case 7: return ZSTD_hash7Ptr(p, hBits);
    case 8: return ZSTD_hash8Ptr(p, hBits);
    }
}
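
/* A standalone sketch of the n-byte hash trick above (illustration only,
 * excluded from the build): read 8 little-endian bytes, shift left by
 * (64 - 8*n) so only the first n bytes influence the product, multiply by a
 * large odd constant, keep the top hBits bits. The constant is copied from
 * this header. */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static size_t hash5(uint64_t u, unsigned h)
{
    static const uint64_t prime5bytes = 889523592379ULL;
    return (size_t)(((u << (64 - 40)) * prime5bytes) >> (64 - h));
}

int main(void)
{
    /* Two 8-byte windows sharing their first 5 bytes hash identically
     * (assuming a little-endian host for the memcpy-based read). */
    uint64_t a, b;
    memcpy(&a, "hello+++", 8);
    memcpy(&b, "hello---", 8);
    printf("%zx %zx\n", hash5(a, 20), hash5(b, 20));   /* equal 20-bit hashes */
    return 0;
}
#endif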

/** ZSTD_ipow() :
 * Return base^exponent.
 */
static U64 ZSTD_ipow(U64 base, U64 exponent)
{
    U64 power = 1;
    while (exponent) {
        if (exponent & 1) power *= base;
        exponent >>= 1;
        base *= base;
    }
    return power;
}

#define ZSTD_ROLL_HASH_CHAR_OFFSET 10

/** ZSTD_rollingHash_append() :
 * Add the buffer to the hash value.
 */
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
{
    BYTE const* istart = (BYTE const*)buf;
    size_t pos;
    for (pos = 0; pos < size; ++pos) {
        hash *= prime8bytes;
        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
    }
    return hash;
}

/** ZSTD_rollingHash_compute() :
 * Compute the rolling hash value of the buffer.
 */
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
{
    return ZSTD_rollingHash_append(0, buf, size);
}

/** ZSTD_rollingHash_primePower() :
 * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
 * over a window of `length` bytes.
 */
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
{
    return ZSTD_ipow(prime8bytes, length - 1);
}

/** ZSTD_rollingHash_rotate() :
 * Rotate the rolling hash by one byte.
 */
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
{
    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
    hash *= prime8bytes;
    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
    return hash;
}
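
/* A standalone check of the rolling-hash invariant behind the functions
 * above (illustration only, excluded from the build): rotating the hash of
 * window [i, i+len) by one byte yields the hash of [i+1, i+1+len). It mirrors
 * the polynomial hash with the constants from this header. */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PRIME8 0xCF1BBCDCB7A56463ULL
#define CHAR_OFFSET 10

static uint64_t append(uint64_t h, const uint8_t* p, size_t n)
{
    size_t i;
    for (i = 0; i < n; ++i) h = h * PRIME8 + p[i] + CHAR_OFFSET;
    return h;
}

static uint64_t ipow(uint64_t b, uint64_t e)
{
    uint64_t r = 1;
    for (; e; e >>= 1, b *= b) if (e & 1) r *= b;
    return r;
}

int main(void)
{
    const uint8_t* s = (const uint8_t*)"the quick brown fox jumps";
    size_t const len = 8;
    uint64_t const pp = ipow(PRIME8, len - 1);           /* primePower for len */
    uint64_t h = append(0, s, len);                      /* hash of s[0..8) */
    h = (h - (s[0] + CHAR_OFFSET) * pp) * PRIME8 + s[len] + CHAR_OFFSET;   /* rotate */
    assert(h == append(0, s + 1, len));                  /* == hash of s[1..9) */
    printf("rolling-hash rotation verified\n");
    return 0;
}
#endif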

/*-*************************************
*  Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */

/**
 * ZSTD_window_clear():
 * Clears the window containing the history by simply setting it to empty.
 */
MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
{
    size_t const endT = (size_t)(window->nextSrc - window->base);
    U32 const end = (U32)endT;

    window->lowLimit = end;
    window->dictLimit = end;
}

MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
{
    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
}

/**
 * ZSTD_window_hasExtDict():
 * Returns non-zero if the window has a non-empty extDict.
 */
MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
{
    return window.lowLimit < window.dictLimit;
}

/**
 * ZSTD_matchState_dictMode():
 * Inspects the provided matchState and figures out what dictMode should be
 * passed to the compressor.
 */
MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
{
    return ZSTD_window_hasExtDict(ms->window) ?
        ZSTD_extDict :
        ms->dictMatchState != NULL ?
            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
            ZSTD_noDict;
}

/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#  define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
# else
#  define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
# endif
#endif

/**
 * ZSTD_window_canOverflowCorrect():
 * Returns non-zero if the indices are large enough for overflow correction
 * to work correctly without impacting compression ratio.
 */
MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
                                              U32 cycleLog,
                                              U32 maxDist,
                                              U32 loadedDictEnd,
                                              void const* src)
{
    U32 const cycleSize = 1u << cycleLog;
    U32 const curr = (U32)((BYTE const*)src - window.base);
    U32 const minIndexToOverflowCorrect = cycleSize
                                        + MAX(maxDist, cycleSize)
                                        + ZSTD_WINDOW_START_INDEX;

    /* Adjust the min index to backoff the overflow correction frequency,
     * so we don't waste too much CPU in overflow correction. If this
     * computation overflows we don't really care, we just need to make
     * sure it is at least minIndexToOverflowCorrect.
     */
    U32 const adjustment = window.nbOverflowCorrections + 1;
    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
                                  minIndexToOverflowCorrect);
    U32 const indexLargeEnough = curr > adjustedIndex;

    /* Only overflow correct early if the dictionary is invalidated already,
     * so we don't hurt compression ratio.
     */
    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;

    return indexLargeEnough && dictionaryInvalidated;
}

/**
 * ZSTD_window_needOverflowCorrection():
 * Returns non-zero if the indices are getting too large and need overflow
 * protection.
 */
MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
                                                  U32 cycleLog,
                                                  U32 maxDist,
                                                  U32 loadedDictEnd,
                                                  void const* src,
                                                  void const* srcEnd)
{
    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
            return 1;
        }
    }
    return curr > ZSTD_CURRENT_MAX;
}

/**
 * ZSTD_window_correctOverflow():
 * Reduces the indices to protect from index overflow.
 * Returns the correction made to the indices, which must be applied to every
 * stored index.
 *
 * The least significant cycleLog bits of the indices must remain the same,
 * which may be 0. Every index up to maxDist in the past must be valid.
 */
MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                           U32 maxDist, void const* src)
{
    /* preemptive overflow correction:
     * 1. correction is large enough:
     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
     *
     *    current - newCurrent
     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
     *    > (3<<29) - (1<<chainLog)
     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
     *    > 1<<29
     *
     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
     *    In 32-bit mode we are safe, because (chainLog <= 29), so
     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
     */
    U32 const cycleSize = 1u << cycleLog;
    U32 const cycleMask = cycleSize - 1;
    U32 const curr = (U32)((BYTE const*)src - window->base);
    U32 const currentCycle = curr & cycleMask;
    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
                                     : 0;
    U32 const newCurrent = currentCycle
                         + currentCycleCorrection
                         + MAX(maxDist, cycleSize);
    U32 const correction = curr - newCurrent;
    /* maxDist must be a power of two so that:
     *   (newCurrent & cycleMask) == (curr & cycleMask)
     * This is required to not corrupt the chains / binary tree.
     */
    assert((maxDist & (maxDist - 1)) == 0);
    assert((curr & cycleMask) == (newCurrent & cycleMask));
    assert(curr > newCurrent);
    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
        /* Loose bound, should be around 1<<29 (see above) */
        assert(correction > 1<<28);
    }

    window->base += correction;
    window->dictBase += correction;
    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->lowLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->lowLimit -= correction;
    }
    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
        window->dictLimit = ZSTD_WINDOW_START_INDEX;
    } else {
        window->dictLimit -= correction;
    }

    /* Ensure we can still reference the full window. */
    assert(newCurrent >= maxDist);
    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
    /* Ensure that lowLimit and dictLimit didn't underflow. */
    assert(window->lowLimit <= newCurrent);
    assert(window->dictLimit <= newCurrent);

    ++window->nbOverflowCorrections;

    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
             window->lowLimit);
    return correction;
}
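
/* A standalone walk-through of the index arithmetic above (illustration
 * only, excluded from the build): with cycleLog = 24 and maxDist = 1<<27,
 * a current index just past 3<<29 is rewound so that its low cycleLog bits
 * are preserved while roughly 3<<29 is shed from every stored index. The
 * small-index ZSTD_WINDOW_START_INDEX adjustment is omitted here because
 * currentCycle >= 2 in this example. */
#if 0
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
    unsigned const cycleLog  = 24;
    unsigned const cycleSize = 1u << cycleLog;
    unsigned const cycleMask = cycleSize - 1;
    unsigned const maxDist   = 1u << 27;
    unsigned const curr      = (3u << 29) + 12345u;   /* index near the trigger point */

    unsigned const currentCycle = curr & cycleMask;
    unsigned const newCurrent   = currentCycle + MAX(maxDist, cycleSize);
    unsigned const correction   = curr - newCurrent;

    printf("newCurrent=0x%x correction=0x%x sameLowBits=%d\n",
           newCurrent, correction, (curr & cycleMask) == (newCurrent & cycleMask));
    return 0;
}
#endif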

/**
 * ZSTD_window_enforceMaxDist():
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
 * It ensures index is valid as long as index >= lowLimit.
 * This must be called before a block compression call.
 *
 * loadedDictEnd is only defined if a dictionary is in use for current compression.
 * As the name implies, loadedDictEnd represents the index at end of dictionary.
 * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
 * This is because dictionaries are allowed to be referenced fully
 * as long as the last byte of the dictionary is in the window.
 * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
 *
 * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
 * In dictMatchState mode, lowLimit and dictLimit are the same,
 * and the dictionary is below them.
 * forceWindow and dictMatchState are therefore incompatible.
 */
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                     const void* blockEnd,
                           U32   maxDist,
                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
{
    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);

    /* - When there is no dictionary : loadedDictEnd == 0.
         In which case, the test (blockEndIdx > maxDist) is merely to avoid
         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
       - When there is a standard dictionary :
         Index referential is copied from the dictionary,
         which means it starts from 0.
         In which case, loadedDictEnd == dictSize,
         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
         since `blockEndIdx` also starts from zero.
       - When there is an attached dictionary :
         loadedDictEnd is expressed within the referential of the context,
         so it can be directly compared against blockEndIdx.
    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
        if (window->dictLimit < window->lowLimit) {
            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
        /* On reaching window size, dictionaries are invalidated */
        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
}

/* Similar to ZSTD_window_enforceMaxDist(),
 * but only invalidates dictionary
 * when input progresses beyond window size.
 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
 *              loadedDictEnd uses same referential as window->base
 *              maxDist is the window size */
MEM_STATIC void
ZSTD_checkDictValidity(const ZSTD_window_t* window,
                       const void* blockEnd,
                             U32   maxDist,
                             U32*  loadedDictEndPtr,
                       const ZSTD_matchState_t** dictMatchStatePtr)
{
    assert(loadedDictEndPtr != NULL);
    assert(dictMatchStatePtr != NULL);
    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
        U32 const loadedDictEnd = *loadedDictEndPtr;
        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
        assert(blockEndIdx >= loadedDictEnd);

        if (blockEndIdx > loadedDictEnd + maxDist) {
            /* On reaching window size, dictionaries are invalidated.
             * For simplification, if window size is reached anywhere within next block,
             * the dictionary is invalidated for the full block.
             */
            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
            *loadedDictEndPtr = 0;
            *dictMatchStatePtr = NULL;
        } else {
            if (*loadedDictEndPtr != 0) {
                DEBUGLOG(6, "dictionary considered valid for current block");
    }   }   }
}

MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
    ZSTD_memset(window, 0, sizeof(*window));
    window->base = (BYTE const*)" ";
    window->dictBase = (BYTE const*)" ";
    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
    window->nbOverflowCorrections = 0;
}

/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
 * If it is not contiguous, the current prefix becomes the extDict, and we
 * forget about the old extDict. Handles overlap of the prefix and extDict.
 * Returns non-zero if the segment is contiguous.
 */
MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
                                  void const* src, size_t srcSize,
                                  int forceNonContiguous)
{
    BYTE const* const ip = (BYTE const*)src;
    U32 contiguous = 1;
    DEBUGLOG(5, "ZSTD_window_update");
    if (srcSize == 0)
        return contiguous;
    assert(window->base != NULL);
    assert(window->dictBase != NULL);
    /* Check if blocks follow each other */
    if (src != window->nextSrc || forceNonContiguous) {
        /* not contiguous */
        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
        window->lowLimit = window->dictLimit;
        assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
        window->dictLimit = (U32)distanceFromBase;
        window->dictBase = window->base;
        window->base = ip - distanceFromBase;
        /* ms->nextToUpdate = window->dictLimit; */
        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
        contiguous = 0;
    }
    window->nextSrc = ip + srcSize;
    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
    if ( (ip+srcSize > window->dictBase + window->lowLimit)
       & (ip < window->dictBase + window->dictLimit)) {
        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
        window->lowLimit = lowLimitMax;
        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
    }
    return contiguous;
}

/**
 * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.lowLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}
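
/* A standalone arithmetic check of the window clamp above (illustration
 * only, excluded from the build): with windowLog = 20, a current index of
 * 3,000,000 and lowLimit = 100, the lowest reachable match index is
 * curr - (1<<20) = 1,951,424, unless a still-valid dictionary pins it down
 * to lowLimit. */
#if 0
#include <stdio.h>

static unsigned lowestMatchIndex(unsigned curr, unsigned lowLimit,
                                 unsigned windowLog, int isDictionary)
{
    unsigned const maxDistance = 1u << windowLog;
    unsigned const withinWindow =
        (curr - lowLimit > maxDistance) ? curr - maxDistance : lowLimit;
    return isDictionary ? lowLimit : withinWindow;
}

int main(void)
{
    printf("%u\n", lowestMatchIndex(3000000, 100, 20, 0));   /* 1951424 */
    printf("%u\n", lowestMatchIndex(3000000, 100, 20, 1));   /* 100 */
    return 0;
}
#endif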

/**
 * Returns the lowest allowed match index in the prefix.
 */
MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = 1U << windowLog;
    U32 const lowestValid = ms->window.dictLimit;
    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
    U32 const isDictionary = (ms->loadedDictEnd != 0);
    /* When computing the lowest prefix index we need to take the dictionary into account to handle
     * the edge case where the dictionary and the source are contiguous in memory.
     */
    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
    return matchLowest;
}



/* debug functions */
#if (DEBUGLEVEL>=2)

MEM_STATIC double ZSTD_fWeight(U32 rawStat)
{
    U32 const fp_accuracy = 8;
    U32 const fp_multiplier = (1 << fp_accuracy);
    U32 const newStat = rawStat + 1;
    U32 const hb = ZSTD_highbit32(newStat);
    U32 const BWeight = hb * fp_multiplier;
    U32 const FWeight = (newStat << fp_accuracy) >> hb;
    U32 const weight = BWeight + FWeight;
    assert(hb + fp_accuracy < 31);
    return (double)weight / fp_multiplier;
}

/* display a table content,
 * listing each element, its frequency, and its predicted bit cost */
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
{
    unsigned u, sum;
    for (u=0, sum=0; u<=max; u++) sum += table[u];
    DEBUGLOG(2, "total nb elts: %u", sum);
    for (u=0; u<=max; u++) {
        DEBUGLOG(2, "%2u: %5u  (%.2f)",
                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
    }
}

#endif


#if defined (__cplusplus)
}
#endif

/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */

/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number supposed already checked
 *               and dictSize >= 8 */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);

void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);

/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */

/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);

/*! ZSTD_initCStream_internal() :
 *  Private use only. Init streaming operation.
 *  expects params to be valid.
 *  must receive dict, or cdict, or none, but not both.
 *  @return : 0, or an error code */
size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

void ZSTD_resetSeqStore(seqStore_t* ssPtr);

/*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);

/* ZSTD_compressBegin_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

/* ZSTD_compress_advanced_internal() :
 * Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict, size_t dictSize,
                                 const ZSTD_CCtx_params* params);


/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);


/* ZSTD_referenceExternalSequences() :
 * Must be called before starting a compression operation.
 * seqs must parse a prefix of the source.
 * This cannot be used when long range matching is enabled.
 * Zstd will use these sequences, and pass the literals to a secondary block
 * compressor.
 * @return : An error code on failure.
 * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
 * access and data corruption.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);

/** ZSTD_cycleLog() :
 *  condition for correct operation : hashLog > 1 */
U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);

/** ZSTD_CCtx_trace() :
 *  Trace the end of a compression call.
 */
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);

#endif /* ZSTD_COMPRESS_H */