1 /*
2 * Copyright (c) Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 #ifndef ZSTD_CCOMMON_H_MODULE
12 #define ZSTD_CCOMMON_H_MODULE
13
14 /* this module contains definitions which must be identical
15 * across compression, decompression and dictBuilder.
16 * It also contains a few functions useful to at least 2 of them
17 * and which benefit from being inlined */
18
19 /*-*************************************
20 * Dependencies
21 ***************************************/
22 #include "compiler.h"
23 #include "cpu.h"
24 #include "mem.h"
25 #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
26 #include "error_private.h"
27 #define ZSTD_STATIC_LINKING_ONLY
28 #include "../zstd.h"
29 #define FSE_STATIC_LINKING_ONLY
30 #include "fse.h"
31 #define HUF_STATIC_LINKING_ONLY
32 #include "huf.h"
33 #ifndef XXH_STATIC_LINKING_ONLY
34 # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
35 #endif
36 #include "xxhash.h" /* XXH_reset, update, digest */
37 #ifndef ZSTD_NO_TRACE
38 # include "zstd_trace.h"
39 #else
40 # define ZSTD_TRACE 0
41 #endif
42
43 #if defined (__cplusplus)
44 extern "C" {
45 #endif
46
47 /* ---- static assert (debug) --- */
48 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
49 #define ZSTD_isError ERR_isError /* for inlining */
50 #define FSE_isError ERR_isError
51 #define HUF_isError ERR_isError
52
53
54 /*-*************************************
55 * shared macros
56 ***************************************/
57 #undef MIN
58 #undef MAX
59 #define MIN(a,b) ((a)<(b) ? (a) : (b))
60 #define MAX(a,b) ((a)>(b) ? (a) : (b))
61 #define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
62
63
64 /*-*************************************
65 * Common constants
66 ***************************************/
67 #define ZSTD_OPT_NUM (1<<12)
68
69 #define ZSTD_REP_NUM 3 /* number of repcodes */
70 static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
71
72 #define KB *(1 <<10)
73 #define MB *(1 <<20)
74 #define GB *(1U<<30)
75
76 #define BIT7 128
77 #define BIT6 64
78 #define BIT5 32
79 #define BIT4 16
80 #define BIT1 2
81 #define BIT0 1
82
83 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
84 static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
85 static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
86
87 #define ZSTD_FRAMEIDSIZE 4 /* magic number size */
88
89 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
90 static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
91 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
92
93 #define ZSTD_FRAMECHECKSUMSIZE 4
94
95 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
96 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
97
98 #define HufLog 12
99 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
100
101 #define LONGNBSEQ 0x7F00
102
103 #define MINMATCH 3
104
105 #define Litbits 8
106 #define MaxLit ((1<<Litbits) - 1)
107 #define MaxML 52
108 #define MaxLL 35
109 #define DefaultMaxOff 28
110 #define MaxOff 31
111 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
112 #define MLFSELog 9
113 #define LLFSELog 9
114 #define OffFSELog 8
115 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
116
117 #define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
118 /* Each table cannot take more than #symbols * FSELog bits */
119 #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
120
121 static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
122 0, 0, 0, 0, 0, 0, 0, 0,
123 0, 0, 0, 0, 0, 0, 0, 0,
124 1, 1, 1, 1, 2, 2, 3, 3,
125 4, 6, 7, 8, 9,10,11,12,
126 13,14,15,16
127 };
128 static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
129 4, 3, 2, 2, 2, 2, 2, 2,
130 2, 2, 2, 2, 2, 1, 1, 1,
131 2, 2, 2, 2, 2, 2, 2, 2,
132 2, 3, 2, 1, 1, 1, 1, 1,
133 -1,-1,-1,-1
134 };
135 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
136 static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
137
138 static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
139 0, 0, 0, 0, 0, 0, 0, 0,
140 0, 0, 0, 0, 0, 0, 0, 0,
141 0, 0, 0, 0, 0, 0, 0, 0,
142 0, 0, 0, 0, 0, 0, 0, 0,
143 1, 1, 1, 1, 2, 2, 3, 3,
144 4, 4, 5, 7, 8, 9,10,11,
145 12,13,14,15,16
146 };
147 static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
148 1, 4, 3, 2, 2, 2, 2, 2,
149 2, 1, 1, 1, 1, 1, 1, 1,
150 1, 1, 1, 1, 1, 1, 1, 1,
151 1, 1, 1, 1, 1, 1, 1, 1,
152 1, 1, 1, 1, 1, 1, 1, 1,
153 1, 1, 1, 1, 1, 1,-1,-1,
154 -1,-1,-1,-1,-1
155 };
156 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
157 static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
158
159 static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
160 1, 1, 1, 1, 1, 1, 2, 2,
161 2, 1, 1, 1, 1, 1, 1, 1,
162 1, 1, 1, 1, 1, 1, 1, 1,
163 -1,-1,-1,-1,-1
164 };
165 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
166 static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
167
168
169 /*-*******************************************
170 * Shared functions to include for inlining
171 *********************************************/
ZSTD_copy8(void * dst,const void * src)172 static void ZSTD_copy8(void* dst, const void* src) {
173 #if defined(ZSTD_ARCH_ARM_NEON)
174 vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
175 #else
176 ZSTD_memcpy(dst, src, 8);
177 #endif
178 }
179 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
180
181 /* Need to use memmove here since the literal buffer can now be located within
182 the dst buffer. In circumstances where the op "catches up" to where the
183 literal buffer is, there can be partial overlaps in this call on the final
184 copy if the literal is being shifted by less than 16 bytes. */
ZSTD_copy16(void * dst,const void * src)185 static void ZSTD_copy16(void* dst, const void* src) {
186 #if defined(ZSTD_ARCH_ARM_NEON)
187 vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
188 #elif defined(ZSTD_ARCH_X86_SSE2)
189 _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
190 #elif defined(__clang__)
191 ZSTD_memmove(dst, src, 16);
192 #else
193 /* ZSTD_memmove is not inlined properly by gcc */
194 BYTE copy16_buf[16];
195 ZSTD_memcpy(copy16_buf, src, 16);
196 ZSTD_memcpy(dst, copy16_buf, 16);
197 #endif
198 }
199 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
200
201 #define WILDCOPY_OVERLENGTH 32
202 #define WILDCOPY_VECLEN 16
203
204 typedef enum {
205 ZSTD_no_overlap,
206 ZSTD_overlap_src_before_dst
207 /* ZSTD_overlap_dst_before_src, */
208 } ZSTD_overlap_e;
209
210 /*! ZSTD_wildcopy() :
211 * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
212 * @param ovtype controls the overlap detection
213 * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
214 * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
215 * The src buffer must be before the dst buffer.
216 */
217 MEM_STATIC FORCE_INLINE_ATTR
ZSTD_wildcopy(void * dst,const void * src,ptrdiff_t length,ZSTD_overlap_e const ovtype)218 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
219 {
220 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
221 const BYTE* ip = (const BYTE*)src;
222 BYTE* op = (BYTE*)dst;
223 BYTE* const oend = op + length;
224
225 if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
226 /* Handle short offset copies. */
227 do {
228 COPY8(op, ip)
229 } while (op < oend);
230 } else {
231 assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
232 /* Separate out the first COPY16() call because the copy length is
233 * almost certain to be short, so the branches have different
234 * probabilities. Since it is almost certain to be short, only do
235 * one COPY16() in the first call. Then, do two calls per loop since
236 * at that point it is more likely to have a high trip count.
237 */
238 #ifdef __aarch64__
239 do {
240 COPY16(op, ip);
241 }
242 while (op < oend);
243 #else
244 ZSTD_copy16(op, ip);
245 if (16 >= length) return;
246 op += 16;
247 ip += 16;
248 do {
249 COPY16(op, ip);
250 COPY16(op, ip);
251 }
252 while (op < oend);
253 #endif
254 }
255 }
256
ZSTD_limitCopy(void * dst,size_t dstCapacity,const void * src,size_t srcSize)257 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
258 {
259 size_t const length = MIN(dstCapacity, srcSize);
260 if (length > 0) {
261 ZSTD_memcpy(dst, src, length);
262 }
263 return length;
264 }
265
266 /* define "workspace is too large" as this number of times larger than needed */
267 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3
268
269 /* when workspace is continuously too large
270 * during at least this number of times,
271 * context's memory usage is considered wasteful,
272 * because it's sized to handle a worst case scenario which rarely happens.
273 * In which case, resize it down to free some memory */
274 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
275
276 /* Controls whether the input/output buffer is buffered or stable. */
277 typedef enum {
278 ZSTD_bm_buffered = 0, /* Buffer the input/output */
279 ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
280 } ZSTD_bufferMode_e;
281
282
283 /*-*******************************************
284 * Private declarations
285 *********************************************/
286 typedef struct seqDef_s {
287 U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
288 U16 litLength;
289 U16 mlBase; /* mlBase == matchLength - MINMATCH */
290 } seqDef;
291
292 /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
293 typedef enum {
294 ZSTD_llt_none = 0, /* no longLengthType */
295 ZSTD_llt_literalLength = 1, /* represents a long literal */
296 ZSTD_llt_matchLength = 2 /* represents a long match */
297 } ZSTD_longLengthType_e;
298
299 typedef struct {
300 seqDef* sequencesStart;
301 seqDef* sequences; /* ptr to end of sequences */
302 BYTE* litStart;
303 BYTE* lit; /* ptr to end of literals */
304 BYTE* llCode;
305 BYTE* mlCode;
306 BYTE* ofCode;
307 size_t maxNbSeq;
308 size_t maxNbLit;
309
310 /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
311 * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
312 * the existing value of the litLength or matchLength by 0x10000.
313 */
314 ZSTD_longLengthType_e longLengthType;
315 U32 longLengthPos; /* Index of the sequence to apply long length modification to */
316 } seqStore_t;
317
318 typedef struct {
319 U32 litLength;
320 U32 matchLength;
321 } ZSTD_sequenceLength;
322
323 /**
324 * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
325 * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
326 */
ZSTD_getSequenceLength(seqStore_t const * seqStore,seqDef const * seq)327 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
328 {
329 ZSTD_sequenceLength seqLen;
330 seqLen.litLength = seq->litLength;
331 seqLen.matchLength = seq->mlBase + MINMATCH;
332 if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
333 if (seqStore->longLengthType == ZSTD_llt_literalLength) {
334 seqLen.litLength += 0xFFFF;
335 }
336 if (seqStore->longLengthType == ZSTD_llt_matchLength) {
337 seqLen.matchLength += 0xFFFF;
338 }
339 }
340 return seqLen;
341 }
342
343 /**
344 * Contains the compressed frame size and an upper-bound for the decompressed frame size.
345 * Note: before using `compressedSize`, check for errors using ZSTD_isError().
346 * similarly, before using `decompressedBound`, check for errors using:
347 * `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
348 */
349 typedef struct {
350 size_t compressedSize;
351 unsigned long long decompressedBound;
352 } ZSTD_frameSizeInfo; /* decompress & legacy */
353
354 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
355 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
356
357 /* custom memory allocation functions */
358 void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
359 void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
360 void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
361
362
ZSTD_highbit32(U32 val)363 MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
364 {
365 assert(val != 0);
366 {
367 # if defined(_MSC_VER) /* Visual */
368 # if STATIC_BMI2 == 1
369 return _lzcnt_u32(val)^31;
370 # else
371 if (val != 0) {
372 unsigned long r;
373 _BitScanReverse(&r, val);
374 return (unsigned)r;
375 } else {
376 /* Should not reach this code path */
377 __assume(0);
378 }
379 # endif
380 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
381 return __builtin_clz (val) ^ 31;
382 # elif defined(__ICCARM__) /* IAR Intrinsic */
383 return 31 - __CLZ(val);
384 # else /* Software version */
385 static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
386 U32 v = val;
387 v |= v >> 1;
388 v |= v >> 2;
389 v |= v >> 4;
390 v |= v >> 8;
391 v |= v >> 16;
392 return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
393 # endif
394 }
395 }
396
397 /**
398 * Counts the number of trailing zeros of a `size_t`.
399 * Most compilers should support CTZ as a builtin. A backup
400 * implementation is provided if the builtin isn't supported, but
401 * it may not be terribly efficient.
402 */
ZSTD_countTrailingZeros(size_t val)403 MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
404 {
405 if (MEM_64bits()) {
406 # if defined(_MSC_VER) && defined(_WIN64)
407 # if STATIC_BMI2
408 return _tzcnt_u64(val);
409 # else
410 if (val != 0) {
411 unsigned long r;
412 _BitScanForward64(&r, (U64)val);
413 return (unsigned)r;
414 } else {
415 /* Should not reach this code path */
416 __assume(0);
417 }
418 # endif
419 # elif defined(__GNUC__) && (__GNUC__ >= 4)
420 return __builtin_ctzll((U64)val);
421 # else
422 static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19,
423 4, 25, 14, 28, 9, 34, 20, 56,
424 5, 17, 26, 54, 15, 41, 29, 43,
425 10, 31, 38, 35, 21, 45, 49, 57,
426 63, 6, 12, 18, 24, 27, 33, 55,
427 16, 53, 40, 42, 30, 37, 44, 48,
428 62, 11, 23, 32, 52, 39, 36, 47,
429 61, 22, 51, 46, 60, 50, 59, 58 };
430 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
431 # endif
432 } else { /* 32 bits */
433 # if defined(_MSC_VER)
434 if (val != 0) {
435 unsigned long r;
436 _BitScanForward(&r, (U32)val);
437 return (unsigned)r;
438 } else {
439 /* Should not reach this code path */
440 __assume(0);
441 }
442 # elif defined(__GNUC__) && (__GNUC__ >= 3)
443 return __builtin_ctz((U32)val);
444 # else
445 static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3,
446 30, 22, 20, 15, 25, 17, 4, 8,
447 31, 27, 13, 23, 21, 19, 16, 7,
448 26, 12, 18, 6, 11, 5, 10, 9 };
449 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
450 # endif
451 }
452 }
453
454
455 /* ZSTD_invalidateRepCodes() :
456 * ensures next compression will not use repcodes from previous block.
457 * Note : only works with regular variant;
458 * do not use with extDict variant ! */
459 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
460
461
462 typedef struct {
463 blockType_e blockType;
464 U32 lastBlock;
465 U32 origSize;
466 } blockProperties_t; /* declared here for decompress and fullbench */
467
468 /*! ZSTD_getcBlockSize() :
469 * Provides the size of compressed block from block header `src` */
470 /* Used by: decompress, fullbench (does not get its definition from here) */
471 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
472 blockProperties_t* bpPtr);
473
474 /*! ZSTD_decodeSeqHeaders() :
475 * decode sequence header from src */
476 /* Used by: decompress, fullbench (does not get its definition from here) */
477 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
478 const void* src, size_t srcSize);
479
480 /**
481 * @returns true iff the CPU supports dynamic BMI2 dispatch.
482 */
ZSTD_cpuSupportsBmi2(void)483 MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
484 {
485 ZSTD_cpuid_t cpuid = ZSTD_cpuid();
486 return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
487 }
488
489 #if defined (__cplusplus)
490 }
491 #endif
492
493 #endif /* ZSTD_CCOMMON_H_MODULE */
494