• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /**
12  * This fuzz target performs a zstd round-trip test by generating an arbitrary
13  * array of sequences, generating the associated source buffer, calling
14  * ZSTD_compressSequences(), and then decompresses and compares the result with
15  * the original generated source buffer.
16  */
17 
18 #define ZSTD_STATIC_LINKING_ONLY
19 #include "zstd_errors.h"
20 
21 #include <stddef.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <time.h>
26 #include "fuzz_helpers.h"
27 #include "zstd_helpers.h"
28 #include "fuzz_data_producer.h"
29 #include "fuzz_third_party_seq_prod.h"
30 
31 static ZSTD_CCtx* cctx = NULL;
32 static ZSTD_DCtx* dctx = NULL;
33 static void* literalsBuffer = NULL;
34 static void* generatedSrc = NULL;
35 static ZSTD_Sequence* generatedSequences = NULL;
36 
37 static void* dictBuffer = NULL;
38 static ZSTD_CDict* cdict = NULL;
39 static ZSTD_DDict* ddict = NULL;
40 
41 #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
42 #define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 20) /* Fixed size 1MB literals buffer */
43 #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
44 #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << ZSTD_WINDOWLOG_MAX_32) /* Allow up to 1 << ZSTD_WINDOWLOG_MAX_32 dictionary */
45 #define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
46 
47 /* Deterministic random number generator */
48 #define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
FUZZ_RDG_rand(uint32_t * src)49 static uint32_t FUZZ_RDG_rand(uint32_t* src)
50 {
51     static const uint32_t prime1 = 2654435761U;
52     static const uint32_t prime2 = 2246822519U;
53     uint32_t rand32 = *src;
54     rand32 *= prime1;
55     rand32 ^= prime2;
56     rand32  = FUZZ_RDG_rotl32(rand32, 13);
57     *src = rand32;
58     return rand32 >> 5;
59 }
60 
61 /* Make a pseudorandom string - this simple function exists to avoid
62  * taking a dependency on datagen.h to have RDG_genBuffer().
63  */
generatePseudoRandomString(char * str,size_t size,FUZZ_dataProducer_t * producer)64 static char* generatePseudoRandomString(char* str, size_t size, FUZZ_dataProducer_t* producer) {
65     const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
66     uint32_t seed = FUZZ_dataProducer_uint32(producer);
67     if (size) {
68         for (size_t n = 0; n < size; n++) {
69             int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
70             str[n] = charset[key];
71         }
72     }
73     return str;
74 }
75 
76 /* Returns size of source buffer */
decodeSequences(void * dst,size_t nbSequences,size_t literalsSize,const void * dict,size_t dictSize,ZSTD_SequenceFormat_e mode)77 static size_t decodeSequences(void* dst, size_t nbSequences,
78                               size_t literalsSize,
79                               const void* dict, size_t dictSize,
80                               ZSTD_SequenceFormat_e mode)
81 {
82     const uint8_t* litPtr = literalsBuffer;
83     const uint8_t* const litBegin = literalsBuffer;
84     const uint8_t* const litEnd = litBegin + literalsSize;
85     const uint8_t* dictPtr = dict;
86     uint8_t* op = dst;
87     const uint8_t* const oend = (uint8_t*)dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
88     size_t generatedSrcBufferSize = 0;
89     size_t bytesWritten = 0;
90 
91     for (size_t i = 0; i < nbSequences; ++i) {
92         /* block boundary */
93         if (generatedSequences[i].offset == 0)
94             FUZZ_ASSERT(generatedSequences[i].matchLength == 0);
95 
96         if (litPtr + generatedSequences[i].litLength > litEnd) {
97             litPtr = litBegin;
98         }
99         memcpy(op, litPtr, generatedSequences[i].litLength);
100         bytesWritten += generatedSequences[i].litLength;
101         op += generatedSequences[i].litLength;
102         litPtr += generatedSequences[i].litLength;
103 
104         /* Copy over the match */
105         {   size_t matchLength = generatedSequences[i].matchLength;
106             size_t j = 0;
107             size_t k = 0;
108             if (dictSize != 0) {
109                 if (generatedSequences[i].offset > bytesWritten) { /* Offset goes into the dictionary */
110                     size_t dictOffset = generatedSequences[i].offset - bytesWritten;
111                     size_t matchInDict = MIN(matchLength, dictOffset);
112                     for (; k < matchInDict; ++k) {
113                         op[k] = dictPtr[dictSize - dictOffset + k];
114                     }
115                     matchLength -= matchInDict;
116                     op += matchInDict;
117                 }
118             }
119             for (; j < matchLength; ++j) {
120                 op[j] = op[(ptrdiff_t)(j - generatedSequences[i].offset)];
121             }
122             op += j;
123             FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
124             bytesWritten += generatedSequences[i].matchLength;
125         }
126     }
127     generatedSrcBufferSize = bytesWritten;
128     FUZZ_ASSERT(litPtr <= litEnd);
129     if (mode == ZSTD_sf_noBlockDelimiters) {
130         const uint32_t lastLLSize = (uint32_t)(litEnd - litPtr);
131         if (lastLLSize <= (uint32_t)(oend - op)) {
132             memcpy(op, litPtr, lastLLSize);
133             generatedSrcBufferSize += lastLLSize;
134     }   }
135     return generatedSrcBufferSize;
136 }
137 
138 /* Returns nb sequences generated
139  * Note : random sequences are always valid in ZSTD_sf_noBlockDelimiters mode.
140  * However, it can fail with ZSTD_sf_explicitBlockDelimiters,
141  * due to potential lack of space in
142  */
generateRandomSequences(FUZZ_dataProducer_t * producer,size_t literalsSizeLimit,size_t dictSize,size_t windowLog,ZSTD_SequenceFormat_e mode)143 static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
144                                       size_t literalsSizeLimit, size_t dictSize,
145                                       size_t windowLog, ZSTD_SequenceFormat_e mode)
146 {
147     const uint32_t repCode = 0;  /* not used by sequence ingestion api */
148     size_t windowSize = 1ULL << windowLog;
149     size_t blockSizeMax = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
150     uint32_t matchLengthMax = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
151     uint32_t bytesGenerated = 0;
152     uint32_t nbSeqGenerated = 0;
153     uint32_t isFirstSequence = 1;
154     uint32_t blockSize = 0;
155 
156     if (mode == ZSTD_sf_explicitBlockDelimiters) {
157         /* ensure that no sequence can be larger than one block */
158         literalsSizeLimit = MIN(literalsSizeLimit, blockSizeMax/2);
159         matchLengthMax = MIN(matchLengthMax, (uint32_t)blockSizeMax/2);
160     }
161 
162     while ( nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ - 3 /* extra room for explicit delimiters */
163          && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
164          && !FUZZ_dataProducer_empty(producer)) {
165         uint32_t matchLength;
166         uint32_t matchBound = matchLengthMax;
167         uint32_t offset;
168         uint32_t offsetBound;
169         const uint32_t minLitLength = (isFirstSequence && (dictSize == 0));
170         const uint32_t litLength = FUZZ_dataProducer_uint32Range(producer, minLitLength, (uint32_t)literalsSizeLimit);
171         bytesGenerated += litLength;
172         if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
173             break;
174         }
175         offsetBound = (bytesGenerated > windowSize) ? (uint32_t)windowSize : bytesGenerated + (uint32_t)dictSize;
176         offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
177         if (dictSize > 0 && bytesGenerated <= windowSize) {
178             /* Prevent match length from being such that it would be associated with an offset too large
179              * from the decoder's perspective. If not possible (match would be too small),
180              * then reduce the offset if necessary.
181              */
182             const size_t bytesToReachWindowSize = windowSize - bytesGenerated;
183             if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
184                 const uint32_t newOffsetBound = offsetBound > windowSize ? (uint32_t)windowSize : offsetBound;
185                 offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
186             } else {
187                 matchBound = MIN(matchLengthMax, (uint32_t)bytesToReachWindowSize);
188             }
189         }
190         matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
191         bytesGenerated += matchLength;
192         if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
193             break;
194         }
195         {   ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
196             const uint32_t lastLits = FUZZ_dataProducer_uint32Range(producer, 0, litLength);
197             #define SPLITPROB 6000
198             #define SPLITMARK 5234
199             const int split = (FUZZ_dataProducer_uint32Range(producer, 0, SPLITPROB) == SPLITMARK);
200             if (mode == ZSTD_sf_explicitBlockDelimiters) {
201                 const size_t seqSize = seq.litLength + seq.matchLength;
202                 if (blockSize + seqSize > blockSizeMax) {  /* reaching limit : must end block now */
203                     const ZSTD_Sequence endBlock = {0, 0, 0, 0};
204                     generatedSequences[nbSeqGenerated++] = endBlock;
205                     blockSize = (uint32_t)seqSize;
206                 }
207                 if (split) {
208                     const ZSTD_Sequence endBlock = {0, lastLits, 0, 0};
209                     generatedSequences[nbSeqGenerated++] = endBlock;
210                     assert(lastLits <= seq.litLength);
211                     seq.litLength -= lastLits;
212                     blockSize = (uint32_t)(seqSize - lastLits);
213                 } else {
214                     blockSize += seqSize;
215                 }
216             }
217             generatedSequences[nbSeqGenerated++] = seq;
218             isFirstSequence = 0;
219         }
220     }
221 
222     if (mode == ZSTD_sf_explicitBlockDelimiters) {
223         /* always end sequences with a block delimiter */
224         const ZSTD_Sequence endBlock = {0, 0, 0, 0};
225         assert(nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ);
226         generatedSequences[nbSeqGenerated++] = endBlock;
227     }
228     return nbSeqGenerated;
229 }
230 
231 static size_t
transferLiterals(void * dst,size_t dstCapacity,const ZSTD_Sequence * seqs,size_t nbSeqs,const void * src,size_t srcSize)232 transferLiterals(void* dst, size_t dstCapacity, const ZSTD_Sequence* seqs, size_t nbSeqs, const void* src, size_t srcSize)
233 {
234     size_t n;
235     char* op = dst;
236     char* const oend = op + dstCapacity;
237     const char* ip = src;
238     const char* const iend = ip + srcSize;
239     for (n=0; n<nbSeqs; n++) {
240         size_t litLen = seqs[n].litLength;
241         size_t mlen = seqs[n].matchLength;
242         assert(op + litLen < oend); (void)oend;
243         assert(ip + litLen + mlen <= iend); (void)iend;
244         memcpy(op, ip, litLen);
245         op += litLen;
246         ip += litLen + mlen;
247     }
248     assert(oend - op >= 8);
249     return (size_t)(op - (char*)dst);
250 }
251 
roundTripTest_compressSequencesAndLiterals(void * result,size_t resultCapacity,void * compressed,size_t compressedCapacity,const void * src,size_t srcSize,const ZSTD_Sequence * seqs,size_t nbSeqs)252 static size_t roundTripTest_compressSequencesAndLiterals(
253                     void* result, size_t resultCapacity,
254                     void* compressed, size_t compressedCapacity,
255                     const void* src, size_t srcSize,
256                     const ZSTD_Sequence* seqs, size_t nbSeqs)
257 {
258     size_t const litCapacity = srcSize + 8;
259     void* literals = malloc(litCapacity);
260     size_t cSize, litSize;
261 
262     assert(literals);
263     litSize = transferLiterals(literals, litCapacity, seqs, nbSeqs, src, srcSize);
264 
265     cSize = ZSTD_compressSequencesAndLiterals(cctx,
266                                 compressed, compressedCapacity,
267                                    seqs, nbSeqs,
268                                    literals, litSize, litCapacity, srcSize);
269     free(literals);
270     if (ZSTD_getErrorCode(cSize) == ZSTD_error_cannotProduce_uncompressedBlock) {
271         /* Valid scenario : ZSTD_compressSequencesAndLiterals cannot generate uncompressed blocks */
272         return 0;
273     }
274     if (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall) {
275         /* Valid scenario : in explicit delimiter mode,
276          * it might be possible for the compressed size to outgrow dstCapacity.
277          * In which case, it's still a valid fuzzer scenario,
278          * but no roundtrip shall be possible */
279         return 0;
280     }
281 
282     /* round-trip */
283     FUZZ_ZASSERT(cSize);
284     {   size_t const dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
285         FUZZ_ZASSERT(dSize);
286         FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
287         FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
288         return dSize;
289     }
290 }
291 
roundTripTest(void * result,size_t resultCapacity,void * compressed,size_t compressedCapacity,const void * src,size_t srcSize,const ZSTD_Sequence * seqs,size_t nbSeqs,unsigned hasDict,ZSTD_SequenceFormat_e mode)292 static size_t roundTripTest(void* result, size_t resultCapacity,
293                             void* compressed, size_t compressedCapacity,
294                             const void* src, size_t srcSize,
295                             const ZSTD_Sequence* seqs, size_t nbSeqs,
296                             unsigned hasDict,
297                             ZSTD_SequenceFormat_e mode)
298 {
299     size_t cSize;
300     size_t dSize;
301 
302     if (hasDict) {
303         FUZZ_ZASSERT(ZSTD_CCtx_refCDict(cctx, cdict));
304         FUZZ_ZASSERT(ZSTD_DCtx_refDDict(dctx, ddict));
305     }
306 
307     {   int blockMode, validation;
308         /* compressSequencesAndLiterals() only supports explicitBlockDelimiters and no validation */
309         FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_blockDelimiters, &blockMode));
310         FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_validateSequences, &validation));
311         if ((blockMode == ZSTD_sf_explicitBlockDelimiters) && (!validation)) {
312             FUZZ_ZASSERT(roundTripTest_compressSequencesAndLiterals(result, resultCapacity, compressed, compressedCapacity, src, srcSize, seqs, nbSeqs));
313         }
314     }
315 
316     cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
317                                    seqs, nbSeqs,
318                                    src, srcSize);
319     if ( (ZSTD_getErrorCode(cSize) == ZSTD_error_dstSize_tooSmall)
320       && (mode == ZSTD_sf_explicitBlockDelimiters) ) {
321         /* Valid scenario : in explicit delimiter mode,
322          * it might be possible for the compressed size to outgrow dstCapacity.
323          * In which case, it's still a valid fuzzer scenario,
324          * but no roundtrip shall be possible */
325         return 0;
326     }
327     /* round-trip */
328     FUZZ_ZASSERT(cSize);
329     dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
330     FUZZ_ZASSERT(dSize);
331     FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");
332     FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, result, srcSize), "Corruption!");
333     return dSize;
334 }
335 
LLVMFuzzerTestOneInput(const uint8_t * src,size_t size)336 int LLVMFuzzerTestOneInput(const uint8_t* src, size_t size)
337 {
338     FUZZ_SEQ_PROD_SETUP();
339 
340     void* rBuf;
341     size_t rBufSize;
342     void* cBuf;
343     size_t cBufSize;
344     size_t generatedSrcSize;
345     size_t nbSequences;
346     size_t dictSize = 0;
347     unsigned hasDict;
348     unsigned wLog;
349     int cLevel;
350     ZSTD_SequenceFormat_e mode;
351 
352     FUZZ_dataProducer_t* const producer = FUZZ_dataProducer_create(src, size);
353     FUZZ_ASSERT(producer);
354 
355     if (!cctx) {
356         cctx = ZSTD_createCCtx();
357         FUZZ_ASSERT(cctx);
358     }
359     if (!dctx) {
360         dctx = ZSTD_createDCtx();
361         FUZZ_ASSERT(dctx);
362     }
363 
364     /* Generate window log first so we don't generate offsets too large */
365     wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
366     cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
367     mode = (ZSTD_SequenceFormat_e)FUZZ_dataProducer_int32Range(producer, 0, 1);
368 
369     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
370     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
371     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
372     ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)wLog);
373     ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
374     ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
375     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, (int)mode);
376     ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach);
377 
378     if (!literalsBuffer) {
379         literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
380         FUZZ_ASSERT(literalsBuffer);
381         literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, producer);
382     }
383 
384     if (!dictBuffer) { /* Generate global dictionary buffer */
385         ZSTD_compressionParameters cParams;
386 
387         /* Generate a large dictionary buffer */
388         dictBuffer = calloc(ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, 1);
389         FUZZ_ASSERT(dictBuffer);
390 
391         /* Create global cdict and ddict */
392         cParams = ZSTD_getCParams(1, ZSTD_FUZZ_GENERATED_SRC_MAXSIZE, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
393         cParams.minMatch = ZSTD_MINMATCH_MIN;
394         cParams.hashLog = ZSTD_HASHLOG_MIN;
395         cParams.chainLog = ZSTD_CHAINLOG_MIN;
396 
397         cdict = ZSTD_createCDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
398         ddict = ZSTD_createDDict_advanced(dictBuffer, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
399         FUZZ_ASSERT(cdict);
400         FUZZ_ASSERT(ddict);
401     }
402 
403     FUZZ_ASSERT(cdict);
404     FUZZ_ASSERT(ddict);
405 
406     hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
407     if (hasDict) {
408         dictSize = ZSTD_FUZZ_GENERATED_DICT_MAXSIZE;
409     }
410 
411     if (!generatedSequences) {
412         generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
413     }
414     if (!generatedSrc) {
415         generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
416     }
417 
418     nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog, mode);
419     generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize, mode);
420 
421     /* Note : in explicit block delimiters mode,
422      * the fuzzer might generate a lot of small blocks.
423      * In which case, the final compressed size might be > ZSTD_compressBound().
424      * This is still a valid scenario fuzzer though, which makes it possible to check under-sized dstCapacity.
425      * The test just doesn't roundtrip. */
426     cBufSize = ZSTD_compressBound(generatedSrcSize);
427     cBuf = FUZZ_malloc(cBufSize);
428 
429     rBufSize = generatedSrcSize;
430     rBuf = FUZZ_malloc(rBufSize);
431 
432     {   const size_t result = roundTripTest(rBuf, rBufSize,
433                                         cBuf, cBufSize,
434                                         generatedSrc, generatedSrcSize,
435                                         generatedSequences, nbSequences,
436                                         hasDict, mode);
437         FUZZ_ASSERT(result <= generatedSrcSize);  /* can be 0 when no round-trip */
438     }
439 
440     free(rBuf);
441     free(cBuf);
442     FUZZ_dataProducer_free(producer);
443 #ifndef STATEFUL_FUZZING
444     ZSTD_freeCCtx(cctx); cctx = NULL;
445     ZSTD_freeDCtx(dctx); dctx = NULL;
446     free(generatedSequences); generatedSequences = NULL;
447     free(generatedSrc); generatedSrc = NULL;
448     free(literalsBuffer); literalsBuffer = NULL;
449 #endif
450     FUZZ_SEQ_PROD_TEARDOWN();
451     return 0;
452 }
453