1 /*
2     bench.c - Demo program to benchmark open-source compression algorithms
3     Copyright (C) Yann Collet 2012-2020
4 
5     GPL v2 License
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License along
18     with this program; if not, write to the Free Software Foundation, Inc.,
19     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 
21     You can contact the author at :
22     - LZ4 homepage : http://www.lz4.org
23     - LZ4 source repository : https://github.com/lz4/lz4
24 */
25 
26 
27 /*-************************************
28 *  Compiler options
29 **************************************/
30 #ifdef _MSC_VER    /* Visual Studio */
31 #  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
32 #endif
33 
34 
35 /* *************************************
36 *  Includes
37 ***************************************/
38 #include "platform.h"    /* Compiler options */
39 #include "util.h"        /* UTIL_GetFileSize, UTIL_sleep */
40 #include <stdlib.h>      /* malloc, free */
41 #include <string.h>      /* memset */
42 #include <stdio.h>       /* fprintf, fopen, ftello */
43 #include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
44 #include <assert.h>      /* assert */
45 
46 #include "lorem.h"       /* LOREM_genBuffer */
47 #include "xxhash.h"
48 #include "bench.h"
49 #include "timefn.h"
50 
51 #define LZ4_STATIC_LINKING_ONLY
52 #include "lz4.h"
53 #define LZ4_HC_STATIC_LINKING_ONLY
54 #include "lz4hc.h"
55 #include "lz4frame.h"   /* LZ4F_decompress */
56 
57 
58 /* *************************************
59 *  Constants
60 ***************************************/
/* NOTE(review): the #else branch re-defines LZ4_GIT_COMMIT_STRING while it is
 * already defined; this looks like it was meant to test LZ4_GIT_COMMIT - confirm
 * against the build system before changing it. */
#ifndef LZ4_GIT_COMMIT_STRING
#  define LZ4_GIT_COMMIT_STRING ""
#else
#  define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT)
#endif

/* Durations are fully parenthesized so that they behave as a single value
 * in any expression (notably on the right-hand side of `/` or `%`). */
#define NBSECONDS             3
#define TIMELOOP_MICROSEC     (1*1000000ULL)      /* 1 second */
#define TIMELOOP_NANOSEC      (1*1000000000ULL)   /* 1 second */
#define ACTIVEPERIOD_NANOSEC  (70*1000000000ULL)  /* 70 seconds */
#define COOLPERIOD_SEC        10
#define DECOMP_MULT           1 /* test decompression DECOMP_MULT times longer than compression */

/* Postfix unit suffixes : `64 KB` expands to `64 *(1 <<10)` */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define LZ4_MAX_DICT_SIZE (64 KB)

/* Upper bound on the memory probed by BMK_findMaxMem() :
 * 2 GB - 64 MB when size_t is 4 bytes wide, 1 << (bit width of size_t - 31) otherwise */
static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
81 
82 
83 /* *************************************
84 *  console display
85 ***************************************/
86 #define DISPLAYOUT(...)      fprintf(stdout, __VA_ARGS__)
87 #define OUTLEVEL(l, ...)     if (g_displayLevel>=(l)) { DISPLAYOUT(__VA_ARGS__); }
88 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
89 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=(l)) { DISPLAY(__VA_ARGS__); }
90 static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
91 
92 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
93             if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
94             { g_time = clock(); DISPLAY(__VA_ARGS__); \
95             if (g_displayLevel>=4) fflush(stdout); } }
96 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
97 static clock_t g_time = 0;
98 
99 
100 /* *************************************
101 *  DEBUG and error conditions
102 ***************************************/
#ifndef DEBUG
#  define DEBUG 0
#endif
/* Prints only when DEBUG is non-zero.
 * Expands to a complete statement : the trailing semicolon is part of the macro. */
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* END_PROCESS : reports `error` and a printf-style message at display level 1,
 * then terminates the program with exit(error). */
#define END_PROCESS(error, ...)                                           \
do {                                                                      \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, "\n");                                                \
    exit(error);                                                          \
} while (0)

/* A result of 0 is treated as an error.
 * The argument is parenthesized so that any expression can be passed safely. */
#define LZ4_isError(errcode) ((errcode)==0)
117 
118 
119 /* *************************************
120 *  Benchmark Parameters
121 ***************************************/
122 static U32 g_nbSeconds = NBSECONDS;
123 static size_t g_blockSize = 0;
124 int g_additionalParam = 0;
125 int g_benchSeparately = 0;
126 int g_decodeOnly = 0;
127 unsigned g_skipChecksums = 0;
128 
BMK_setNotificationLevel(unsigned level)129 void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
130 
BMK_setAdditionalParam(int additionalParam)131 void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
132 
BMK_setNbSeconds(unsigned nbSeconds)133 void BMK_setNbSeconds(unsigned nbSeconds)
134 {
135     g_nbSeconds = nbSeconds;
136     DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
137 }
138 
BMK_setBlockSize(size_t blockSize)139 void BMK_setBlockSize(size_t blockSize) { g_blockSize = blockSize; }
140 
BMK_setBenchSeparately(int separate)141 void BMK_setBenchSeparately(int separate) { g_benchSeparately = (separate!=0); }
142 
BMK_setDecodeOnlyMode(int set)143 void BMK_setDecodeOnlyMode(int set) { g_decodeOnly = (set!=0); }
144 
BMK_skipChecksums(int skip)145 void BMK_skipChecksums(int skip) { g_skipChecksums = (skip!=0); }
146 
147 
148 /* *************************************
149  *  Compression state management
150 ***************************************/
151 
152 struct compressionParameters
153 {
154     int cLevel;
155     const char* dictBuf;
156     int dictSize;
157 
158     LZ4_stream_t* LZ4_stream;
159     LZ4_stream_t* LZ4_dictStream;
160     LZ4_streamHC_t* LZ4_streamHC;
161     LZ4_streamHC_t* LZ4_dictStreamHC;
162 
163     void (*initFunction)(
164         struct compressionParameters* pThis);
165     void (*resetFunction)(
166         const struct compressionParameters* pThis);
167     int (*blockFunction)(
168         const struct compressionParameters* pThis,
169         const char* src, char* dst, int srcSize, int dstSize);
170     void (*cleanupFunction)(
171         const struct compressionParameters* pThis);
172 };
173 
174 static void
LZ4_compressInitNoStream(struct compressionParameters * pThis)175 LZ4_compressInitNoStream(struct compressionParameters* pThis)
176 {
177     pThis->LZ4_stream = NULL;
178     pThis->LZ4_dictStream = NULL;
179     pThis->LZ4_streamHC = NULL;
180     pThis->LZ4_dictStreamHC = NULL;
181 }
182 
183 static void
LZ4_compressInitStream(struct compressionParameters * pThis)184 LZ4_compressInitStream(struct compressionParameters* pThis)
185 {
186     pThis->LZ4_stream = LZ4_createStream();
187     pThis->LZ4_dictStream = LZ4_createStream();
188     pThis->LZ4_streamHC = NULL;
189     pThis->LZ4_dictStreamHC = NULL;
190     LZ4_loadDictSlow(pThis->LZ4_dictStream, pThis->dictBuf, pThis->dictSize);
191 }
192 
193 static void
LZ4_compressInitStreamHC(struct compressionParameters * pThis)194 LZ4_compressInitStreamHC(struct compressionParameters* pThis)
195 {
196     pThis->LZ4_stream = NULL;
197     pThis->LZ4_dictStream = NULL;
198     pThis->LZ4_streamHC = LZ4_createStreamHC();
199     pThis->LZ4_dictStreamHC = LZ4_createStreamHC();
200     LZ4_resetStreamHC_fast(pThis->LZ4_dictStreamHC, pThis->cLevel);
201     LZ4_loadDictHC(pThis->LZ4_dictStreamHC, pThis->dictBuf, pThis->dictSize);
202 }
203 
/* resetFunction for one-shot compression : nothing to reset. */
static void LZ4_compressResetNoStream(const struct compressionParameters* cparams)
{
    (void)cparams;   /* intentionally unused */
}
209 
210 static void
LZ4_compressResetStream(const struct compressionParameters * cparams)211 LZ4_compressResetStream(const struct compressionParameters* cparams)
212 {
213     LZ4_resetStream_fast(cparams->LZ4_stream);
214     LZ4_attach_dictionary(cparams->LZ4_stream, cparams->LZ4_dictStream);
215 }
216 
217 static void
LZ4_compressResetStreamHC(const struct compressionParameters * cparams)218 LZ4_compressResetStreamHC(const struct compressionParameters* cparams)
219 {
220     LZ4_resetStreamHC_fast(cparams->LZ4_streamHC, cparams->cLevel);
221     LZ4_attach_HC_dictionary(cparams->LZ4_streamHC, cparams->LZ4_dictStreamHC);
222 }
223 
224 static int
LZ4_compressBlockNoStream(const struct compressionParameters * cparams,const char * src,char * dst,int srcSize,int dstSize)225 LZ4_compressBlockNoStream(const struct compressionParameters* cparams,
226                           const char* src, char* dst,
227                           int srcSize, int dstSize)
228 {
229     int const acceleration = (cparams->cLevel < 0) ? -cparams->cLevel + 1 : 1;
230     return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
231 }
232 
233 static int
LZ4_compressBlockNoStreamHC(const struct compressionParameters * cparams,const char * src,char * dst,int srcSize,int dstSize)234 LZ4_compressBlockNoStreamHC(const struct compressionParameters* cparams,
235                             const char* src, char* dst,
236                             int srcSize, int dstSize)
237 {
238     return LZ4_compress_HC(src, dst, srcSize, dstSize, cparams->cLevel);
239 }
240 
241 static int
LZ4_compressBlockStream(const struct compressionParameters * cparams,const char * src,char * dst,int srcSize,int dstSize)242 LZ4_compressBlockStream(const struct compressionParameters* cparams,
243                         const char* src, char* dst,
244                         int srcSize, int dstSize)
245 {
246     int const acceleration = (cparams->cLevel < 0) ? -cparams->cLevel + 1 : 1;
247     LZ4_compressResetStream(cparams);
248     return LZ4_compress_fast_continue(cparams->LZ4_stream, src, dst, srcSize, dstSize, acceleration);
249 }
250 
251 static int
LZ4_compressBlockStreamHC(const struct compressionParameters * cparams,const char * src,char * dst,int srcSize,int dstSize)252 LZ4_compressBlockStreamHC(const struct compressionParameters* cparams,
253                           const char* src, char* dst,
254                           int srcSize, int dstSize)
255 {
256     LZ4_compressResetStreamHC(cparams);
257     return LZ4_compress_HC_continue(cparams->LZ4_streamHC, src, dst, srcSize, dstSize);
258 }
259 
/* cleanupFunction for one-shot compression : nothing to release. */
static void LZ4_compressCleanupNoStream(const struct compressionParameters* cparams)
{
    (void)cparams;   /* intentionally unused */
}
265 
266 static void
LZ4_compressCleanupStream(const struct compressionParameters * cparams)267 LZ4_compressCleanupStream(const struct compressionParameters* cparams)
268 {
269     LZ4_freeStream(cparams->LZ4_stream);
270     LZ4_freeStream(cparams->LZ4_dictStream);
271 }
272 
273 static void
LZ4_compressCleanupStreamHC(const struct compressionParameters * cparams)274 LZ4_compressCleanupStreamHC(const struct compressionParameters* cparams)
275 {
276     LZ4_freeStreamHC(cparams->LZ4_streamHC);
277     LZ4_freeStreamHC(cparams->LZ4_dictStreamHC);
278 }
279 
280 static void
LZ4_buildCompressionParameters(struct compressionParameters * pParams,int cLevel,const char * dictBuf,int dictSize)281 LZ4_buildCompressionParameters(struct compressionParameters* pParams,
282                                int cLevel,
283                          const char* dictBuf, int dictSize)
284 {
285     pParams->cLevel = cLevel;
286     pParams->dictBuf = dictBuf;
287     pParams->dictSize = dictSize;
288 
289     if (dictSize) {
290         if (cLevel < LZ4HC_CLEVEL_MIN) {
291             pParams->initFunction = LZ4_compressInitStream;
292             pParams->resetFunction = LZ4_compressResetStream;
293             pParams->blockFunction = LZ4_compressBlockStream;
294             pParams->cleanupFunction = LZ4_compressCleanupStream;
295         } else {
296             pParams->initFunction = LZ4_compressInitStreamHC;
297             pParams->resetFunction = LZ4_compressResetStreamHC;
298             pParams->blockFunction = LZ4_compressBlockStreamHC;
299             pParams->cleanupFunction = LZ4_compressCleanupStreamHC;
300         }
301     } else {
302         pParams->initFunction = LZ4_compressInitNoStream;
303         pParams->resetFunction = LZ4_compressResetNoStream;
304         pParams->cleanupFunction = LZ4_compressCleanupNoStream;
305 
306         if (cLevel < LZ4HC_CLEVEL_MIN) {
307             pParams->blockFunction = LZ4_compressBlockNoStream;
308         } else {
309             pParams->blockFunction = LZ4_compressBlockNoStreamHC;
310         }
311     }
312 }
313 
314 
315 typedef int (*DecFunction_f)(const char* src, char* dst,
316                              int srcSize, int dstCapacity,
317                              const char* dictStart, int dictSize);
318 
319 static LZ4F_dctx* g_dctx = NULL;
320 
321 static int
LZ4F_decompress_binding(const char * src,char * dst,int srcSize,int dstCapacity,const char * dictStart,int dictSize)322 LZ4F_decompress_binding(const char* src, char* dst,
323                         int srcSize, int dstCapacity,
324                   const char* dictStart, int dictSize)
325 {
326     size_t dstSize = (size_t)dstCapacity;
327     size_t readSize = (size_t)srcSize;
328     LZ4F_decompressOptions_t dOpt = { 1, 0, 0, 0 };
329     size_t decStatus;
330     dOpt.skipChecksums = g_skipChecksums;
331     decStatus = LZ4F_decompress(g_dctx,
332                     dst, &dstSize,
333                     src, &readSize,
334                     &dOpt);
335     if ( (decStatus == 0)   /* decompression successful */
336       && ((int)readSize==srcSize) /* consume all input */ )
337         return (int)dstSize;
338     /* else, error */
339     return -1;
340     (void)dictStart; (void)dictSize;  /* not compatible with dictionary yet */
341 }
342 
343 
344 /* ********************************************************
345 *  Bench functions
346 **********************************************************/
/* Describes one benchmarked block : where it is read from,
 * where it is compressed to, and where it is regenerated. */
typedef struct {
    const char* srcPtr;    /* start of the original data */
    size_t      srcSize;   /* size of the original data */
    char*       cPtr;      /* start of the compressed data */
    size_t      cRoom;     /* space reserved for the compressed data */
    size_t      cSize;     /* size of the compressed data */
    char*       resPtr;    /* start of the regenerated data */
    size_t      resSize;   /* size of the regenerated data */
} blockParam_t;

/* Beware : both macros evaluate their arguments more than once. */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
359 
/*! BMK_benchMem() :
 *  Benchmarks compression then decompression of an in-memory sample at one
 *  compression level, and prints the results.
 *  In decode-only mode (g_decodeOnly), the input is copied as-is into the
 *  compressed buffer and only LZ4F decompression is measured.
 * @param srcBuffer, srcSize : sample to benchmark; the content of the `nbFiles`
 *                             files is expected back to back, in order
 * @param displayName : label printed with results (only the last 17 characters are shown)
 * @param cLevel      : compression level
 * @param fileSizes, nbFiles : size of each file within `srcBuffer`;
 *                             each file is cut into independent blocks
 * @param dictBuf, dictSize  : optional dictionary (dictSize == 0 means none)
 * @return : 0 on success, 1 if a compression, decompression or checksum error was detected.
 *  Exits the program (END_PROCESS) if working memory cannot be allocated. */
static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                        const char* displayName, int cLevel,
                        const size_t* fileSizes, U32 nbFiles,
                        const char* dictBuf, int dictSize)
{
    size_t const blockSize = (g_blockSize>=32 && !g_decodeOnly ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
    U32 const maxNbBlocks = (U32)((srcSize + (blockSize-1)) / blockSize) + nbFiles;
    blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
    size_t const maxCompressedSize = (size_t)LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
    void* const compressedBuffer = malloc(maxCompressedSize);
    size_t const decMultiplier = g_decodeOnly ? 255 : 1;   /* room reserved for regenerated data, as a multiple of input size */
    size_t const maxInSize = (size_t)LZ4_MAX_INPUT_SIZE / decMultiplier;
    size_t const maxDecSize = srcSize < maxInSize ? srcSize * decMultiplier : LZ4_MAX_INPUT_SIZE;
    void* const resultBuffer = malloc(maxDecSize);
    int benchError = 0;
    U32 nbBlocks;
    struct compressionParameters compP;

    /* checks */
    if (!compressedBuffer || !resultBuffer || !blockTable)
        END_PROCESS(31, "allocation error : not enough memory");

    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */

    /* init */
    LZ4_buildCompressionParameters(&compP, cLevel, dictBuf, dictSize);
    compP.initFunction(&compP);
    if (g_dctx==NULL) {
        LZ4F_createDecompressionContext(&g_dctx, LZ4F_VERSION);
        if (g_dctx==NULL)
            END_PROCESS(1, "allocation error - decompression state");
    }

    /* Init blockTable data :
     * each file is cut into blocks of at most blockSize bytes;
     * blocks never span across 2 files */
    {   const char* srcPtr = (const char*)srcBuffer;
        char* cPtr = (char*)compressedBuffer;
        char* resPtr = (char*)resultBuffer;
        U32 fileNb;
        for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
            size_t remaining = fileSizes[fileNb];
            U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
            for ( ; nbBlocks<blockEnd; nbBlocks++) {
                size_t const thisBlockSize = MIN(remaining, blockSize);
                size_t const resMaxSize = thisBlockSize * decMultiplier;
                size_t const resCapa = (thisBlockSize < maxInSize) ? resMaxSize : LZ4_MAX_INPUT_SIZE;
                blockTable[nbBlocks].srcPtr = srcPtr;
                blockTable[nbBlocks].cPtr = cPtr;
                blockTable[nbBlocks].resPtr = resPtr;
                blockTable[nbBlocks].srcSize = thisBlockSize;
                blockTable[nbBlocks].cRoom = (size_t)LZ4_compressBound((int)thisBlockSize);
                /* sizes are summed later on, even after a failed (de)compression :
                 * make sure they never hold indeterminate values */
                blockTable[nbBlocks].cSize = 0;
                blockTable[nbBlocks].resSize = 0;
                srcPtr += thisBlockSize;
                cPtr += blockTable[nbBlocks].cRoom;
                resPtr += resCapa;
                remaining -= thisBlockSize;
    }   }   }

    /* warming up memory */
    memset(compressedBuffer, ' ', maxCompressedSize);

    /* decode-only mode : copy input to @compressedBuffer */
    if (g_decodeOnly) {
        U32 blockNb;
        for (blockNb=0; blockNb < nbBlocks; blockNb++) {
            memcpy(blockTable[blockNb].cPtr, blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize);
            blockTable[blockNb].cSize = blockTable[blockNb].srcSize;
    }   }

    /* Bench */
    {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);   /* best time per loop, in ns */
        U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
        TIME_t coolTime = TIME_getTime();
        U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 100;
        U32 nbCompressionLoops = (U32)((5 MB) / (srcSize+1)) + 1;  /* conservative initial compression speed estimate */
        U32 nbDecodeLoops = (U32)((200 MB) / (srcSize+1)) + 1;  /* conservative initial decode speed estimate */
        Duration_ns totalCTime=0, totalDTime=0;
        U32 cCompleted=(g_decodeOnly==1), dCompleted=0;   /* compression is skipped in decode-only mode */
#       define NB_MARKS 4
        const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };
        U32 markNb = 0;
        size_t cSize = srcSize;
        size_t totalRSize = srcSize;
        double ratio = 0.;

        DISPLAYLEVEL(2, "\r%79s\r", "");
        if (g_nbSeconds==0) { nbCompressionLoops = 1; nbDecodeLoops = 1; }
        while (!cCompleted || !dCompleted) {
            /* overheat protection */
            if (TIME_clockSpan_ns(coolTime) > ACTIVEPERIOD_NANOSEC) {
                DISPLAYLEVEL(2, "\rcooling down ...    \r");
                UTIL_sleep(COOLPERIOD_SEC);
                coolTime = TIME_getTime();
            }

            /* Compression */
            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)totalRSize);
            if (!cCompleted) {
                memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase compressed buffer */
                { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) blockTable[blockNb].cSize = 0; }
            }

            UTIL_sleepMilli(1);  /* give processor time to other processes */
            TIME_waitForNextTick();

            if (!cCompleted) {   /* still some time to do compression tests */
                TIME_t const timeStart = TIME_getTime();
                U32 nbLoops;
                for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) {
                    U32 blockNb;
                    compP.resetFunction(&compP);
                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                        size_t const rSize = (size_t)compP.blockFunction(
                            &compP,
                            blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr,
                            (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom);
                        if (LZ4_isError(rSize)) {
                            DISPLAY("LZ4 compression failed on block %u \n", blockNb);
                            benchError =1 ;
                        }
                        blockTable[blockNb].cSize = rSize;
                }   }
                {   Duration_ns const duration_ns = TIME_clockSpan_ns(timeStart);
                    if (duration_ns > 0) {
                        if (duration_ns < fastestC * nbCompressionLoops)
                            fastestC = duration_ns / nbCompressionLoops;
                        assert(fastestC > 0);
                        nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1;  /* aim for ~1sec */
                    } else {
                        /* too fast to be measured : try again with more loops */
                        assert(nbCompressionLoops < 40000000);   /* avoid overflow */
                        nbCompressionLoops *= 100;
                    }
                    totalCTime += duration_ns;
                    cCompleted = totalCTime>maxTime;
                }

                cSize = 0;
                { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
                cSize += !cSize;  /* avoid div by 0 */
                ratio = (double)totalRSize / (double)cSize;
                markNb = (markNb+1) % NB_MARKS;
                OUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
                        marks[markNb], displayName,
                        (U32)totalRSize, (U32)cSize, ratio,
                        ((double)totalRSize / (double)fastestC) * 1000 );
                fflush(NULL);
            }
            (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
#if 1
            /* Decompression */
            if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */

            UTIL_sleepMilli(5); /* give processor time to other processes */
            TIME_waitForNextTick();

            if (!dCompleted) {
                const DecFunction_f decFunction = g_decodeOnly ?
                    LZ4F_decompress_binding : LZ4_decompress_safe_usingDict;
                const char* const decString = g_decodeOnly ?
                    "LZ4F_decompress" : "LZ4_decompress_safe_usingDict";
                TIME_t const timeStart = TIME_getTime();
                U32 nbLoops;

                for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
                    U32 blockNb;
                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                        /* capacity handed to the decoder must match the slot reserved
                         * for this block when the table was built (same formula),
                         * otherwise the decoder could write past its slot */
                        size_t const resCapa = (blockTable[blockNb].srcSize < maxInSize) ?
                                                blockTable[blockNb].srcSize * decMultiplier :
                                                LZ4_MAX_INPUT_SIZE;
                        int const regenSize = decFunction(
                            blockTable[blockNb].cPtr, blockTable[blockNb].resPtr,
                            (int)blockTable[blockNb].cSize, (int)resCapa,
                            dictBuf, dictSize);
                        if (regenSize < 0) {
                            DISPLAY("%s() failed on block %u of size %u \n",
                                decString, blockNb, (unsigned)blockTable[blockNb].srcSize);
                            if (g_decodeOnly)
                                DISPLAY("Is input using LZ4 Frame format ? \n");
                            benchError = 1;
                            break;   /* skip remaining blocks of this loop iteration */
                        }
                        blockTable[blockNb].resSize = (size_t)regenSize;
                }   }
                {   Duration_ns const duration_ns = TIME_clockSpan_ns(timeStart);
                    if (duration_ns > 0) {
                        if (duration_ns < fastestD * nbDecodeLoops)
                            fastestD = duration_ns / nbDecodeLoops;
                        assert(fastestD > 0);
                        nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1;  /* aim for ~1sec */
                    } else {
                        /* too fast to be measured : try again with more loops */
                        assert(nbDecodeLoops < 40000000);   /* avoid overflow */
                        nbDecodeLoops *= 100;
                    }
                    totalDTime += duration_ns;
                    dCompleted = totalDTime > (DECOMP_MULT*maxTime);
            }   }

            if (g_decodeOnly) {
                /* regenerated size is only known after decoding */
                unsigned u;
                totalRSize = 0;
                for (u=0; u<nbBlocks; u++) totalRSize += blockTable[u].resSize;
            }
            markNb = (markNb+1) % NB_MARKS;
            ratio  = (double)totalRSize / (double)cSize;
            OUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s, %6.1f MB/s\r",
                    marks[markNb], displayName,
                    (U32)totalRSize, (U32)cSize, ratio,
                    ((double)totalRSize / (double)fastestC) * 1000,
                    ((double)totalRSize / (double)fastestD) * 1000);
            fflush(NULL);

            /* CRC Checking (not possible in decode-only mode)*/
            if (!g_decodeOnly) {
                U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
                if (crcOrig!=crcCheck) {
                    size_t u;
                    DISPLAY("\n!!! WARNING !!! %17s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
                    benchError = 1;
                    /* locate and report the first differing byte */
                    for (u=0; u<srcSize; u++) {
                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
                            U32 segNb, bNb, pos;
                            size_t bacc = 0;
                            DISPLAY("Decoding error at pos %u ", (U32)u);
                            for (segNb = 0; segNb < nbBlocks; segNb++) {
                                if (bacc + blockTable[segNb].srcSize > u) break;
                                bacc += blockTable[segNb].srcSize;
                            }
                            pos = (U32)(u - bacc);
                            bNb = pos / (128 KB);
                            DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
                            break;
                        }
                        if (u==srcSize-1) {  /* should never happen */
                            DISPLAY("no difference detected\n");
                    }   }
                    break;   /* stop benchmarking after a checksum error */
            }   }   /* CRC Checking */
#endif
        }   /* while (!cCompleted || !dCompleted) */

        OUTLEVEL(2, "%2i#\n", cLevel);

        /* quiet mode */
        if (g_displayLevel == 1) {
            double const cSpeed = ((double)srcSize / (double)fastestC) * 1000;
            double const dSpeed = ((double)srcSize / (double)fastestD) * 1000;
            DISPLAYOUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s ", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
            if (g_additionalParam)
                DISPLAYOUT("(param=%d)", g_additionalParam);
            DISPLAYOUT("\n");
        }
    }   /* Bench */

    /* clean up */
    compP.cleanupFunction(&compP);
    free(blockTable);
    free(compressedBuffer);
    free(resultBuffer);
    return benchError;
}
620 
621 
/*! BMK_findMaxMem() :
 *  Probes how much memory can be allocated, starting from `requiredMem`
 *  rounded up to the next multiple of 64 MB, plus 2 steps of 64 MB,
 *  capped at `maxMemory`. The candidate size is decreased (by 64 MB steps,
 *  then by halving once at or below 64 MB) until malloc() succeeds.
 * @return : a size one more decrement below the size that could be allocated,
 *           in order to keep some space available;
 *           0 if no allocation succeeded before the candidate reached 0. */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t step = 64 MB;
    BYTE* testmem=NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2*step;
    if (requiredMem > maxMemory) requiredMem = maxMemory;

    while (!testmem) {
        if (requiredMem > step) requiredMem -= step;
        else requiredMem >>= 1;
        /* malloc(0) is allowed to return NULL : without this test,
         * the loop would never terminate once the candidate size reaches 0 */
        if (requiredMem == 0) return 0;
        testmem = (BYTE*) malloc ((size_t)requiredMem);
    }
    free (testmem);

    /* keep some space available */
    if (requiredMem > step) requiredMem -= step;
    else requiredMem >>= 1;

    return (size_t)requiredMem;
}
644 
645 
BMK_benchCLevel(void * srcBuffer,size_t benchedSize,const char * displayName,int cLevel,int cLevelLast,const size_t * fileSizes,unsigned nbFiles,const char * dictBuf,int dictSize)646 static int BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
647                             const char* displayName, int cLevel, int cLevelLast,
648                             const size_t* fileSizes, unsigned nbFiles,
649                             const char* dictBuf, int dictSize)
650 {
651     int l;
652     int benchError = 0;
653     const char* pch = strrchr(displayName, '\\'); /* Windows */
654     if (!pch) pch = strrchr(displayName, '/'); /* Linux */
655     if (pch) displayName = pch+1;
656 
657     SET_REALTIME_PRIORITY;
658 
659     if (g_displayLevel == 1 && !g_additionalParam)
660         DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
661 
662     if (cLevelLast < cLevel) cLevelLast = cLevel;
663 
664     for (l=cLevel; l <= cLevelLast; l++) {
665         benchError |= BMK_benchMem(
666                             srcBuffer, benchedSize,
667                             displayName, l,
668                             fileSizes, nbFiles,
669                             dictBuf, dictSize);
670     }
671     return benchError;
672 }
673 
674 
675 /*! BMK_loadFiles() :
676     Loads `buffer` with content of files listed within `fileNamesTable`.
677     At most, fills `buffer` entirely */
BMK_loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char ** fileNamesTable,unsigned nbFiles)678 static void BMK_loadFiles(void* buffer, size_t bufferSize,
679                           size_t* fileSizes,
680                           const char** fileNamesTable, unsigned nbFiles)
681 {
682     size_t pos = 0, totalSize = 0;
683     unsigned n;
684     for (n=0; n<nbFiles; n++) {
685         FILE* f;
686         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
687         if (UTIL_isDirectory(fileNamesTable[n])) {
688             DISPLAYLEVEL(2, "Ignoring %s directory...       \n", fileNamesTable[n]);
689             fileSizes[n] = 0;
690             continue;
691         }
692         f = fopen(fileNamesTable[n], "rb");
693         if (f==NULL) END_PROCESS(10, "impossible to open file %s", fileNamesTable[n]);
694         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
695         if (fileSize > bufferSize-pos) { /* buffer too small - stop after this file */
696             fileSize = bufferSize-pos;
697             nbFiles=n;
698         }
699         { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
700           if (readSize != (size_t)fileSize) END_PROCESS(11, "could not read %s", fileNamesTable[n]);
701           pos += readSize; }
702         fileSizes[n] = (size_t)fileSize;
703         totalSize += (size_t)fileSize;
704         fclose(f);
705     }
706 
707     if (totalSize == 0) END_PROCESS(12, "no data to bench");
708 }
709 
BMK_benchFileTable(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictBuf,int dictSize)710 static int BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
711                               int cLevel, int cLevelLast,
712                               const char* dictBuf, int dictSize)
713 {
714     void* srcBuffer;
715     size_t benchedSize;
716     int benchError = 0;
717     size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
718     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
719     char mfName[20] = {0};
720 
721     if (!fileSizes) END_PROCESS(12, "not enough memory for fileSizes");
722 
723     /* Memory allocation & restrictions */
724     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
725     if (benchedSize==0) END_PROCESS(12, "not enough memory");
726     if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
727     if (benchedSize > LZ4_MAX_INPUT_SIZE) {
728         benchedSize = LZ4_MAX_INPUT_SIZE;
729         DISPLAY("File(s) bigger than LZ4's max input size; testing %u MB only...\n", (U32)(benchedSize >> 20));
730     } else {
731         if (benchedSize < totalSizeToLoad)
732             DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
733     }
734     srcBuffer = malloc(benchedSize + !benchedSize);   /* avoid alloc of zero */
735     if (!srcBuffer) END_PROCESS(12, "not enough memory");
736 
737     /* Load input buffer */
738     BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
739 
740     /* Bench */
741     snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
742     {   const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
743         benchError = BMK_benchCLevel(srcBuffer, benchedSize,
744                         displayName, cLevel, cLevelLast,
745                         fileSizes, nbFiles,
746                         dictBuf, dictSize);
747     }
748 
749     /* clean up */
750     free(srcBuffer);
751     free(fileSizes);
752     return benchError;
753 }
754 
755 
/*! BMK_syntheticTest() :
 *  Benchmarks a buffer of 10000000 bytes produced by LOREM_genBuffer(),
 *  presented to the benchmark as one single file named "Lorem ipsum".
 * @return : result of BMK_benchCLevel().
 *  Allocation failure is reported through END_PROCESS(). */
static int BMK_syntheticTest(int cLevel, int cLevelLast,
                             const char* dictBuf, int dictSize)
{
    size_t const sampleSize = 10000000;
    void* const sample = malloc(sampleSize);
    int result;

    if (sample == NULL) END_PROCESS(21, "not enough memory");

    /* generate the content to compress */
    LOREM_genBuffer(sample, sampleSize, 0);

    /* the sample counts as one file, which size is the whole buffer */
    result = BMK_benchCLevel(sample, sampleSize,
                             "Lorem ipsum",
                             cLevel, cLevelLast,
                             &sampleSize, 1,
                             dictBuf, dictSize);

    free(sample);
    return result;
}
782 
783 
784 static int
BMK_benchFilesSeparately(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictBuf,int dictSize)785 BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles,
786                    int cLevel, int cLevelLast,
787                    const char* dictBuf, int dictSize)
788 {
789     int benchError = 0;
790     unsigned fileNb;
791     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
792     if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
793     if (cLevelLast < cLevel) cLevelLast = cLevel;
794 
795     for (fileNb=0; fileNb<nbFiles; fileNb++)
796         benchError |= BMK_benchFileTable(fileNamesTable+fileNb, 1, cLevel, cLevelLast, dictBuf, dictSize);
797 
798     return benchError;
799 }
800 
801 
BMK_benchFiles(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictFileName)802 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
803                    int cLevel, int cLevelLast,
804                    const char* dictFileName)
805 {
806     int benchError = 0;
807     char* dictBuf = NULL;
808     size_t dictSize = 0;
809 
810     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
811     if (g_decodeOnly) {
812         DISPLAYLEVEL(2, "Benchmark Decompression of LZ4 Frame ");
813         if (g_skipChecksums) {
814             DISPLAYLEVEL(2, "_without_ checksum even when present \n");
815         } else {
816             DISPLAYLEVEL(2, "+ Checksum when present \n");
817         }
818         cLevelLast = cLevel;
819     }
820     if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
821     if (cLevelLast < cLevel) cLevelLast = cLevel;
822     if (cLevelLast > cLevel)
823         DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
824 
825     if (dictFileName) {
826         FILE* dictFile = NULL;
827         U64 const dictFileSize = UTIL_getFileSize(dictFileName);
828         if (!dictFileSize)
829             END_PROCESS(25, "Dictionary error : could not stat dictionary file");
830         if (g_decodeOnly)
831             END_PROCESS(26, "Error : LZ4 Frame decoder mode not compatible with dictionary yet");
832 
833         dictFile = fopen(dictFileName, "rb");
834         if (!dictFile)
835             END_PROCESS(25, "Dictionary error : could not open dictionary file");
836 
837         if (dictFileSize > LZ4_MAX_DICT_SIZE) {
838             dictSize = LZ4_MAX_DICT_SIZE;
839             if (UTIL_fseek(dictFile, (long)(dictFileSize - dictSize), SEEK_SET))
840                 END_PROCESS(25, "Dictionary error : could not seek dictionary file");
841         } else {
842             dictSize = (size_t)dictFileSize;
843         }
844 
845         dictBuf = (char*)malloc(dictSize);
846         if (!dictBuf) END_PROCESS(25, "Allocation error : not enough memory");
847 
848         if (fread(dictBuf, 1, dictSize, dictFile) != dictSize)
849             END_PROCESS(25, "Dictionary error : could not read dictionary file");
850 
851         fclose(dictFile);
852     }
853 
854     if (nbFiles == 0) {
855         benchError = BMK_syntheticTest(cLevel, cLevelLast, dictBuf, (int)dictSize);
856     } else {
857         if (g_benchSeparately)
858             benchError = BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, (int)dictSize);
859         else
860             benchError = BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, (int)dictSize);
861     }
862 
863     free(dictBuf);
864     return benchError;
865 }
866