• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     bench.c - Demo program to benchmark open-source compression algorithms
3     Copyright (C) Yann Collet 2012-2020
4 
5     GPL v2 License
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License along
18     with this program; if not, write to the Free Software Foundation, Inc.,
19     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 
21     You can contact the author at :
22     - LZ4 homepage : http://www.lz4.org
23     - LZ4 source repository : https://github.com/lz4/lz4
24 */
25 
26 
27 /*-************************************
28 *  Compiler options
29 **************************************/
30 #ifdef _MSC_VER    /* Visual Studio */
31 #  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
32 #endif
33 
34 
35 /* *************************************
36 *  Includes
37 ***************************************/
38 #include "platform.h"    /* Compiler options */
39 #include "util.h"        /* UTIL_GetFileSize, UTIL_sleep */
40 #include <stdlib.h>      /* malloc, free */
41 #include <string.h>      /* memset */
42 #include <stdio.h>       /* fprintf, fopen, ftello */
43 #include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
44 #include <assert.h>      /* assert */
45 
46 #include "datagen.h"     /* RDG_genBuffer */
47 #include "xxhash.h"
48 #include "bench.h"
49 
50 #define LZ4_STATIC_LINKING_ONLY
51 #include "lz4.h"
52 #define LZ4_HC_STATIC_LINKING_ONLY
53 #include "lz4hc.h"
54 #include "lz4frame.h"   /* LZ4F_decompress */
55 
56 
57 /* *************************************
58 *  Constants
59 ***************************************/
/* Git commit string printed in the benchmark banner.
 * Falls back to an empty string when nothing was defined beforehand. */
#ifndef LZ4_GIT_COMMIT_STRING
#  define LZ4_GIT_COMMIT_STRING ""
#else
#  define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT)
#endif

/* Timing constants.
 * Each expansion is fully parenthesized so the macro behaves as a single
 * value in any expression (e.g. `x / TIMELOOP_NANOSEC`). */
#define NBSECONDS             3
#define TIMELOOP_MICROSEC     (1*1000000ULL)     /* 1 second */
#define TIMELOOP_NANOSEC      (1*1000000000ULL)  /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70*1000000ULL)    /* 70 seconds */
#define COOLPERIOD_SEC        10
#define DECOMP_MULT           1 /* test decompression DECOMP_MULT times longer than compression */
72 
73 #define KB *(1 <<10)
74 #define MB *(1 <<20)
75 #define GB *(1U<<30)
76 
77 #define LZ4_MAX_DICT_SIZE (64 KB)
78 
79 static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
80 
81 static U32 g_compressibilityDefault = 50;
82 
83 
84 /* *************************************
85 *  console display
86 ***************************************/
87 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
88 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
89 static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
90 
91 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
92             if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
93             { g_time = clock(); DISPLAY(__VA_ARGS__); \
94             if (g_displayLevel>=4) fflush(stdout); } }
95 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
96 static clock_t g_time = 0;
97 
98 
99 /* *************************************
100 *  DEBUG and error conditions
101 ***************************************/
#ifndef DEBUG
#  define DEBUG 0
#endif
/* DEBUGOUTPUT() : message on stderr, only when DEBUG is non-zero */
#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
/* END_PROCESS() : report `error` plus a printf-style message at display
 * level 1, then terminate the program with exit(error). */
#define END_PROCESS(error, ...)                                             \
{                                                                         \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, "\n");                                                \
    exit(error);                                                          \
}

/* LZ4_isError() : true when `errcode` is 0.
 * The argument is parenthesized so any expression can be passed safely. */
#define LZ4_isError(errcode) ((errcode)==0)
116 
117 
118 /* *************************************
119 *  Benchmark Parameters
120 ***************************************/
121 static U32 g_nbSeconds = NBSECONDS;
122 static size_t g_blockSize = 0;
123 int g_additionalParam = 0;
124 int g_benchSeparately = 0;
125 int g_decodeOnly = 0;
126 unsigned g_skipChecksums = 0;
127 
BMK_setNotificationLevel(unsigned level)128 void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
129 
BMK_setAdditionalParam(int additionalParam)130 void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
131 
BMK_setNbSeconds(unsigned nbSeconds)132 void BMK_setNbSeconds(unsigned nbSeconds)
133 {
134     g_nbSeconds = nbSeconds;
135     DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
136 }
137 
BMK_setBlockSize(size_t blockSize)138 void BMK_setBlockSize(size_t blockSize) { g_blockSize = blockSize; }
139 
BMK_setBenchSeparately(int separate)140 void BMK_setBenchSeparately(int separate) { g_benchSeparately = (separate!=0); }
141 
BMK_setDecodeOnlyMode(int set)142 void BMK_setDecodeOnlyMode(int set) { g_decodeOnly = (set!=0); }
143 
BMK_skipChecksums(int skip)144 void BMK_skipChecksums(int skip) { g_skipChecksums = (skip!=0); }
145 
146 
147 /* *************************************
148  *  Compression state management
149 ***************************************/
150 
151 struct compressionParameters
152 {
153     int cLevel;
154     const char* dictBuf;
155     int dictSize;
156 
157     LZ4_stream_t* LZ4_stream;
158     LZ4_stream_t* LZ4_dictStream;
159     LZ4_streamHC_t* LZ4_streamHC;
160     LZ4_streamHC_t* LZ4_dictStreamHC;
161 
162     void (*initFunction)(
163         struct compressionParameters* pThis);
164     void (*resetFunction)(
165         const struct compressionParameters* pThis);
166     int (*blockFunction)(
167         const struct compressionParameters* pThis,
168         const char* src, char* dst, int srcSize, int dstSize);
169     void (*cleanupFunction)(
170         const struct compressionParameters* pThis);
171 };
172 
173 static void
LZ4_compressInitNoStream(struct compressionParameters * pThis)174 LZ4_compressInitNoStream(struct compressionParameters* pThis)
175 {
176     pThis->LZ4_stream = NULL;
177     pThis->LZ4_dictStream = NULL;
178     pThis->LZ4_streamHC = NULL;
179     pThis->LZ4_dictStreamHC = NULL;
180 }
181 
182 static void
LZ4_compressInitStream(struct compressionParameters * pThis)183 LZ4_compressInitStream(struct compressionParameters* pThis)
184 {
185     pThis->LZ4_stream = LZ4_createStream();
186     pThis->LZ4_dictStream = LZ4_createStream();
187     pThis->LZ4_streamHC = NULL;
188     pThis->LZ4_dictStreamHC = NULL;
189     LZ4_loadDict(pThis->LZ4_dictStream, pThis->dictBuf, pThis->dictSize);
190 }
191 
192 static void
LZ4_compressInitStreamHC(struct compressionParameters * pThis)193 LZ4_compressInitStreamHC(struct compressionParameters* pThis)
194 {
195     pThis->LZ4_stream = NULL;
196     pThis->LZ4_dictStream = NULL;
197     pThis->LZ4_streamHC = LZ4_createStreamHC();
198     pThis->LZ4_dictStreamHC = LZ4_createStreamHC();
199     LZ4_loadDictHC(pThis->LZ4_dictStreamHC, pThis->dictBuf, pThis->dictSize);
200 }
201 
/* reset callback for one-shot compression : nothing to reset */
static void
LZ4_compressResetNoStream(const struct compressionParameters* pThis)
{
    (void)pThis;   /* intentionally unused */
}
207 
208 static void
LZ4_compressResetStream(const struct compressionParameters * pThis)209 LZ4_compressResetStream(const struct compressionParameters* pThis)
210 {
211     LZ4_resetStream_fast(pThis->LZ4_stream);
212     LZ4_attach_dictionary(pThis->LZ4_stream, pThis->LZ4_dictStream);
213 }
214 
215 static void
LZ4_compressResetStreamHC(const struct compressionParameters * pThis)216 LZ4_compressResetStreamHC(const struct compressionParameters* pThis)
217 {
218     LZ4_resetStreamHC_fast(pThis->LZ4_streamHC, pThis->cLevel);
219     LZ4_attach_HC_dictionary(pThis->LZ4_streamHC, pThis->LZ4_dictStreamHC);
220 }
221 
222 static int
LZ4_compressBlockNoStream(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)223 LZ4_compressBlockNoStream(const struct compressionParameters* pThis,
224                           const char* src, char* dst,
225                           int srcSize, int dstSize)
226 {
227     int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1;
228     return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
229 }
230 
231 static int
LZ4_compressBlockNoStreamHC(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)232 LZ4_compressBlockNoStreamHC(const struct compressionParameters* pThis,
233                             const char* src, char* dst,
234                             int srcSize, int dstSize)
235 {
236     return LZ4_compress_HC(src, dst, srcSize, dstSize, pThis->cLevel);
237 }
238 
239 static int
LZ4_compressBlockStream(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)240 LZ4_compressBlockStream(const struct compressionParameters* pThis,
241                         const char* src, char* dst,
242                         int srcSize, int dstSize)
243 {
244     int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1;
245     return LZ4_compress_fast_continue(pThis->LZ4_stream, src, dst, srcSize, dstSize, acceleration);
246 }
247 
248 static int
LZ4_compressBlockStreamHC(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)249 LZ4_compressBlockStreamHC(const struct compressionParameters* pThis,
250                           const char* src, char* dst,
251                           int srcSize, int dstSize)
252 {
253     return LZ4_compress_HC_continue(pThis->LZ4_streamHC, src, dst, srcSize, dstSize);
254 }
255 
/* cleanup callback for one-shot compression : nothing was allocated */
static void
LZ4_compressCleanupNoStream(const struct compressionParameters* pThis)
{
    (void)pThis;   /* intentionally unused */
}
261 
262 static void
LZ4_compressCleanupStream(const struct compressionParameters * pThis)263 LZ4_compressCleanupStream(const struct compressionParameters* pThis)
264 {
265     LZ4_freeStream(pThis->LZ4_stream);
266     LZ4_freeStream(pThis->LZ4_dictStream);
267 }
268 
269 static void
LZ4_compressCleanupStreamHC(const struct compressionParameters * pThis)270 LZ4_compressCleanupStreamHC(const struct compressionParameters* pThis)
271 {
272     LZ4_freeStreamHC(pThis->LZ4_streamHC);
273     LZ4_freeStreamHC(pThis->LZ4_dictStreamHC);
274 }
275 
276 static void
LZ4_buildCompressionParameters(struct compressionParameters * pParams,int cLevel,const char * dictBuf,int dictSize)277 LZ4_buildCompressionParameters(struct compressionParameters* pParams,
278                                int cLevel,
279                          const char* dictBuf, int dictSize)
280 {
281     pParams->cLevel = cLevel;
282     pParams->dictBuf = dictBuf;
283     pParams->dictSize = dictSize;
284 
285     if (dictSize) {
286         if (cLevel < LZ4HC_CLEVEL_MIN) {
287             pParams->initFunction = LZ4_compressInitStream;
288             pParams->resetFunction = LZ4_compressResetStream;
289             pParams->blockFunction = LZ4_compressBlockStream;
290             pParams->cleanupFunction = LZ4_compressCleanupStream;
291         } else {
292             pParams->initFunction = LZ4_compressInitStreamHC;
293             pParams->resetFunction = LZ4_compressResetStreamHC;
294             pParams->blockFunction = LZ4_compressBlockStreamHC;
295             pParams->cleanupFunction = LZ4_compressCleanupStreamHC;
296         }
297     } else {
298         pParams->initFunction = LZ4_compressInitNoStream;
299         pParams->resetFunction = LZ4_compressResetNoStream;
300         pParams->cleanupFunction = LZ4_compressCleanupNoStream;
301 
302         if (cLevel < LZ4HC_CLEVEL_MIN) {
303             pParams->blockFunction = LZ4_compressBlockNoStream;
304         } else {
305             pParams->blockFunction = LZ4_compressBlockNoStreamHC;
306         }
307     }
308 }
309 
310 
311 typedef int (*DecFunction_f)(const char* src, char* dst,
312                              int srcSize, int dstCapacity,
313                              const char* dictStart, int dictSize);
314 
315 static LZ4F_dctx* g_dctx = NULL;
316 
317 static int
LZ4F_decompress_binding(const char * src,char * dst,int srcSize,int dstCapacity,const char * dictStart,int dictSize)318 LZ4F_decompress_binding(const char* src, char* dst,
319                         int srcSize, int dstCapacity,
320                   const char* dictStart, int dictSize)
321 {
322     size_t dstSize = (size_t)dstCapacity;
323     size_t readSize = (size_t)srcSize;
324     LZ4F_decompressOptions_t dOpt = { 1, 0, 0, 0 };
325     size_t decStatus;
326     dOpt.skipChecksums = g_skipChecksums;
327     decStatus = LZ4F_decompress(g_dctx,
328                     dst, &dstSize,
329                     src, &readSize,
330                     &dOpt);
331     if ( (decStatus == 0)   /* decompression successful */
332       && ((int)readSize==srcSize) /* consume all input */ )
333         return (int)dstSize;
334     /* else, error */
335     return -1;
336     (void)dictStart; (void)dictSize;  /* not compatible with dictionary yet */
337 }
338 
339 
340 /* ********************************************************
341 *  Bench functions
342 **********************************************************/
/* blockParam_t :
 * bookkeeping for one benchmarked block : where its input lives, where its
 * compressed form goes, and where it is regenerated. */
typedef struct {
    const char* srcPtr;    /* start of the block's input */
    size_t srcSize;        /* size of the block's input */
    char*  cPtr;           /* start of the block's slot in the compressed buffer */
    size_t cRoom;          /* capacity reserved at cPtr */
    size_t cSize;          /* compressed size actually produced */
    char*  resPtr;         /* start of the block's slot in the result buffer */
    size_t resSize;        /* regenerated size actually produced */
} blockParam_t;

/* caution : each argument is evaluated twice */
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
355 
/*! BMK_benchMem() :
 *  Benchmarks compression then decompression of `srcBuffer` at level `cLevel`.
 *  The input is seen as `nbFiles` consecutive files (sizes in `fileSizes`),
 *  each cut into blocks of `g_blockSize` bytes (a single block per file when
 *  g_blockSize < 32 or in decode-only mode).
 *  In decode-only mode, the input is copied as-is into the compressed buffer
 *  and decoded with LZ4F_decompress_binding(); the regenerated data is given
 *  up to 255x the input size, capped at LZ4_MAX_INPUT_SIZE.
 *  Otherwise, regenerated data is verified against the source with XXH64.
 *  Results are reported through DISPLAY / DISPLAYLEVEL.
 * @param srcBuffer, srcSize : content to benchmark
 * @param displayName : label used in result lines (last 17 characters only)
 * @param dictBuf, dictSize : optional dictionary (dictSize==0 : none)
 * @return : 0. Allocation, compression and decoding failures terminate the
 *           program through END_PROCESS(). */
static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                        const char* displayName, int cLevel,
                        const size_t* fileSizes, U32 nbFiles,
                        const char* dictBuf, int dictSize)
{
    size_t const blockSize = (g_blockSize>=32 && !g_decodeOnly ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
    U32 const maxNbBlocks = (U32)((srcSize + (blockSize-1)) / blockSize) + nbFiles;
    blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
    size_t const maxCompressedSize = (size_t)LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
    void* const compressedBuffer = malloc(maxCompressedSize);
    size_t const decMultiplier = g_decodeOnly ? 255 : 1;
    size_t const maxInSize = (size_t)LZ4_MAX_INPUT_SIZE / decMultiplier;
    size_t const maxDecSize = srcSize < maxInSize ? srcSize * decMultiplier : LZ4_MAX_INPUT_SIZE;
    void* const resultBuffer = malloc(maxDecSize);
    U32 nbBlocks;
    struct compressionParameters compP;

    /* checks */
    if (!compressedBuffer || !resultBuffer || !blockTable)
        END_PROCESS(31, "allocation error : not enough memory");

    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */

    /* init */
    LZ4_buildCompressionParameters(&compP, cLevel, dictBuf, dictSize);
    compP.initFunction(&compP);
    if (g_dctx==NULL) {
        LZ4F_createDecompressionContext(&g_dctx, LZ4F_VERSION);
        if (g_dctx==NULL)
            END_PROCESS(1, "allocation error - decompression state");
    }

    /* Init blockTable data */
    {   const char* srcPtr = (const char*)srcBuffer;
        char* cPtr = (char*)compressedBuffer;
        char* resPtr = (char*)resultBuffer;
        U32 fileNb;
        for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
            size_t remaining = fileSizes[fileNb];
            U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
            for ( ; nbBlocks<blockEnd; nbBlocks++) {
                size_t const thisBlockSize = MIN(remaining, blockSize);
                size_t const resMaxSize = thisBlockSize * decMultiplier;
                size_t const resCapa = (thisBlockSize < maxInSize) ? resMaxSize : LZ4_MAX_INPUT_SIZE;
                blockTable[nbBlocks].srcPtr = srcPtr;
                blockTable[nbBlocks].cPtr = cPtr;
                blockTable[nbBlocks].resPtr = resPtr;
                blockTable[nbBlocks].srcSize = thisBlockSize;
                blockTable[nbBlocks].cRoom = (size_t)LZ4_compressBound((int)thisBlockSize);
                srcPtr += thisBlockSize;
                cPtr += blockTable[nbBlocks].cRoom;
                resPtr += resCapa;
                remaining -= thisBlockSize;
    }   }   }

    /* warming up memory */
    RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);

    /* decode-only mode : copy input to @compressedBuffer */
    if (g_decodeOnly) {
        U32 blockNb;
        for (blockNb=0; blockNb < nbBlocks; blockNb++) {
            memcpy(blockTable[blockNb].cPtr, blockTable[blockNb].srcPtr, blockTable[blockNb].srcSize);
            blockTable[blockNb].cSize = blockTable[blockNb].srcSize;
    }   }

    /* Bench */
    {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
        U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
        UTIL_time_t coolTime = UTIL_getTime();
        U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 100;
        U32 nbCompressionLoops = (U32)((5 MB) / (srcSize+1)) + 1;  /* conservative initial compression speed estimate */
        U32 nbDecodeLoops = (U32)((200 MB) / (srcSize+1)) + 1;  /* conservative initial decode speed estimate */
        U64 totalCTime=0, totalDTime=0;
        U32 cCompleted=(g_decodeOnly==1), dCompleted=0;   /* decode-only : compression stage is skipped */
#       define NB_MARKS 4
        const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };
        U32 markNb = 0;
        size_t cSize = srcSize;
        size_t totalRSize = srcSize;
        double ratio = 0.;

        DISPLAYLEVEL(2, "\r%79s\r", "");
        while (!cCompleted || !dCompleted) {
            /* overheat protection */
            if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
                DISPLAYLEVEL(2, "\rcooling down ...    \r");
                UTIL_sleep(COOLPERIOD_SEC);
                coolTime = UTIL_getTime();
            }

            /* Compression */
            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)totalRSize);
            if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase compressed buffer */

            UTIL_sleepMilli(1);  /* give processor time to other processes */
            UTIL_waitForNextTick();

            if (!cCompleted) {   /* still some time to do compression tests */
                UTIL_time_t const clockStart = UTIL_getTime();
                U32 nbLoops;
                for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) {
                    U32 blockNb;
                    compP.resetFunction(&compP);
                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                        size_t const rSize = (size_t)compP.blockFunction(
                            &compP,
                            blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr,
                            (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom);
                        if (LZ4_isError(rSize)) END_PROCESS(1, "LZ4 compression failed");
                        blockTable[blockNb].cSize = rSize;
                }   }
                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
                    if (clockSpan > 0) {
                        if (clockSpan < fastestC * nbCompressionLoops)
                            fastestC = clockSpan / nbCompressionLoops;
                        assert(fastestC > 0);
                        nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1;  /* aim for ~1sec */
                    } else {
                        assert(nbCompressionLoops < 40000000);   /* avoid overflow */
                        nbCompressionLoops *= 100;
                    }
                    totalCTime += clockSpan;
                    cCompleted = totalCTime>maxTime;
                }

                cSize = 0;
                { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
                cSize += !cSize;  /* avoid div by 0 */
                ratio = (double)totalRSize / (double)cSize;
                markNb = (markNb+1) % NB_MARKS;
                DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
                        marks[markNb], displayName,
                        (U32)totalRSize, (U32)cSize, ratio,
                        ((double)totalRSize / fastestC) * 1000 );
            }
            (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
#if 1
            /* Decompression */
            if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */

            UTIL_sleepMilli(5); /* give processor time to other processes */
            UTIL_waitForNextTick();

            if (!dCompleted) {
                const DecFunction_f decFunction = g_decodeOnly ?
                    LZ4F_decompress_binding : LZ4_decompress_safe_usingDict;
                const char* const decString = g_decodeOnly ?
                    "LZ4F_decompress" : "LZ4_decompress_safe_usingDict";
                UTIL_time_t const clockStart = UTIL_getTime();
                U32 nbLoops;

                for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
                    U32 blockNb;
                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                        /* Capacity handed to the decoder : same formula as the one
                         * used to lay out resPtr in blockTable, so the decoder is
                         * never told it owns more room than was reserved. */
                        size_t const resCapa = (blockTable[blockNb].srcSize < maxInSize) ?
                                                blockTable[blockNb].srcSize * decMultiplier :
                                                LZ4_MAX_INPUT_SIZE;
                        int const regenSize = decFunction(
                            blockTable[blockNb].cPtr, blockTable[blockNb].resPtr,
                            (int)blockTable[blockNb].cSize, (int)resCapa,
                            dictBuf, dictSize);
                        if (regenSize < 0) {
                            DISPLAY("%s() failed on block %u of size %u \n",
                                decString, blockNb, (unsigned)blockTable[blockNb].srcSize);
                            if (g_decodeOnly)
                                DISPLAY("Is input using LZ4 Frame format ? \n");
                            END_PROCESS(2, "error during decoding");
                            break;
                        }
                        blockTable[blockNb].resSize = (size_t)regenSize;
                }   }
                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
                    if (clockSpan > 0) {
                        if (clockSpan < fastestD * nbDecodeLoops)
                            fastestD = clockSpan / nbDecodeLoops;
                        assert(fastestD > 0);
                        nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1;  /* aim for ~1sec */
                    } else {
                        assert(nbDecodeLoops < 40000000);   /* avoid overflow */
                        nbDecodeLoops *= 100;
                    }
                    totalDTime += clockSpan;
                    dCompleted = totalDTime > (DECOMP_MULT*maxTime);
            }   }

            if (g_decodeOnly) {
                unsigned u;
                totalRSize = 0;
                for (u=0; u<nbBlocks; u++) totalRSize += blockTable[u].resSize;
            }
            markNb = (markNb+1) % NB_MARKS;
            ratio  = (double)totalRSize / (double)cSize;
            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
                    marks[markNb], displayName,
                    (U32)totalRSize, (U32)cSize, ratio,
                    ((double)totalRSize / fastestC) * 1000,
                    ((double)totalRSize / fastestD) * 1000);

            /* CRC Checking (not possible in decode-only mode)*/
            if (!g_decodeOnly) {
                U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
                if (crcOrig!=crcCheck) {
                    size_t u;
                    DISPLAY("\n!!! WARNING !!! %17s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
                    /* locate and report the first differing byte */
                    for (u=0; u<srcSize; u++) {
                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
                            U32 segNb, bNb, pos;
                            size_t bacc = 0;
                            DISPLAY("Decoding error at pos %u ", (U32)u);
                            for (segNb = 0; segNb < nbBlocks; segNb++) {
                                if (bacc + blockTable[segNb].srcSize > u) break;
                                bacc += blockTable[segNb].srcSize;
                            }
                            pos = (U32)(u - bacc);
                            bNb = pos / (128 KB);
                            DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
                            break;
                        }
                        if (u==srcSize-1) {  /* should never happen */
                            DISPLAY("no difference detected\n");
                    }   }
                    break;   /* checksum mismatch : stop benchmarking this level */
            }   }   /* CRC Checking */
#endif
        }   /* while (!cCompleted || !dCompleted) */

        if (g_displayLevel == 1) {
            double const cSpeed = ((double)srcSize / fastestC) * 1000;
            double const dSpeed = ((double)srcSize / fastestD) * 1000;
            if (g_additionalParam)
                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
            else
                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
        }
        DISPLAYLEVEL(2, "%2i#\n", cLevel);
    }   /* Bench */

    /* clean up */
    compP.cleanupFunction(&compP);
    free(blockTable);
    free(compressedBuffer);
    free(resultBuffer);
    return 0;
}
603 
604 
/*! BMK_findMaxMem() :
 *  Probes how much memory can be allocated, starting from `requiredMem`
 *  rounded up to the next 64 MB boundary plus 128 MB (capped at `maxMemory`),
 *  and lowering the request until malloc() succeeds : by 64 MB steps while
 *  above 64 MB, by halving below. The probe buffer is freed immediately.
 * @return : the size that could be allocated, reduced once more by the same
 *           rule to keep some space available;
 *           0 when no allocation of any size succeeded. */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    BYTE* testmem = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2*step;
    if (requiredMem > maxMemory) requiredMem = maxMemory;

    while (!testmem) {
        if (requiredMem > step) requiredMem -= step;
        else requiredMem >>= 1;
        /* malloc(0) is allowed to return NULL : stop here rather than loop forever */
        if (requiredMem == 0) return 0;
        testmem = (BYTE*) malloc ((size_t)requiredMem);
    }
    free (testmem);

    /* keep some space available */
    if (requiredMem > step) requiredMem -= step;
    else requiredMem >>= 1;

    return (size_t)requiredMem;
}
627 
628 
BMK_benchCLevel(void * srcBuffer,size_t benchedSize,const char * displayName,int cLevel,int cLevelLast,const size_t * fileSizes,unsigned nbFiles,const char * dictBuf,int dictSize)629 static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
630                             const char* displayName, int cLevel, int cLevelLast,
631                             const size_t* fileSizes, unsigned nbFiles,
632                             const char* dictBuf, int dictSize)
633 {
634     int l;
635 
636     const char* pch = strrchr(displayName, '\\'); /* Windows */
637     if (!pch) pch = strrchr(displayName, '/'); /* Linux */
638     if (pch) displayName = pch+1;
639 
640     SET_REALTIME_PRIORITY;
641 
642     if (g_displayLevel == 1 && !g_additionalParam)
643         DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
644 
645     if (cLevelLast < cLevel) cLevelLast = cLevel;
646 
647     for (l=cLevel; l <= cLevelLast; l++) {
648         BMK_benchMem(srcBuffer, benchedSize,
649                      displayName, l,
650                      fileSizes, nbFiles,
651                      dictBuf, dictSize);
652     }
653 }
654 
655 
656 /*! BMK_loadFiles() :
657     Loads `buffer` with content of files listed within `fileNamesTable`.
658     At most, fills `buffer` entirely */
BMK_loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char ** fileNamesTable,unsigned nbFiles)659 static void BMK_loadFiles(void* buffer, size_t bufferSize,
660                           size_t* fileSizes,
661                           const char** fileNamesTable, unsigned nbFiles)
662 {
663     size_t pos = 0, totalSize = 0;
664     unsigned n;
665     for (n=0; n<nbFiles; n++) {
666         FILE* f;
667         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
668         if (UTIL_isDirectory(fileNamesTable[n])) {
669             DISPLAYLEVEL(2, "Ignoring %s directory...       \n", fileNamesTable[n]);
670             fileSizes[n] = 0;
671             continue;
672         }
673         f = fopen(fileNamesTable[n], "rb");
674         if (f==NULL) END_PROCESS(10, "impossible to open file %s", fileNamesTable[n]);
675         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
676         if (fileSize > bufferSize-pos) { /* buffer too small - stop after this file */
677             fileSize = bufferSize-pos;
678             nbFiles=n;
679         }
680         { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
681           if (readSize != (size_t)fileSize) END_PROCESS(11, "could not read %s", fileNamesTable[n]);
682           pos += readSize; }
683         fileSizes[n] = (size_t)fileSize;
684         totalSize += (size_t)fileSize;
685         fclose(f);
686     }
687 
688     if (totalSize == 0) END_PROCESS(12, "no data to bench");
689 }
690 
BMK_benchFileTable(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictBuf,int dictSize)691 static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
692                                int cLevel, int cLevelLast,
693                                const char* dictBuf, int dictSize)
694 {
695     void* srcBuffer;
696     size_t benchedSize;
697     size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
698     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
699     char mfName[20] = {0};
700 
701     if (!fileSizes) END_PROCESS(12, "not enough memory for fileSizes");
702 
703     /* Memory allocation & restrictions */
704     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
705     if (benchedSize==0) END_PROCESS(12, "not enough memory");
706     if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
707     if (benchedSize > LZ4_MAX_INPUT_SIZE) {
708         benchedSize = LZ4_MAX_INPUT_SIZE;
709         DISPLAY("File(s) bigger than LZ4's max input size; testing %u MB only...\n", (U32)(benchedSize >> 20));
710     } else {
711         if (benchedSize < totalSizeToLoad)
712             DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
713     }
714     srcBuffer = malloc(benchedSize + !benchedSize);   /* avoid alloc of zero */
715     if (!srcBuffer) END_PROCESS(12, "not enough memory");
716 
717     /* Load input buffer */
718     BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
719 
720     /* Bench */
721     snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
722     {   const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
723         BMK_benchCLevel(srcBuffer, benchedSize,
724                         displayName, cLevel, cLevelLast,
725                         fileSizes, nbFiles,
726                         dictBuf, dictSize);
727     }
728 
729     /* clean up */
730     free(srcBuffer);
731     free(fileSizes);
732 }
733 
734 
/* BMK_syntheticTest() :
 * Benchmarks a generated sample rather than files :
 * a 10000000-byte buffer is filled by RDG_genBuffer() using `compressibility`,
 * then benchmarked as one single "file" at every level from `cLevel` to `cLevelLast`.
 * The displayed name embeds the compressibility expressed as a percentage.
 * Allocation failure is reported through END_PROCESS(). */
static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility,
                              const char* dictBuf, int dictSize)
{
    size_t sampleSize = 10000000;
    char label[20] = {0};
    void* const sample = malloc(sampleSize);

    if (sample == NULL) END_PROCESS(21, "not enough memory");

    /* generate content */
    RDG_genBuffer(sample, sampleSize, compressibility, 0.0, 0);

    /* build display name */
    {   unsigned const percent = (unsigned)(compressibility*100);
        snprintf (label, sizeof(label), "Synthetic %2u%%", percent);
    }

    /* the sample is presented as a table of one file, whose size is `sampleSize` */
    BMK_benchCLevel(sample, sampleSize,
                    label, cLevel, cLevelLast,
                    &sampleSize, 1,
                    dictBuf, dictSize);

    free(sample);
}
755 
756 
757 static int
BMK_benchFilesSeparately(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictBuf,int dictSize)758 BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles,
759                    int cLevel, int cLevelLast,
760                    const char* dictBuf, int dictSize)
761 {
762     unsigned fileNb;
763     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
764     if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
765     if (cLevelLast < cLevel) cLevelLast = cLevel;
766 
767     for (fileNb=0; fileNb<nbFiles; fileNb++)
768         BMK_benchFileTable(fileNamesTable+fileNb, 1, cLevel, cLevelLast, dictBuf, dictSize);
769 
770     return 0;
771 }
772 
773 
BMK_benchFiles(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictFileName)774 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
775                    int cLevel, int cLevelLast,
776                    const char* dictFileName)
777 {
778     double const compressibility = (double)g_compressibilityDefault / 100;
779     char* dictBuf = NULL;
780     size_t dictSize = 0;
781 
782     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
783     if (g_decodeOnly) {
784         DISPLAYLEVEL(2, "Benchmark Decompression of LZ4 Frame ");
785         if (g_skipChecksums) {
786             DISPLAYLEVEL(2, "_without_ checksum even when present \n");
787         } else {
788             DISPLAYLEVEL(2, "+ Checksum when present \n");
789         }
790         cLevelLast = cLevel;
791     }
792     if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
793     if (cLevelLast < cLevel) cLevelLast = cLevel;
794     if (cLevelLast > cLevel)
795         DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
796 
797     if (dictFileName) {
798         FILE* dictFile = NULL;
799         U64 const dictFileSize = UTIL_getFileSize(dictFileName);
800         if (!dictFileSize)
801             END_PROCESS(25, "Dictionary error : could not stat dictionary file");
802         if (g_decodeOnly)
803             END_PROCESS(26, "Error : LZ4 Frame decoder mode not compatible with dictionary yet");
804 
805         dictFile = fopen(dictFileName, "rb");
806         if (!dictFile)
807             END_PROCESS(25, "Dictionary error : could not open dictionary file");
808 
809         if (dictFileSize > LZ4_MAX_DICT_SIZE) {
810             dictSize = LZ4_MAX_DICT_SIZE;
811             if (UTIL_fseek(dictFile, (long)(dictFileSize - dictSize), SEEK_SET))
812                 END_PROCESS(25, "Dictionary error : could not seek dictionary file");
813         } else {
814             dictSize = (size_t)dictFileSize;
815         }
816 
817         dictBuf = (char*)malloc(dictSize);
818         if (!dictBuf) END_PROCESS(25, "Allocation error : not enough memory");
819 
820         if (fread(dictBuf, 1, dictSize, dictFile) != dictSize)
821             END_PROCESS(25, "Dictionary error : could not read dictionary file");
822 
823         fclose(dictFile);
824     }
825 
826     if (nbFiles == 0)
827         BMK_syntheticTest(cLevel, cLevelLast, compressibility, dictBuf, (int)dictSize);
828     else {
829         if (g_benchSeparately)
830             BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, (int)dictSize);
831         else
832             BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, (int)dictSize);
833     }
834 
835     free(dictBuf);
836     return 0;
837 }
838