• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2015-2020, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 /*-************************************
13 *  Dependencies
14 **************************************/
15 #include "util.h"      /* Ensure platform.h is compiled first; also : compiler options, UTIL_GetFileSize */
16 #include <stdlib.h>    /* malloc */
17 #include <stdio.h>     /* fprintf, fopen, ftello64 */
18 #include <string.h>    /* strcmp */
19 #include <math.h>      /* log */
20 #include <assert.h>
21 
22 #include "timefn.h"    /* SEC_TO_MICRO, UTIL_time_t, UTIL_clockSpanMicro, UTIL_clockSpanNano, UTIL_getTime */
23 #include "mem.h"
24 #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters, ZSTD_estimateCCtxSize */
25 #include "zstd.h"
26 #include "datagen.h"
27 #include "xxhash.h"
28 #include "benchfn.h"
29 #include "benchzstd.h"
30 #include "zstd_errors.h"
31 #include "zstd_internal.h"     /* should not be needed */
32 
33 
34 /*-************************************
35 *  Constants
36 **************************************/
37 #define PROGRAM_DESCRIPTION "ZSTD parameters tester"
38 #define AUTHOR "Yann Collet"
39 #define WELCOME_MESSAGE "*** %s %s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, ZSTD_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR
40 
41 #define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
42 #define NB_LEVELS_TRACKED 22   /* ensured being >= ZSTD_maxCLevel() in BMK_init_level_constraints() */
43 
44 static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
45 
46 #define COMPRESSIBILITY_DEFAULT 0.50
47 
48 static const U64 g_maxVariationTime = 60 * SEC_TO_MICRO;
49 static const int g_maxNbVariations = 64;
50 
51 
52 /*-************************************
53 *  Macros
54 **************************************/
55 #define DISPLAY(...)  fprintf(stderr, __VA_ARGS__)
56 #define DISPLAYLEVEL(n, ...) if(g_displayLevel >= n) { fprintf(stderr, __VA_ARGS__); }
57 #define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
58 
59 #define TIMED 0
60 #ifndef DEBUG
61 #  define DEBUG 0
62 #endif
63 
64 #undef MIN
65 #undef MAX
66 #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
67 #define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
68 #define CUSTOM_LEVEL 99
69 #define BASE_CLEVEL 1
70 
71 #define FADT_MIN 0
72 #define FADT_MAX ((U32)-1)
73 
74 #define WLOG_RANGE (ZSTD_WINDOWLOG_MAX - ZSTD_WINDOWLOG_MIN + 1)
75 #define CLOG_RANGE (ZSTD_CHAINLOG_MAX - ZSTD_CHAINLOG_MIN + 1)
76 #define HLOG_RANGE (ZSTD_HASHLOG_MAX - ZSTD_HASHLOG_MIN + 1)
77 #define SLOG_RANGE (ZSTD_SEARCHLOG_MAX - ZSTD_SEARCHLOG_MIN + 1)
78 #define MML_RANGE  (ZSTD_MINMATCH_MAX - ZSTD_MINMATCH_MIN + 1)
79 #define TLEN_RANGE  17
80 #define STRT_RANGE (ZSTD_STRATEGY_MAX - ZSTD_STRATEGY_MIN + 1)
81 #define FADT_RANGE   3
82 
83 #define CHECKTIME(r) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); return r; } }
84 #define CHECKTIMEGT(ret, val, _gototag) { if(BMK_timeSpan_s(g_time) > g_timeLimit_s) { DEBUGOUTPUT("Time Limit Reached\n"); ret = val; goto _gototag; } }
85 
86 #define PARAM_UNSET ((U32)-2) /* can't be -1 b/c fadt uses -1 */
87 
88 static const char* g_stratName[ZSTD_STRATEGY_MAX+1] = {
89                 "(none)       ", "ZSTD_fast    ", "ZSTD_dfast   ",
90                 "ZSTD_greedy  ", "ZSTD_lazy    ", "ZSTD_lazy2   ",
91                 "ZSTD_btlazy2 ", "ZSTD_btopt   ", "ZSTD_btultra ",
92                 "ZSTD_btultra2"};
93 
94 static const U32 tlen_table[TLEN_RANGE] = { 0, 1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 256, 512, 999 };
95 
96 
97 /*-************************************
98 *  Setup for Adding new params
99 **************************************/
100 
101 /* indices for each of the variables */
102 typedef enum {
103     wlog_ind = 0,
104     clog_ind = 1,
105     hlog_ind = 2,
106     slog_ind = 3,
107     mml_ind  = 4,
108     tlen_ind = 5,
109     strt_ind = 6,
110     fadt_ind = 7, /* forceAttachDict */
111     NUM_PARAMS = 8
112 } varInds_t;
113 
114 typedef struct {
115     U32 vals[NUM_PARAMS];
116 } paramValues_t;
117 
118 /* minimum value of parameters */
119 static const U32 mintable[NUM_PARAMS] =
120         { ZSTD_WINDOWLOG_MIN, ZSTD_CHAINLOG_MIN, ZSTD_HASHLOG_MIN, ZSTD_SEARCHLOG_MIN, ZSTD_MINMATCH_MIN, ZSTD_TARGETLENGTH_MIN, ZSTD_STRATEGY_MIN, FADT_MIN };
121 
122 /* maximum value of parameters */
123 static const U32 maxtable[NUM_PARAMS] =
124         { ZSTD_WINDOWLOG_MAX, ZSTD_CHAINLOG_MAX, ZSTD_HASHLOG_MAX, ZSTD_SEARCHLOG_MAX, ZSTD_MINMATCH_MAX, ZSTD_TARGETLENGTH_MAX, ZSTD_STRATEGY_MAX, FADT_MAX };
125 
126 /* # of values parameters can take on */
127 static const U32 rangetable[NUM_PARAMS] =
128         { WLOG_RANGE, CLOG_RANGE, HLOG_RANGE, SLOG_RANGE, MML_RANGE, TLEN_RANGE, STRT_RANGE, FADT_RANGE };
129 
130 /* ZSTD_cctxSetParameter() index to set */
131 static const ZSTD_cParameter cctxSetParamTable[NUM_PARAMS] =
132         { ZSTD_c_windowLog, ZSTD_c_chainLog, ZSTD_c_hashLog, ZSTD_c_searchLog, ZSTD_c_minMatch, ZSTD_c_targetLength, ZSTD_c_strategy, ZSTD_c_forceAttachDict };
133 
134 /* names of parameters */
135 static const char* g_paramNames[NUM_PARAMS] =
136         { "windowLog", "chainLog", "hashLog","searchLog", "minMatch", "targetLength", "strategy", "forceAttachDict" };
137 
138 /* shortened names of parameters */
139 static const char* g_shortParamNames[NUM_PARAMS] =
140         { "wlog", "clog", "hlog", "slog", "mml", "tlen", "strat", "fadt" };
141 
142 /* maps value from { 0 to rangetable[param] - 1 } to valid paramvalues */
rangeMap(varInds_t param,int ind)143 static U32 rangeMap(varInds_t param, int ind)
144 {
145     U32 const uind = (U32)MAX(MIN(ind, (int)rangetable[param] - 1), 0);
146     switch(param) {
147         case wlog_ind: /* using default: triggers -Wswitch-enum */
148         case clog_ind:
149         case hlog_ind:
150         case slog_ind:
151         case mml_ind:
152         case strt_ind:
153             return mintable[param] + uind;
154         case tlen_ind:
155             return tlen_table[uind];
156         case fadt_ind: /* 0, 1, 2 -> -1, 0, 1 */
157             return uind - 1;
158         case NUM_PARAMS:
159         default:;
160     }
161     DISPLAY("Error, not a valid param\n ");
162     assert(0);
163     return (U32)-1;
164 }
165 
166 /* inverse of rangeMap */
invRangeMap(varInds_t param,U32 value)167 static int invRangeMap(varInds_t param, U32 value)
168 {
169     value = MIN(MAX(mintable[param], value), maxtable[param]);
170     switch(param) {
171         case wlog_ind:
172         case clog_ind:
173         case hlog_ind:
174         case slog_ind:
175         case mml_ind:
176         case strt_ind:
177             return (int)(value - mintable[param]);
178         case tlen_ind: /* bin search */
179         {
180             int lo = 0;
181             int hi = TLEN_RANGE;
182             while(lo < hi) {
183                 int mid = (lo + hi) / 2;
184                 if(tlen_table[mid] < value) {
185                     lo = mid + 1;
186                 } if(tlen_table[mid] == value) {
187                     return mid;
188                 } else {
189                     hi = mid;
190                 }
191             }
192             return lo;
193         }
194         case fadt_ind:
195             return (int)value + 1;
196         case NUM_PARAMS:
197         default:;
198     }
199     DISPLAY("Error, not a valid param\n ");
200     assert(0);
201     return -2;
202 }
203 
204 /* display of params */
displayParamVal(FILE * f,varInds_t param,unsigned value,int width)205 static void displayParamVal(FILE* f, varInds_t param, unsigned value, int width)
206 {
207     switch(param) {
208         case wlog_ind:
209         case clog_ind:
210         case hlog_ind:
211         case slog_ind:
212         case mml_ind:
213         case tlen_ind:
214             if(width) {
215                 fprintf(f, "%*u", width, value);
216             } else {
217                 fprintf(f, "%u", value);
218             }
219             break;
220         case strt_ind:
221             if(width) {
222                 fprintf(f, "%*s", width, g_stratName[value]);
223             } else {
224                 fprintf(f, "%s", g_stratName[value]);
225             }
226             break;
227         case fadt_ind:   /* force attach dict */
228             if(width) {
229                 fprintf(f, "%*d", width, (int)value);
230             } else {
231                 fprintf(f, "%d", (int)value);
232             }
233             break;
234         case NUM_PARAMS:
235         default:
236             DISPLAY("Error, not a valid param\n ");
237             assert(0);
238             break;
239     }
240 }
241 
242 
243 /*-************************************
244 *  Benchmark Parameters/Global Variables
245 **************************************/
246 
247 /* General Utility */
248 static U32 g_timeLimit_s = 99999;   /* about 27 hours */
249 static UTIL_time_t g_time; /* to be used to compare solution finding speeds to compare to original */
250 static U32 g_blockSize = 0;
251 static U32 g_rand = 1;
252 
253 /* Display */
254 static int g_displayLevel = 3;
255 static BYTE g_silenceParams[NUM_PARAMS];   /* can selectively silence some params when displaying them */
256 
257 /* Mode Selection */
258 static U32 g_singleRun = 0;
259 static U32 g_optimizer = 0;
260 static int g_optmode = 0;
261 
262 /* For cLevel Table generation */
263 static U32 g_target = 0;
264 static U32 g_noSeed = 0;
265 
266 /* For optimizer */
267 static paramValues_t g_params; /* Initialized at the beginning of main w/ emptyParams() function */
268 static double g_ratioMultiplier = 5.;
269 static U32 g_strictness = PARAM_UNSET; /* range 1 - 100, measure of how strict  */
270 static BMK_benchResult_t g_lvltarget;
271 
272 typedef enum {
273     directMap,
274     xxhashMap,
275     noMemo
276 } memoTableType_t;
277 
278 typedef struct {
279     memoTableType_t tableType;
280     BYTE* table;
281     size_t tableLen;
282     varInds_t varArray[NUM_PARAMS];
283     size_t varLen;
284 } memoTable_t;
285 
286 typedef struct {
287     BMK_benchResult_t result;
288     paramValues_t params;
289 } winnerInfo_t;
290 
291 typedef struct {
292     U32 cSpeed;  /* bytes / sec */
293     U32 dSpeed;
294     U32 cMem;    /* bytes */
295 } constraint_t;
296 
297 typedef struct winner_ll_node winner_ll_node;
298 struct winner_ll_node {
299     winnerInfo_t res;
300     winner_ll_node* next;
301 };
302 
303 static winner_ll_node* g_winners; /* linked list sorted ascending by cSize & cSpeed */
304 
305 /*
306  * Additional Global Variables (Defined Above Use)
307  * g_level_constraint
308  * g_alreadyTested
309  * g_maxTries
310  * g_clockGranularity
311  */
312 
313 
314 /*-*******************************************************
315 *  General Util Functions
316 *********************************************************/
317 
318 /* nullified useless params, to ensure count stats */
319 /* cleans up params for memoizing / display */
sanitizeParams(paramValues_t params)320 static paramValues_t sanitizeParams(paramValues_t params)
321 {
322     if (params.vals[strt_ind] == ZSTD_fast)
323         params.vals[clog_ind] = 0, params.vals[slog_ind] = 0;
324     if (params.vals[strt_ind] == ZSTD_dfast)
325         params.vals[slog_ind] = 0;
326     if ( (params.vals[strt_ind] < ZSTD_btopt) && (params.vals[strt_ind] != ZSTD_fast) )
327         params.vals[tlen_ind] = 0;
328 
329     return params;
330 }
331 
pvalsToCParams(paramValues_t p)332 static ZSTD_compressionParameters pvalsToCParams(paramValues_t p)
333 {
334     ZSTD_compressionParameters c;
335     memset(&c, 0, sizeof(ZSTD_compressionParameters));
336     c.windowLog = p.vals[wlog_ind];
337     c.chainLog = p.vals[clog_ind];
338     c.hashLog = p.vals[hlog_ind];
339     c.searchLog = p.vals[slog_ind];
340     c.minMatch = p.vals[mml_ind];
341     c.targetLength = p.vals[tlen_ind];
342     c.strategy = p.vals[strt_ind];
343     /* no forceAttachDict */
344     return c;
345 }
346 
cParamsToPVals(ZSTD_compressionParameters c)347 static paramValues_t cParamsToPVals(ZSTD_compressionParameters c)
348 {
349     paramValues_t p;
350     varInds_t i;
351     p.vals[wlog_ind] = c.windowLog;
352     p.vals[clog_ind] = c.chainLog;
353     p.vals[hlog_ind] = c.hashLog;
354     p.vals[slog_ind] = c.searchLog;
355     p.vals[mml_ind]  = c.minMatch;
356     p.vals[tlen_ind] = c.targetLength;
357     p.vals[strt_ind] = c.strategy;
358 
359     /* set all other params to their minimum value */
360     for (i = strt_ind + 1; i < NUM_PARAMS; i++) {
361         p.vals[i] = mintable[i];
362     }
363     return p;
364 }
365 
366 /* equivalent of ZSTD_adjustCParams for paramValues_t */
367 static paramValues_t
adjustParams(paramValues_t p,const size_t maxBlockSize,const size_t dictSize)368 adjustParams(paramValues_t p, const size_t maxBlockSize, const size_t dictSize)
369 {
370     paramValues_t ot = p;
371     varInds_t i;
372     p = cParamsToPVals(ZSTD_adjustCParams(pvalsToCParams(p), maxBlockSize, dictSize));
373     if (!dictSize) { p.vals[fadt_ind] = 0; }
374     /* retain value of all other parameters */
375     for(i = strt_ind + 1; i < NUM_PARAMS; i++) {
376         p.vals[i] = ot.vals[i];
377     }
378     return p;
379 }
380 
BMK_findMaxMem(U64 requiredMem)381 static size_t BMK_findMaxMem(U64 requiredMem)
382 {
383     size_t const step = 64 MB;
384     void* testmem = NULL;
385 
386     requiredMem = (((requiredMem >> 26) + 1) << 26);
387     if (requiredMem > maxMemory) requiredMem = maxMemory;
388 
389     requiredMem += 2 * step;
390     while (!testmem && requiredMem > 0) {
391         testmem = malloc ((size_t)requiredMem);
392         requiredMem -= step;
393     }
394 
395     free (testmem);
396     return (size_t) requiredMem;
397 }
398 
399 /* accuracy in seconds only, span can be multiple years */
BMK_timeSpan_s(const UTIL_time_t tStart)400 static U32 BMK_timeSpan_s(const UTIL_time_t tStart)
401 {
402     return (U32)(UTIL_clockSpanMicro(tStart) / 1000000ULL);
403 }
404 
FUZ_rotl32(U32 x,U32 r)405 static U32 FUZ_rotl32(U32 x, U32 r)
406 {
407     return ((x << r) | (x >> (32 - r)));
408 }
409 
FUZ_rand(U32 * src)410 static U32 FUZ_rand(U32* src)
411 {
412     const U32 prime1 = 2654435761U;
413     const U32 prime2 = 2246822519U;
414     U32 rand32 = *src;
415     rand32 *= prime1;
416     rand32 += prime2;
417     rand32  = FUZ_rotl32(rand32, 13);
418     *src = rand32;
419     return rand32 >> 5;
420 }
421 
422 #define BOUNDCHECK(val,min,max) {                     \
423     if (((val)<(min)) | ((val)>(max))) {              \
424         DISPLAY("INVALID PARAMETER CONSTRAINTS\n");   \
425         return 0;                                     \
426 }   }
427 
paramValid(const paramValues_t paramTarget)428 static int paramValid(const paramValues_t paramTarget)
429 {
430     U32 i;
431     for(i = 0; i < NUM_PARAMS; i++) {
432         BOUNDCHECK(paramTarget.vals[i], mintable[i], maxtable[i]);
433     }
434     return 1;
435 }
436 
437 /* cParamUnsetMin() :
438  * if any parameter in paramTarget is not yet set,
439  * it will receive its corresponding minimal value.
440  * This function never fails */
cParamUnsetMin(paramValues_t paramTarget)441 static paramValues_t cParamUnsetMin(paramValues_t paramTarget)
442 {
443     varInds_t vi;
444     for (vi = 0; vi < NUM_PARAMS; vi++) {
445         if (paramTarget.vals[vi] == PARAM_UNSET) {
446             paramTarget.vals[vi] = mintable[vi];
447         }
448     }
449     return paramTarget;
450 }
451 
emptyParams(void)452 static paramValues_t emptyParams(void)
453 {
454     U32 i;
455     paramValues_t p;
456     for(i = 0; i < NUM_PARAMS; i++) {
457         p.vals[i] = PARAM_UNSET;
458     }
459     return p;
460 }
461 
initWinnerInfo(const paramValues_t p)462 static winnerInfo_t initWinnerInfo(const paramValues_t p)
463 {
464     winnerInfo_t w1;
465     w1.result.cSpeed = 0;
466     w1.result.dSpeed = 0;
467     w1.result.cMem = (size_t)-1;
468     w1.result.cSize = (size_t)-1;
469     w1.params = p;
470     return w1;
471 }
472 
473 static paramValues_t
overwriteParams(paramValues_t base,const paramValues_t mask)474 overwriteParams(paramValues_t base, const paramValues_t mask)
475 {
476     U32 i;
477     for(i = 0; i < NUM_PARAMS; i++) {
478         if(mask.vals[i] != PARAM_UNSET) {
479             base.vals[i] = mask.vals[i];
480         }
481     }
482     return base;
483 }
484 
485 static void
paramVaryOnce(const varInds_t paramIndex,const int amt,paramValues_t * ptr)486 paramVaryOnce(const varInds_t paramIndex, const int amt, paramValues_t* ptr)
487 {
488     ptr->vals[paramIndex] = rangeMap(paramIndex,
489                                      invRangeMap(paramIndex, ptr->vals[paramIndex]) + amt);
490 }
491 
492 /* varies ptr by nbChanges respecting varyParams*/
493 static void
paramVariation(paramValues_t * ptr,memoTable_t * mtAll,const U32 nbChanges)494 paramVariation(paramValues_t* ptr, memoTable_t* mtAll, const U32 nbChanges)
495 {
496     paramValues_t p;
497     int validated = 0;
498     while (!validated) {
499         U32 i;
500         p = *ptr;
501         for (i = 0 ; i < nbChanges ; i++) {
502             const U32 changeID = (U32)FUZ_rand(&g_rand) % (mtAll[p.vals[strt_ind]].varLen << 1);
503             paramVaryOnce(mtAll[p.vals[strt_ind]].varArray[changeID >> 1],
504                           (int)((changeID & 1) << 1) - 1,
505                           &p);
506         }
507         validated = paramValid(p);
508     }
509     *ptr = p;
510 }
511 
512 /* Completely random parameter selection */
randomParams(void)513 static paramValues_t randomParams(void)
514 {
515     varInds_t v; paramValues_t p;
516     for(v = 0; v < NUM_PARAMS; v++) {
517         p.vals[v] = rangeMap(v, (int)(FUZ_rand(&g_rand) % rangetable[v]));
518     }
519     return p;
520 }
521 
522 static U64 g_clockGranularity = 100000000ULL;
523 
init_clockGranularity(void)524 static void init_clockGranularity(void)
525 {
526     UTIL_time_t const clockStart = UTIL_getTime();
527     U64 el1 = 0, el2 = 0;
528     int i = 0;
529     do {
530         el1 = el2;
531         el2 = UTIL_clockSpanNano(clockStart);
532         if(el1 < el2) {
533             U64 iv = el2 - el1;
534             if(g_clockGranularity > iv) {
535                 g_clockGranularity = iv;
536                 i = 0;
537             } else {
538                 i++;
539             }
540         }
541     } while(i < 10);
542     DEBUGOUTPUT("Granularity: %llu\n", (unsigned long long)g_clockGranularity);
543 }
544 
545 /*-************************************
546 *  Optimizer Util Functions
547 **************************************/
548 
549 /* checks results are feasible */
feasible(const BMK_benchResult_t results,const constraint_t target)550 static int feasible(const BMK_benchResult_t results, const constraint_t target) {
551     return (results.cSpeed >= target.cSpeed)
552         && (results.dSpeed >= target.dSpeed)
553         && (results.cMem <= target.cMem)
554         && (!g_optmode || results.cSize <= g_lvltarget.cSize);
555 }
556 
557 /* hill climbing value for part 1 */
558 /* Scoring here is a linear reward for all set constraints normalized between 0 to 1
559  * (with 0 at 0 and 1 being fully fulfilling the constraint), summed with a logarithmic
560  * bonus to exceeding the constraint value. We also give linear ratio for compression ratio.
561  * The constant factors are experimental.
562  */
563 static double
resultScore(const BMK_benchResult_t res,const size_t srcSize,const constraint_t target)564 resultScore(const BMK_benchResult_t res, const size_t srcSize, const constraint_t target)
565 {
566     double cs = 0., ds = 0., rt, cm = 0.;
567     const double r1 = 1, r2 = 0.1, rtr = 0.5;
568     double ret;
569     if(target.cSpeed) { cs = res.cSpeed / (double)target.cSpeed; }
570     if(target.dSpeed) { ds = res.dSpeed / (double)target.dSpeed; }
571     if(target.cMem != (U32)-1) { cm = (double)target.cMem / res.cMem; }
572     rt = ((double)srcSize / res.cSize);
573 
574     ret = (MIN(1, cs) + MIN(1, ds)  + MIN(1, cm))*r1 + rt * rtr +
575          (MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2;
576 
577     return ret;
578 }
579 
580 /* calculates normalized squared euclidean distance of result1 if it is in the first quadrant relative to lvlRes */
581 static double
resultDistLvl(const BMK_benchResult_t result1,const BMK_benchResult_t lvlRes)582 resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes)
583 {
584     double normalizedCSpeedGain1 = ((double)result1.cSpeed / lvlRes.cSpeed) - 1;
585     double normalizedRatioGain1 = ((double)lvlRes.cSize / result1.cSize) - 1;
586     if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) {
587         return 0.0;
588     }
589     return normalizedRatioGain1 * g_ratioMultiplier + normalizedCSpeedGain1;
590 }
591 
592 /* return true if r2 strictly better than r1 */
593 static int
compareResultLT(const BMK_benchResult_t result1,const BMK_benchResult_t result2,const constraint_t target,size_t srcSize)594 compareResultLT(const BMK_benchResult_t result1, const BMK_benchResult_t result2, const constraint_t target, size_t srcSize)
595 {
596     if(feasible(result1, target) && feasible(result2, target)) {
597         if(g_optmode) {
598             return resultDistLvl(result1, g_lvltarget) < resultDistLvl(result2, g_lvltarget);
599         } else {
600             return (result1.cSize > result2.cSize)
601                 || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed)
602                 || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed);
603         }
604     }
605     return feasible(result2, target)
606         || (!feasible(result1, target)
607             && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target)));
608 }
609 
relaxTarget(constraint_t target)610 static constraint_t relaxTarget(constraint_t target) {
611     target.cMem = (U32)-1;
612     target.cSpeed = (target.cSpeed * g_strictness) / 100;
613     target.dSpeed = (target.dSpeed * g_strictness) / 100;
614     return target;
615 }
616 
optimizerAdjustInput(paramValues_t * pc,const size_t maxBlockSize)617 static void optimizerAdjustInput(paramValues_t* pc, const size_t maxBlockSize)
618 {
619     varInds_t v;
620     for(v = 0; v < NUM_PARAMS; v++) {
621         if(pc->vals[v] != PARAM_UNSET) {
622             U32 newval = MIN(MAX(pc->vals[v], mintable[v]), maxtable[v]);
623             if(newval != pc->vals[v]) {
624                 pc->vals[v] = newval;
625                 DISPLAY("Warning: parameter %s not in valid range, adjusting to ",
626                         g_paramNames[v]);
627                 displayParamVal(stderr, v, newval, 0); DISPLAY("\n");
628             }
629         }
630     }
631 
632     if(pc->vals[wlog_ind] != PARAM_UNSET) {
633 
634         U32 sshb = maxBlockSize > 1 ? ZSTD_highbit32((U32)(maxBlockSize-1)) + 1 : 1;
635         /* edge case of highBit not working for 0 */
636 
637         if(maxBlockSize < (1ULL << 31) && sshb + 1 < pc->vals[wlog_ind]) {
638             U32 adjust = MAX(mintable[wlog_ind], sshb);
639             if(adjust != pc->vals[wlog_ind]) {
640                 pc->vals[wlog_ind] = adjust;
641                 DISPLAY("Warning: windowLog larger than src/block size, adjusted to %u\n",
642                         (unsigned)pc->vals[wlog_ind]);
643             }
644         }
645     }
646 
647     if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
648         U32 maxclog;
649         if(pc->vals[strt_ind] == PARAM_UNSET || pc->vals[strt_ind] >= (U32)ZSTD_btlazy2) {
650             maxclog = pc->vals[wlog_ind] + 1;
651         } else {
652             maxclog = pc->vals[wlog_ind];
653         }
654 
655         if(pc->vals[clog_ind] > maxclog) {
656             pc->vals[clog_ind] = maxclog;
657             DISPLAY("Warning: chainlog too much larger than windowLog size, adjusted to %u\n",
658                     (unsigned)pc->vals[clog_ind]);
659         }
660     }
661 
662     if(pc->vals[wlog_ind] != PARAM_UNSET && pc->vals[hlog_ind] != PARAM_UNSET) {
663         if(pc->vals[wlog_ind] + 1 < pc->vals[hlog_ind]) {
664             pc->vals[hlog_ind] = pc->vals[wlog_ind] + 1;
665             DISPLAY("Warning: hashlog too much larger than windowLog size, adjusted to %u\n",
666                     (unsigned)pc->vals[hlog_ind]);
667         }
668     }
669 
670     if(pc->vals[slog_ind] != PARAM_UNSET && pc->vals[clog_ind] != PARAM_UNSET) {
671         if(pc->vals[slog_ind] > pc->vals[clog_ind]) {
672             pc->vals[clog_ind] = pc->vals[slog_ind];
673             DISPLAY("Warning: searchLog larger than chainLog, adjusted to %u\n",
674                     (unsigned)pc->vals[slog_ind]);
675         }
676     }
677 }
678 
679 static int
redundantParams(const paramValues_t paramValues,const constraint_t target,const size_t maxBlockSize)680 redundantParams(const paramValues_t paramValues, const constraint_t target, const size_t maxBlockSize)
681 {
682     return
683        (ZSTD_estimateCStreamSize_usingCParams(pvalsToCParams(paramValues)) > (size_t)target.cMem) /* Uses too much memory */
684     || ((1ULL << (paramValues.vals[wlog_ind] - 1)) >= maxBlockSize && paramValues.vals[wlog_ind] != mintable[wlog_ind]) /* wlog too much bigger than src size */
685     || (paramValues.vals[clog_ind] > (paramValues.vals[wlog_ind] + (paramValues.vals[strt_ind] > ZSTD_btlazy2))) /* chainLog larger than windowLog*/
686     || (paramValues.vals[slog_ind] > paramValues.vals[clog_ind]) /* searchLog larger than chainLog */
687     || (paramValues.vals[hlog_ind] > paramValues.vals[wlog_ind] + 1); /* hashLog larger than windowLog + 1 */
688 }
689 
690 
691 /*-************************************
692 *  Display Functions
693 **************************************/
694 
695 /* BMK_paramValues_into_commandLine() :
696  * transform a set of parameters paramValues_t
697  * into a command line compatible with `zstd` syntax
698  * and writes it into FILE* f.
699  * f must be already opened and writable */
700 static void
BMK_paramValues_into_commandLine(FILE * f,const paramValues_t params)701 BMK_paramValues_into_commandLine(FILE* f, const paramValues_t params)
702 {
703     varInds_t v;
704     int first = 1;
705     fprintf(f,"--zstd=");
706     for (v = 0; v < NUM_PARAMS; v++) {
707         if (g_silenceParams[v]) { continue; }
708         if (!first) { fprintf(f, ","); }
709         fprintf(f,"%s=", g_paramNames[v]);
710 
711         if (v == strt_ind) { fprintf(f,"%u", (unsigned)params.vals[v]); }
712         else { displayParamVal(f, v, params.vals[v], 0); }
713         first = 0;
714     }
715     fprintf(f, "\n");
716 }
717 
718 
719 /* comparison function: */
720 /* strictly better, strictly worse, equal, speed-side adv, size-side adv */
721 #define WORSE_RESULT 0
722 #define BETTER_RESULT 1
723 #define ERROR_RESULT 2
724 
725 #define SPEED_RESULT 4
726 #define SIZE_RESULT 5
727 /* maybe have epsilon-eq to limit table size? */
728 static int
speedSizeCompare(const BMK_benchResult_t r1,const BMK_benchResult_t r2)729 speedSizeCompare(const BMK_benchResult_t r1, const BMK_benchResult_t r2)
730 {
731     if(r1.cSpeed < r2.cSpeed) {
732         if(r1.cSize >= r2.cSize) {
733             return BETTER_RESULT;
734         }
735         return SPEED_RESULT; /* r2 is smaller but not faster. */
736     } else {
737         if(r1.cSize <= r2.cSize) {
738             return WORSE_RESULT;
739         }
740         return SIZE_RESULT; /* r2 is faster but not smaller */
741     }
742 }
743 
744 /* 0 for insertion, 1 for no insert */
745 /* maintain invariant speedSizeCompare(n, n->next) = SPEED_RESULT */
746 static int
insertWinner(const winnerInfo_t w,const constraint_t targetConstraints)747 insertWinner(const winnerInfo_t w, const constraint_t targetConstraints)
748 {
749     BMK_benchResult_t r = w.result;
750     winner_ll_node* cur_node = g_winners;
751     /* first node to insert */
752     if(!feasible(r, targetConstraints)) {
753         return 1;
754     }
755 
756     if(g_winners == NULL) {
757         winner_ll_node* first_node = malloc(sizeof(winner_ll_node));
758         if(first_node == NULL) {
759             return 1;
760         }
761         first_node->next = NULL;
762         first_node->res = w;
763         g_winners = first_node;
764         return 0;
765     }
766 
767     while(cur_node->next != NULL) {
768         switch(speedSizeCompare(cur_node->res.result, r)) {
769             case WORSE_RESULT:
770             {
771                 return 1; /* never insert if better */
772             }
773             case BETTER_RESULT:
774             {
775                 winner_ll_node* tmp;
776                 cur_node->res = cur_node->next->res;
777                 tmp = cur_node->next;
778                 cur_node->next = cur_node->next->next;
779                 free(tmp);
780                 break;
781             }
782             case SIZE_RESULT:
783             {
784                 cur_node = cur_node->next;
785                 break;
786             }
787             case SPEED_RESULT: /* insert after first size result, then return */
788             {
789                 winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
790                 if(newnode == NULL) {
791                     return 1;
792                 }
793                 newnode->res = cur_node->res;
794                 cur_node->res = w;
795                 newnode->next = cur_node->next;
796                 cur_node->next = newnode;
797                 return 0;
798             }
799         }
800 
801     }
802 
803     assert(cur_node->next == NULL);
804     switch(speedSizeCompare(cur_node->res.result, r)) {
805         case WORSE_RESULT:
806         {
807             return 1; /* never insert if better */
808         }
809         case BETTER_RESULT:
810         {
811             cur_node->res = w;
812             return 0;
813         }
814         case SIZE_RESULT:
815         {
816             winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
817             if(newnode == NULL) {
818                 return 1;
819             }
820             newnode->res = w;
821             newnode->next = NULL;
822             cur_node->next = newnode;
823             return 0;
824         }
825         case SPEED_RESULT: /* insert before first size result, then return */
826         {
827             winner_ll_node* newnode = malloc(sizeof(winner_ll_node));
828             if(newnode == NULL) {
829                 return 1;
830             }
831             newnode->res = cur_node->res;
832             cur_node->res = w;
833             newnode->next = cur_node->next;
834             cur_node->next = newnode;
835             return 0;
836         }
837         default:
838             return 1;
839     }
840 }
841 
842 static void
BMK_displayOneResult(FILE * f,winnerInfo_t res,const size_t srcSize)843 BMK_displayOneResult(FILE* f, winnerInfo_t res, const size_t srcSize)
844 {
845     varInds_t v;
846     int first = 1;
847     res.params = cParamUnsetMin(res.params);
848     fprintf(f, "    {");
849     for (v = 0; v < NUM_PARAMS; v++) {
850         if (g_silenceParams[v]) { continue; }
851         if (!first) { fprintf(f, ","); }
852         displayParamVal(f, v, res.params.vals[v], 3);
853         first = 0;
854     }
855 
856     {   double const ratio = res.result.cSize ?
857                             (double)srcSize / res.result.cSize : 0;
858         double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT;
859         double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT;
860 
861         fprintf(f, " },     /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
862                             ratio, cSpeedMBps, dSpeedMBps);
863     }
864 }
865 
866 /* Writes to f the results of a parameter benchmark */
867 /* when used with --optimize, will only print results better than previously discovered */
868 static void
BMK_printWinner(FILE * f,const int cLevel,const BMK_benchResult_t result,const paramValues_t params,const size_t srcSize)869 BMK_printWinner(FILE* f, const int cLevel, const BMK_benchResult_t result, const paramValues_t params, const size_t srcSize)
870 {
871     char lvlstr[15] = "Custom Level";
872     winnerInfo_t w;
873     w.params = params;
874     w.result = result;
875 
876     fprintf(f, "\r%79s\r", "");
877 
878     if(cLevel != CUSTOM_LEVEL) {
879         snprintf(lvlstr, 15, "  Level %2d  ", cLevel);
880     }
881 
882     if(TIMED) {
883         const U64 mn_in_ns = 60ULL * TIMELOOP_NANOSEC;
884         const U64 time_ns = UTIL_clockSpanNano(g_time);
885         const U64 minutes = time_ns / mn_in_ns;
886         fprintf(f, "%1lu:%2lu:%05.2f - ",
887                 (unsigned long) minutes / 60,
888                 (unsigned long) minutes % 60,
889                 (double)(time_ns - (minutes * mn_in_ns)) / TIMELOOP_NANOSEC );
890     }
891 
892     fprintf(f, "/* %s */   ", lvlstr);
893     BMK_displayOneResult(f, w, srcSize);
894 }
895 
896 static void
BMK_printWinnerOpt(FILE * f,const U32 cLevel,const BMK_benchResult_t result,const paramValues_t params,const constraint_t targetConstraints,const size_t srcSize)897 BMK_printWinnerOpt(FILE* f, const U32 cLevel, const BMK_benchResult_t result, const paramValues_t params, const constraint_t targetConstraints, const size_t srcSize)
898 {
899     /* global winner used for constraints */
900                                     /* cSize, cSpeed, dSpeed, cMem */
901     static winnerInfo_t g_winner = { { (size_t)-1LL, 0, 0, (size_t)-1LL },
902                                      { { PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET, PARAM_UNSET } }
903                                    };
904     if ( DEBUG
905       || compareResultLT(g_winner.result, result, targetConstraints, srcSize)
906       || g_displayLevel >= 4) {
907         if ( DEBUG
908           && compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
909             DISPLAY("New Winner: \n");
910         }
911 
912         if(g_displayLevel >= 2) {
913             BMK_printWinner(f, cLevel, result, params, srcSize);
914         }
915 
916         if(compareResultLT(g_winner.result, result, targetConstraints, srcSize)) {
917             if(g_displayLevel >= 1) { BMK_paramValues_into_commandLine(f, params); }
918             g_winner.result = result;
919             g_winner.params = params;
920         }
921     }
922 
923     if(g_optmode && g_optimizer && (DEBUG || g_displayLevel == 3)) {
924         winnerInfo_t w;
925         winner_ll_node* n;
926         w.result = result;
927         w.params = params;
928         insertWinner(w, targetConstraints);
929 
930         if(!DEBUG) { fprintf(f, "\033c"); }
931         fprintf(f, "\n");
932 
933         /* the table */
934         fprintf(f, "================================\n");
935         for(n = g_winners; n != NULL; n = n->next) {
936             BMK_displayOneResult(f, n->res, srcSize);
937         }
938         fprintf(f, "================================\n");
939         fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n",
940             (double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
941 
942 
943         fprintf(f, "Overall Winner: \n");
944         BMK_displayOneResult(f, g_winner, srcSize);
945         BMK_paramValues_into_commandLine(f, g_winner.params);
946 
947         fprintf(f, "Latest BMK: \n");\
948         BMK_displayOneResult(f, w, srcSize);
949     }
950 }
951 
952 
953 /* BMK_print_cLevelEntry() :
954  * Writes one cLevelTable entry, for one level.
955  * f must exist, be already opened, and be seekable.
956  * this function cannot error.
957  */
958 static void
BMK_print_cLevelEntry(FILE * f,const int cLevel,paramValues_t params,const BMK_benchResult_t result,const size_t srcSize)959 BMK_print_cLevelEntry(FILE* f, const int cLevel,
960                       paramValues_t params,
961                       const BMK_benchResult_t result, const size_t srcSize)
962 {
963     varInds_t v;
964     int first = 1;
965 
966     assert(cLevel >= 0);
967     assert(cLevel <= NB_LEVELS_TRACKED);
968     params = cParamUnsetMin(params);
969 
970     fprintf(f, "   {");
971     /* print cParams.
972      * assumption : all cParams are present and in order in the following range */
973     for (v = 0; v <= strt_ind; v++) {
974         if (!first) { fprintf(f, ","); }
975         displayParamVal(f, v, params.vals[v], 3);
976         first = 0;
977     }
978     /* print comment */
979     {   double const ratio = result.cSize ?
980                             (double)srcSize / result.cSize : 0;
981         double const cSpeedMBps = (double)result.cSpeed / MB_UNIT;
982         double const dSpeedMBps = (double)result.dSpeed / MB_UNIT;
983 
984         fprintf(f, " },   /* level %2i:  R=%5.3f at %5.1f MB/s - %5.1f MB/s */\n",
985                              cLevel, ratio, cSpeedMBps, dSpeedMBps);
986     }
987 }
988 
989 
990 /* BMK_print_cLevelTable() :
991  * print candidate compression table into proposed FILE* f.
992  * f must exist, be already opened, and be seekable.
993  * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
994  * this function cannot error.
995  */
996 static void
BMK_print_cLevelTable(FILE * f,const winnerInfo_t * winners,const size_t srcSize)997 BMK_print_cLevelTable(FILE* f, const winnerInfo_t* winners, const size_t srcSize)
998 {
999     int cLevel;
1000 
1001     fprintf(f, "\n /* Proposed configurations : */ \n");
1002     fprintf(f, "   /* W,  C,  H,  S,  L,  T, strat */ \n");
1003 
1004     for (cLevel=0; cLevel <= NB_LEVELS_TRACKED; cLevel++)
1005         BMK_print_cLevelEntry(f,
1006                               cLevel, winners[cLevel].params,
1007                               winners[cLevel].result, srcSize);
1008 }
1009 
1010 
1011 /* BMK_saveAndPrint_cLevelTable() :
1012  * save candidate compression table into FILE* f,
1013  * and then to stdout.
1014  * f must exist, be already opened, and be seekable.
1015  * winners must be a table of NB_LEVELS_TRACKED+1 elements winnerInfo_t, all entries presumed initialized
1016  * this function cannot error.
1017  */
1018 static void
BMK_saveAndPrint_cLevelTable(FILE * const f,const winnerInfo_t * winners,const size_t srcSize)1019 BMK_saveAndPrint_cLevelTable(FILE* const f,
1020                        const winnerInfo_t* winners,
1021                        const size_t srcSize)
1022 {
1023     fseek(f, 0, SEEK_SET);
1024     BMK_print_cLevelTable(f, winners, srcSize);
1025     fflush(f);
1026     BMK_print_cLevelTable(stdout, winners, srcSize);
1027 }
1028 
1029 
1030 /*-*******************************************************
1031 *  Functions to Benchmark
1032 *********************************************************/
1033 
1034 typedef struct {
1035     ZSTD_CCtx* cctx;
1036     const void* dictBuffer;
1037     size_t dictBufferSize;
1038     int cLevel;
1039     const paramValues_t* comprParams;
1040 } BMK_initCCtxArgs;
1041 
local_initCCtx(void * payload)1042 static size_t local_initCCtx(void* payload) {
1043     const BMK_initCCtxArgs* ag = (const BMK_initCCtxArgs*)payload;
1044     varInds_t i;
1045     ZSTD_CCtx_reset(ag->cctx, ZSTD_reset_session_and_parameters);
1046     ZSTD_CCtx_setParameter(ag->cctx, ZSTD_c_compressionLevel, ag->cLevel);
1047 
1048     for(i = 0; i < NUM_PARAMS; i++) {
1049         if(ag->comprParams->vals[i] != PARAM_UNSET)
1050         ZSTD_CCtx_setParameter(ag->cctx, cctxSetParamTable[i], ag->comprParams->vals[i]);
1051     }
1052     ZSTD_CCtx_loadDictionary(ag->cctx, ag->dictBuffer, ag->dictBufferSize);
1053 
1054     return 0;
1055 }
1056 
1057 typedef struct {
1058     ZSTD_DCtx* dctx;
1059     const void* dictBuffer;
1060     size_t dictBufferSize;
1061 } BMK_initDCtxArgs;
1062 
local_initDCtx(void * payload)1063 static size_t local_initDCtx(void* payload) {
1064     const BMK_initDCtxArgs* ag = (const BMK_initDCtxArgs*)payload;
1065     ZSTD_DCtx_reset(ag->dctx, ZSTD_reset_session_and_parameters);
1066     ZSTD_DCtx_loadDictionary(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
1067     return 0;
1068 }
1069 
1070 /* additional argument is just the context */
local_defaultCompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstSize,void * addArgs)1071 static size_t local_defaultCompress(
1072                             const void* srcBuffer, size_t srcSize,
1073                             void* dstBuffer, size_t dstSize,
1074                             void* addArgs)
1075 {
1076     ZSTD_CCtx* cctx = (ZSTD_CCtx*)addArgs;
1077     assert(dstSize == ZSTD_compressBound(srcSize)); /* specific to this version, which is only used in paramgrill */
1078     return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize);
1079 }
1080 
1081 /* additional argument is just the context */
local_defaultDecompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstSize,void * addArgs)1082 static size_t local_defaultDecompress(
1083     const void* srcBuffer, size_t srcSize,
1084     void* dstBuffer, size_t dstSize,
1085     void* addArgs) {
1086     size_t moreToFlush = 1;
1087     ZSTD_DCtx* dctx = (ZSTD_DCtx*)addArgs;
1088     ZSTD_inBuffer in;
1089     ZSTD_outBuffer out;
1090     in.src = srcBuffer;
1091     in.size = srcSize;
1092     in.pos = 0;
1093     out.dst = dstBuffer;
1094     out.size = dstSize;
1095     out.pos = 0;
1096     while (moreToFlush) {
1097         if(out.pos == out.size) {
1098             return (size_t)-ZSTD_error_dstSize_tooSmall;
1099         }
1100         moreToFlush = ZSTD_decompressStream(dctx,
1101                             &out, &in);
1102         if (ZSTD_isError(moreToFlush)) {
1103             return moreToFlush;
1104         }
1105     }
1106     return out.pos;
1107 
1108 }
1109 
1110 /*-************************************
1111 *  Data Initialization Functions
1112 **************************************/
1113 
1114 typedef struct {
1115     void* srcBuffer;
1116     size_t srcSize;
1117     const void** srcPtrs;
1118     size_t* srcSizes;
1119     void** dstPtrs;
1120     size_t* dstCapacities;
1121     size_t* dstSizes;
1122     void** resPtrs;
1123     size_t* resSizes;
1124     size_t nbBlocks;
1125     size_t maxBlockSize;
1126 } buffers_t;
1127 
1128 typedef struct {
1129     size_t dictSize;
1130     void* dictBuffer;
1131     ZSTD_CCtx* cctx;
1132     ZSTD_DCtx* dctx;
1133 } contexts_t;
1134 
freeNonSrcBuffers(const buffers_t b)1135 static void freeNonSrcBuffers(const buffers_t b) {
1136     free((void*)b.srcPtrs);
1137     free(b.srcSizes);
1138 
1139     if(b.dstPtrs != NULL) {
1140         free(b.dstPtrs[0]);
1141     }
1142     free(b.dstPtrs);
1143     free(b.dstCapacities);
1144     free(b.dstSizes);
1145 
1146     if(b.resPtrs != NULL) {
1147         free(b.resPtrs[0]);
1148     }
1149     free(b.resPtrs);
1150     free(b.resSizes);
1151 }
1152 
freeBuffers(const buffers_t b)1153 static void freeBuffers(const buffers_t b) {
1154     if(b.srcPtrs != NULL) {
1155         free(b.srcBuffer);
1156     }
1157     freeNonSrcBuffers(b);
1158 }
1159 
1160 /* srcBuffer will be freed by freeBuffers now */
createBuffersFromMemory(buffers_t * buff,void * srcBuffer,const size_t nbFiles,const size_t * fileSizes)1161 static int createBuffersFromMemory(buffers_t* buff, void * srcBuffer, const size_t nbFiles,
1162     const size_t* fileSizes)
1163 {
1164     size_t pos = 0, n, blockSize;
1165     U32 maxNbBlocks, blockNb = 0;
1166     buff->srcSize = 0;
1167     for(n = 0; n < nbFiles; n++) {
1168         buff->srcSize += fileSizes[n];
1169     }
1170 
1171     if(buff->srcSize == 0) {
1172         DISPLAY("No data to bench\n");
1173         return 1;
1174     }
1175 
1176     blockSize = g_blockSize ? g_blockSize : buff->srcSize;
1177     maxNbBlocks = (U32) ((buff->srcSize + (blockSize-1)) / blockSize) + (U32)nbFiles;
1178 
1179     buff->srcPtrs = (const void**)calloc(maxNbBlocks, sizeof(void*));
1180     buff->srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1181 
1182     buff->dstPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1183     buff->dstCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1184     buff->dstSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1185 
1186     buff->resPtrs = (void**)calloc(maxNbBlocks, sizeof(void*));
1187     buff->resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
1188 
1189     if(!buff->srcPtrs || !buff->srcSizes || !buff->dstPtrs || !buff->dstCapacities || !buff->dstSizes || !buff->resPtrs || !buff->resSizes) {
1190         DISPLAY("alloc error\n");
1191         freeNonSrcBuffers(*buff);
1192         return 1;
1193     }
1194 
1195     buff->srcBuffer = srcBuffer;
1196     buff->srcPtrs[0] = (const void*)buff->srcBuffer;
1197     buff->dstPtrs[0] = malloc(ZSTD_compressBound(buff->srcSize) + (maxNbBlocks * 1024));
1198     buff->resPtrs[0] = malloc(buff->srcSize);
1199 
1200     if(!buff->dstPtrs[0] || !buff->resPtrs[0]) {
1201         DISPLAY("alloc error\n");
1202         freeNonSrcBuffers(*buff);
1203         return 1;
1204     }
1205 
1206     for(n = 0; n < nbFiles; n++) {
1207         size_t pos_end = pos + fileSizes[n];
1208         for(; pos < pos_end; blockNb++) {
1209             buff->srcPtrs[blockNb] = (const void*)((char*)srcBuffer + pos);
1210             buff->srcSizes[blockNb] = blockSize;
1211             pos += blockSize;
1212         }
1213 
1214         if(fileSizes[n] > 0) { buff->srcSizes[blockNb - 1] = ((fileSizes[n] - 1) % blockSize) + 1; }
1215         pos = pos_end;
1216     }
1217 
1218     buff->dstCapacities[0] = ZSTD_compressBound(buff->srcSizes[0]);
1219     buff->dstSizes[0] = buff->dstCapacities[0];
1220     buff->resSizes[0] = buff->srcSizes[0];
1221     buff->maxBlockSize = buff->srcSizes[0];
1222 
1223     for(n = 1; n < blockNb; n++) {
1224         buff->dstPtrs[n] = ((char*)buff->dstPtrs[n-1]) + buff->dstCapacities[n-1];
1225         buff->resPtrs[n] = ((char*)buff->resPtrs[n-1]) + buff->resSizes[n-1];
1226         buff->dstCapacities[n] = ZSTD_compressBound(buff->srcSizes[n]);
1227         buff->dstSizes[n] = buff->dstCapacities[n];
1228         buff->resSizes[n] = buff->srcSizes[n];
1229 
1230         buff->maxBlockSize = MAX(buff->maxBlockSize, buff->srcSizes[n]);
1231     }
1232 
1233     buff->nbBlocks = blockNb;
1234 
1235     return 0;
1236 }
1237 
1238 /* allocates buffer's arguments. returns success / failure */
createBuffers(buffers_t * buff,const char * const * const fileNamesTable,size_t nbFiles)1239 static int createBuffers(buffers_t* buff, const char* const * const fileNamesTable,
1240                           size_t nbFiles) {
1241     size_t pos = 0;
1242     size_t n;
1243     size_t totalSizeToLoad = (size_t)UTIL_getTotalFileSize(fileNamesTable, (U32)nbFiles);
1244     size_t benchedSize = MIN(BMK_findMaxMem(totalSizeToLoad * 3) / 3, totalSizeToLoad);
1245     size_t* fileSizes = calloc(sizeof(size_t), nbFiles);
1246     void* srcBuffer = NULL;
1247     int ret = 0;
1248 
1249     if(!totalSizeToLoad || !benchedSize) {
1250         ret = 1;
1251         DISPLAY("Nothing to Bench\n");
1252         goto _cleanUp;
1253     }
1254 
1255     srcBuffer = malloc(benchedSize);
1256 
1257     if(!fileSizes || !srcBuffer) {
1258         ret = 1;
1259         goto _cleanUp;
1260     }
1261 
1262     for(n = 0; n < nbFiles; n++) {
1263         FILE* f;
1264         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
1265         if (UTIL_isDirectory(fileNamesTable[n])) {
1266             DISPLAY("Ignoring %s directory...       \n", fileNamesTable[n]);
1267             continue;
1268         }
1269         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
1270             DISPLAY("Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
1271             continue;
1272         }
1273         f = fopen(fileNamesTable[n], "rb");
1274         if (f==NULL) {
1275             DISPLAY("impossible to open file %s\n", fileNamesTable[n]);
1276             fclose(f);
1277             ret = 10;
1278             goto _cleanUp;
1279         }
1280 
1281         DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[n]);
1282 
1283         if (fileSize + pos > benchedSize) fileSize = benchedSize - pos, nbFiles=n;   /* buffer too small - stop after this file */
1284         {
1285             char* buffer = (char*)(srcBuffer);
1286             size_t const readSize = fread((buffer)+pos, 1, (size_t)fileSize, f);
1287             fclose(f);
1288             if (readSize != (size_t)fileSize) {
1289                 DISPLAY("could not read %s", fileNamesTable[n]);
1290                 ret = 1;
1291                 goto _cleanUp;
1292             }
1293 
1294             fileSizes[n] = readSize;
1295             pos += readSize;
1296         }
1297     }
1298 
1299     ret = createBuffersFromMemory(buff, srcBuffer, nbFiles, fileSizes);
1300 
1301 _cleanUp:
1302     if(ret) { free(srcBuffer); }
1303     free(fileSizes);
1304     return ret;
1305 }
1306 
freeContexts(const contexts_t ctx)1307 static void freeContexts(const contexts_t ctx) {
1308     free(ctx.dictBuffer);
1309     ZSTD_freeCCtx(ctx.cctx);
1310     ZSTD_freeDCtx(ctx.dctx);
1311 }
1312 
createContexts(contexts_t * ctx,const char * dictFileName)1313 static int createContexts(contexts_t* ctx, const char* dictFileName) {
1314     FILE* f;
1315     size_t readSize;
1316     ctx->cctx = ZSTD_createCCtx();
1317     ctx->dctx = ZSTD_createDCtx();
1318     assert(ctx->cctx != NULL);
1319     assert(ctx->dctx != NULL);
1320 
1321     if(dictFileName == NULL) {
1322         ctx->dictSize = 0;
1323         ctx->dictBuffer = NULL;
1324         return 0;
1325     }
1326     {   U64 const dictFileSize = UTIL_getFileSize(dictFileName);
1327         assert(dictFileSize != UTIL_FILESIZE_UNKNOWN);
1328         ctx->dictSize = (size_t)dictFileSize;
1329         assert((U64)ctx->dictSize == dictFileSize); /* check overflow */
1330     }
1331     ctx->dictBuffer = malloc(ctx->dictSize);
1332 
1333     f = fopen(dictFileName, "rb");
1334 
1335     if (f==NULL) {
1336         DISPLAY("unable to open file\n");
1337         freeContexts(*ctx);
1338         return 1;
1339     }
1340 
1341     if (ctx->dictSize > 64 MB || !(ctx->dictBuffer)) {
1342         DISPLAY("dictionary too large\n");
1343         fclose(f);
1344         freeContexts(*ctx);
1345         return 1;
1346     }
1347     readSize = fread(ctx->dictBuffer, 1, ctx->dictSize, f);
1348     fclose(f);
1349     if (readSize != ctx->dictSize) {
1350         DISPLAY("unable to read file\n");
1351         freeContexts(*ctx);
1352         return 1;
1353     }
1354     return 0;
1355 }
1356 
1357 /*-************************************
1358 *  Optimizer Memoization Functions
1359 **************************************/
1360 
1361 /* return: new length */
1362 /* keep old array, will need if iter over strategy. */
1363 /* prunes useless params */
sanitizeVarArray(varInds_t * varNew,const size_t varLength,const varInds_t * varArray,const ZSTD_strategy strat)1364 static size_t sanitizeVarArray(varInds_t* varNew, const size_t varLength, const varInds_t* varArray, const ZSTD_strategy strat) {
1365     size_t i, j = 0;
1366     for(i = 0; i < varLength; i++) {
1367         if( !((varArray[i] == clog_ind && strat == ZSTD_fast)
1368             || (varArray[i] == slog_ind && strat == ZSTD_fast)
1369             || (varArray[i] == slog_ind && strat == ZSTD_dfast)
1370             || (varArray[i] == tlen_ind && strat < ZSTD_btopt && strat != ZSTD_fast))) {
1371             varNew[j] = varArray[i];
1372             j++;
1373         }
1374     }
1375     return j;
1376 }
1377 
1378 /* res should be NUM_PARAMS size */
1379 /* constructs varArray from paramValues_t style parameter */
1380 /* pass in using dict. */
variableParams(const paramValues_t paramConstraints,varInds_t * res,const int usingDictionary)1381 static size_t variableParams(const paramValues_t paramConstraints, varInds_t* res, const int usingDictionary) {
1382     varInds_t i;
1383     size_t j = 0;
1384     for(i = 0; i < NUM_PARAMS; i++) {
1385         if(paramConstraints.vals[i] == PARAM_UNSET) {
1386             if(i == fadt_ind && !usingDictionary) continue; /* don't use fadt if no dictionary */
1387             res[j] = i; j++;
1388         }
1389     }
1390     return j;
1391 }
1392 
1393 /* length of memo table given free variables */
memoTableLen(const varInds_t * varyParams,const size_t varyLen)1394 static size_t memoTableLen(const varInds_t* varyParams, const size_t varyLen) {
1395     size_t arrayLen = 1;
1396     size_t i;
1397     for(i = 0; i < varyLen; i++) {
1398         if(varyParams[i] == strt_ind) continue; /* strategy separated by table */
1399         arrayLen *= rangetable[varyParams[i]];
1400     }
1401     return arrayLen;
1402 }
1403 
1404 /* returns unique index in memotable of compression parameters */
memoTableIndDirect(const paramValues_t * ptr,const varInds_t * varyParams,const size_t varyLen)1405 static unsigned memoTableIndDirect(const paramValues_t* ptr, const varInds_t* varyParams, const size_t varyLen) {
1406     size_t i;
1407     unsigned ind = 0;
1408     for(i = 0; i < varyLen; i++) {
1409         varInds_t v = varyParams[i];
1410         if(v == strt_ind) continue; /* exclude strategy from memotable */
1411         ind *= rangetable[v]; ind += (unsigned)invRangeMap(v, ptr->vals[v]);
1412     }
1413     return ind;
1414 }
1415 
memoTableGet(const memoTable_t * memoTableArray,const paramValues_t p)1416 static size_t memoTableGet(const memoTable_t* memoTableArray, const paramValues_t p) {
1417     const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1418     switch(mt.tableType) {
1419         case directMap:
1420             return mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)];
1421         case xxhashMap:
1422             return mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen];
1423         case noMemo:
1424             return 0;
1425     }
1426     return 0; /* should never happen, stop compiler warnings */
1427 }
1428 
memoTableSet(const memoTable_t * memoTableArray,const paramValues_t p,const BYTE value)1429 static void memoTableSet(const memoTable_t* memoTableArray, const paramValues_t p, const BYTE value) {
1430     const memoTable_t mt = memoTableArray[p.vals[strt_ind]];
1431     switch(mt.tableType) {
1432         case directMap:
1433             mt.table[memoTableIndDirect(&p, mt.varArray, mt.varLen)] = value; break;
1434         case xxhashMap:
1435             mt.table[(XXH64(&p.vals, sizeof(U32) * NUM_PARAMS, 0) >> 3) % mt.tableLen] = value; break;
1436         case noMemo:
1437             break;
1438     }
1439 }
1440 
1441 /* frees all allocated memotables */
1442 /* secret contract :
1443  * mtAll is a table of (ZSTD_STRATEGY_MAX+1) memoTable_t */
freeMemoTableArray(memoTable_t * const mtAll)1444 static void freeMemoTableArray(memoTable_t* const mtAll) {
1445     int i;
1446     if(mtAll == NULL) { return; }
1447     for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1448         free(mtAll[i].table);
1449     }
1450     free(mtAll);
1451 }
1452 
1453 /* inits memotables for all (including mallocs), all strategies */
1454 /* takes unsanitized varyParams */
1455 static memoTable_t*
createMemoTableArray(const paramValues_t p,const varInds_t * const varyParams,const size_t varyLen,const U32 memoTableLog)1456 createMemoTableArray(const paramValues_t p,
1457                      const varInds_t* const varyParams,
1458                      const size_t varyLen,
1459                      const U32 memoTableLog)
1460 {
1461     memoTable_t* const mtAll = (memoTable_t*)calloc(sizeof(memoTable_t),(ZSTD_STRATEGY_MAX + 1));
1462     ZSTD_strategy i, stratMin = ZSTD_STRATEGY_MIN, stratMax = ZSTD_STRATEGY_MAX;
1463 
1464     if(mtAll == NULL) {
1465         return NULL;
1466     }
1467 
1468     for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1469         mtAll[i].varLen = sanitizeVarArray(mtAll[i].varArray, varyLen, varyParams, i);
1470     }
1471 
1472     /* no memoization */
1473     if(memoTableLog == 0) {
1474         for(i = 1; i <= (int)ZSTD_STRATEGY_MAX; i++) {
1475             mtAll[i].tableType = noMemo;
1476             mtAll[i].table = NULL;
1477             mtAll[i].tableLen = 0;
1478         }
1479         return mtAll;
1480     }
1481 
1482 
1483     if(p.vals[strt_ind] != PARAM_UNSET) {
1484         stratMin = p.vals[strt_ind];
1485         stratMax = p.vals[strt_ind];
1486     }
1487 
1488 
1489     for(i = stratMin; i <= stratMax; i++) {
1490         size_t mtl = memoTableLen(mtAll[i].varArray, mtAll[i].varLen);
1491         mtAll[i].tableType = directMap;
1492 
1493         if(memoTableLog != PARAM_UNSET && mtl > (1ULL << memoTableLog)) { /* use hash table */ /* provide some option to only use hash tables? */
1494             mtAll[i].tableType = xxhashMap;
1495             mtl = ((size_t)1 << memoTableLog);
1496         }
1497 
1498         mtAll[i].table = (BYTE*)calloc(sizeof(BYTE), mtl);
1499         mtAll[i].tableLen = mtl;
1500 
1501         if(mtAll[i].table == NULL) {
1502             freeMemoTableArray(mtAll);
1503             return NULL;
1504         }
1505     }
1506 
1507     return mtAll;
1508 }
1509 
1510 /* Sets pc to random unmeasured set of parameters */
1511 /* specify strategy */
randomConstrainedParams(paramValues_t * pc,const memoTable_t * memoTableArray,const ZSTD_strategy st)1512 static void randomConstrainedParams(paramValues_t* pc, const memoTable_t* memoTableArray, const ZSTD_strategy st)
1513 {
1514     size_t j;
1515     const memoTable_t mt = memoTableArray[st];
1516     pc->vals[strt_ind] = st;
1517     for(j = 0; j < mt.tableLen; j++) {
1518         int i;
1519         for(i = 0; i < NUM_PARAMS; i++) {
1520             varInds_t v = mt.varArray[i];
1521             if(v == strt_ind) continue;
1522             pc->vals[v] = rangeMap(v, FUZ_rand(&g_rand) % rangetable[v]);
1523         }
1524 
1525         if(!(memoTableGet(memoTableArray, *pc))) break; /* only pick unpicked params. */
1526     }
1527 }
1528 
1529 /*-************************************
1530 *  Benchmarking Functions
1531 **************************************/
1532 
display_params_tested(paramValues_t cParams)1533 static void display_params_tested(paramValues_t cParams)
1534 {
1535     varInds_t vi;
1536     DISPLAYLEVEL(3, "\r testing :");
1537     for (vi=0; vi < NUM_PARAMS; vi++) {
1538         DISPLAYLEVEL(3, "%3u,", (unsigned)cParams.vals[vi]);
1539     }
1540     DISPLAYLEVEL(3, "\b    \r");
1541 }
1542 
1543 /* Replicate functionality of benchMemAdvanced, but with pre-split src / dst buffers */
1544 /* The purpose is so that sufficient information is returned so that a decompression call to benchMemInvertible is possible */
1545 /* BMK_benchMemAdvanced(srcBuffer,srcSize, dstBuffer, dstSize, fileSizes, nbFiles, 0, &cParams, dictBuffer, dictSize, ctx, dctx, 0, "File", &adv); */
1546 /* nbSeconds used in same way as in BMK_advancedParams_t */
1547 /* if in decodeOnly, then srcPtr's will be compressed blocks, and uncompressedBlocks will be written to dstPtrs */
1548 /* dictionary nullable, nothing else though. */
1549 /* note : it would be a lot better if this function was present in benchzstd.c,
1550  * sharing code with benchMemAdvanced(), since it's technically a part of it */
1551 static BMK_benchOutcome_t
BMK_benchMemInvertible(buffers_t buf,contexts_t ctx,int cLevel,const paramValues_t * comprParams,BMK_mode_t mode,unsigned nbSeconds)1552 BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
1553                         int cLevel, const paramValues_t* comprParams,
1554                         BMK_mode_t mode, unsigned nbSeconds)
1555 {
1556     U32 i;
1557     BMK_benchResult_t bResult;
1558     const void *const *const srcPtrs = (const void *const *const)buf.srcPtrs;
1559     size_t const *const srcSizes = buf.srcSizes;
1560     void** const dstPtrs = buf.dstPtrs;
1561     size_t const *const dstCapacities = buf.dstCapacities;
1562     size_t* const dstSizes = buf.dstSizes;
1563     void** const resPtrs = buf.resPtrs;
1564     size_t const *const resSizes = buf.resSizes;
1565     const void* dictBuffer = ctx.dictBuffer;
1566     const size_t dictBufferSize = ctx.dictSize;
1567     const size_t nbBlocks = buf.nbBlocks;
1568     const size_t srcSize = buf.srcSize;
1569     ZSTD_CCtx* cctx = ctx.cctx;
1570     ZSTD_DCtx* dctx = ctx.dctx;
1571 
1572     /* init */
1573     display_params_tested(*comprParams);
1574     memset(&bResult, 0, sizeof(bResult));
1575 
1576     /* warming up memory */
1577     for (i = 0; i < buf.nbBlocks; i++) {
1578         if (mode != BMK_decodeOnly) {
1579             RDG_genBuffer(dstPtrs[i], dstCapacities[i], 0.10, 0.50, 1);
1580         } else {
1581             RDG_genBuffer(resPtrs[i], resSizes[i], 0.10, 0.50, 1);
1582         }
1583     }
1584 
1585     /* Bench */
1586     {
1587         /* init args */
1588         int compressionCompleted = (mode == BMK_decodeOnly);
1589         int decompressionCompleted = (mode == BMK_compressOnly);
1590         BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1591         BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(nbSeconds * 1000, 1000);
1592         BMK_benchParams_t cbp, dbp;
1593         BMK_initCCtxArgs cctxprep;
1594         BMK_initDCtxArgs dctxprep;
1595 
1596         cbp.benchFn = local_defaultCompress;
1597         cbp.benchPayload = cctx;
1598         cbp.initFn = local_initCCtx;
1599         cbp.initPayload = &cctxprep;
1600         cbp.errorFn = ZSTD_isError;
1601         cbp.blockCount = nbBlocks;
1602         cbp.srcBuffers = srcPtrs;
1603         cbp.srcSizes = srcSizes;
1604         cbp.dstBuffers = dstPtrs;
1605         cbp.dstCapacities = dstCapacities;
1606         cbp.blockResults = dstSizes;
1607 
1608         cctxprep.cctx = cctx;
1609         cctxprep.dictBuffer = dictBuffer;
1610         cctxprep.dictBufferSize = dictBufferSize;
1611         cctxprep.cLevel = cLevel;
1612         cctxprep.comprParams = comprParams;
1613 
1614         dbp.benchFn = local_defaultDecompress;
1615         dbp.benchPayload = dctx;
1616         dbp.initFn = local_initDCtx;
1617         dbp.initPayload = &dctxprep;
1618         dbp.errorFn = ZSTD_isError;
1619         dbp.blockCount = nbBlocks;
1620         dbp.srcBuffers = (const void* const *) dstPtrs;
1621         dbp.srcSizes = dstCapacities;
1622         dbp.dstBuffers = resPtrs;
1623         dbp.dstCapacities = resSizes;
1624         dbp.blockResults = NULL;
1625 
1626         dctxprep.dctx = dctx;
1627         dctxprep.dictBuffer = dictBuffer;
1628         dctxprep.dictBufferSize = dictBufferSize;
1629 
1630         assert(timeStateCompress != NULL);
1631         assert(timeStateDecompress != NULL);
1632         while(!compressionCompleted) {
1633             BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, cbp);
1634 
1635             if (!BMK_isSuccessful_runOutcome(cOutcome)) {
1636                 BMK_benchOutcome_t bOut;
1637                 memset(&bOut, 0, sizeof(bOut));
1638                 bOut.tag = 1;   /* should rather be a function or a constant */
1639                 BMK_freeTimedFnState(timeStateCompress);
1640                 BMK_freeTimedFnState(timeStateDecompress);
1641                 return bOut;
1642             }
1643             {   BMK_runTime_t const rResult = BMK_extract_runTime(cOutcome);
1644                 bResult.cSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1645                 bResult.cSize = rResult.sumOfReturn;
1646             }
1647             compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
1648         }
1649 
1650         while (!decompressionCompleted) {
1651             BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
1652 
1653             if (!BMK_isSuccessful_runOutcome(dOutcome)) {
1654                 BMK_benchOutcome_t bOut;
1655                 memset(&bOut, 0, sizeof(bOut));
1656                 bOut.tag = 1;   /* should rather be a function or a constant */
1657                 BMK_freeTimedFnState(timeStateCompress);
1658                 BMK_freeTimedFnState(timeStateDecompress);
1659                 return bOut;
1660             }
1661             {   BMK_runTime_t const rResult = BMK_extract_runTime(dOutcome);
1662                 bResult.dSpeed = (unsigned long long)((double)srcSize * TIMELOOP_NANOSEC / rResult.nanoSecPerRun);
1663             }
1664             decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
1665         }
1666 
1667         BMK_freeTimedFnState(timeStateCompress);
1668         BMK_freeTimedFnState(timeStateDecompress);
1669     }
1670 
1671    /* Bench */
1672     bResult.cMem = ((size_t)1 << (comprParams->vals[wlog_ind])) + ZSTD_sizeof_CCtx(cctx);
1673 
1674     {   BMK_benchOutcome_t bOut;
1675         bOut.tag = 0;
1676         bOut.internal_never_use_directly = bResult;  /* should be a function */
1677         return bOut;
1678     }
1679 }
1680 
1681 /* BMK_benchParam() :
1682  * benchmark a set of `cParams` over sample `buf`,
1683  * store the result in `resultPtr`.
1684  * @return : 0 if success, 1 if error */
BMK_benchParam(BMK_benchResult_t * resultPtr,buffers_t buf,contexts_t ctx,paramValues_t cParams)1685 static int BMK_benchParam ( BMK_benchResult_t* resultPtr,
1686                             buffers_t buf, contexts_t ctx,
1687                             paramValues_t cParams)
1688 {
1689     BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx,
1690                                                         BASE_CLEVEL, &cParams,
1691                                                         BMK_both, 3);
1692     if (!BMK_isSuccessful_benchOutcome(outcome)) return 1;
1693     *resultPtr = BMK_extract_benchResult(outcome);
1694     return 0;
1695 }
1696 
1697 
1698 /* Benchmarking which stops when we are sufficiently sure the solution is infeasible / worse than the winner */
1699 #define VARIANCE 1.2
allBench(BMK_benchResult_t * resultPtr,const buffers_t buf,const contexts_t ctx,const paramValues_t cParams,const constraint_t target,BMK_benchResult_t * winnerResult,int feas)1700 static int allBench(BMK_benchResult_t* resultPtr,
1701                 const buffers_t buf, const contexts_t ctx,
1702                 const paramValues_t cParams,
1703                 const constraint_t target,
1704                 BMK_benchResult_t* winnerResult, int feas)
1705 {
1706     BMK_benchResult_t benchres;
1707     double uncertaintyConstantC = 3., uncertaintyConstantD = 3.;
1708     double winnerRS;
1709 
1710     BMK_benchOutcome_t const outcome = BMK_benchMemInvertible(buf, ctx, BASE_CLEVEL, &cParams, BMK_both, 2);
1711     if (!BMK_isSuccessful_benchOutcome(outcome)) {
1712         DEBUGOUTPUT("Benchmarking failed \n");
1713         return ERROR_RESULT;
1714     }
1715     benchres = BMK_extract_benchResult(outcome);
1716 
1717     winnerRS = resultScore(*winnerResult, buf.srcSize, target);
1718     DEBUGOUTPUT("WinnerScore: %f \n ", winnerRS);
1719 
1720     *resultPtr = benchres;
1721 
1722     /* anything with worse ratio in feas is definitely worse, discard */
1723     if(feas && benchres.cSize < winnerResult->cSize && !g_optmode) {
1724         return WORSE_RESULT;
1725     }
1726 
1727     /* calculate uncertainty in compression / decompression runs */
1728     if (benchres.cSpeed) {
1729         U64 const loopDurationC = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
1730         uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC);
1731     }
1732 
1733     if (benchres.dSpeed) {
1734         U64 const loopDurationD = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
1735         uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD);
1736     }
1737 
1738     /* optimistic assumption of benchres */
1739     {   BMK_benchResult_t resultMax = benchres;
1740         resultMax.cSpeed = (unsigned long long)(resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
1741         resultMax.dSpeed = (unsigned long long)(resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
1742 
1743         /* disregard infeasible results in feas mode */
1744         /* disregard if resultMax < winner in infeas mode */
1745         if((feas && !feasible(resultMax, target)) ||
1746           (!feas && (winnerRS > resultScore(resultMax, buf.srcSize, target)))) {
1747             return WORSE_RESULT;
1748         }
1749     }
1750 
1751     /* compare by resultScore when in infeas */
1752     /* compare by compareResultLT when in feas */
1753     if((!feas && (resultScore(benchres, buf.srcSize, target) > resultScore(*winnerResult, buf.srcSize, target))) ||
1754        (feas && (compareResultLT(*winnerResult, benchres, target, buf.srcSize))) )  {
1755         return BETTER_RESULT;
1756     } else {
1757         return WORSE_RESULT;
1758     }
1759 }
1760 
1761 
1762 #define INFEASIBLE_THRESHOLD 200
1763 /* Memoized benchmarking, won't benchmark anything which has already been benchmarked before. */
benchMemo(BMK_benchResult_t * resultPtr,const buffers_t buf,const contexts_t ctx,const paramValues_t cParams,const constraint_t target,BMK_benchResult_t * winnerResult,memoTable_t * const memoTableArray,const int feas)1764 static int benchMemo(BMK_benchResult_t* resultPtr,
1765                 const buffers_t buf, const contexts_t ctx,
1766                 const paramValues_t cParams,
1767                 const constraint_t target,
1768                 BMK_benchResult_t* winnerResult, memoTable_t* const memoTableArray,
1769                 const int feas) {
1770     static int bmcount = 0;
1771     int res;
1772 
1773     if ( memoTableGet(memoTableArray, cParams) >= INFEASIBLE_THRESHOLD
1774       || redundantParams(cParams, target, buf.maxBlockSize) ) {
1775         return WORSE_RESULT;
1776     }
1777 
1778     res = allBench(resultPtr, buf, ctx, cParams, target, winnerResult, feas);
1779 
1780     if(DEBUG && !(bmcount % 250)) {
1781         DISPLAY("Count: %d\n", bmcount);
1782         bmcount++;
1783     }
1784     BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, *resultPtr, cParams, target, buf.srcSize);
1785 
1786     if(res == BETTER_RESULT || feas) {
1787         memoTableSet(memoTableArray, cParams, 255); /* what happens if collisions are frequent */
1788     }
1789     return res;
1790 }
1791 
1792 
1793 typedef struct {
1794     U64 cSpeed_min;
1795     U64 dSpeed_min;
1796     U32 windowLog_max;
1797     ZSTD_strategy strategy_max;
1798 } level_constraints_t;
1799 
1800 static level_constraints_t g_level_constraint[NB_LEVELS_TRACKED+1];
1801 
BMK_init_level_constraints(int bytePerSec_level1)1802 static void BMK_init_level_constraints(int bytePerSec_level1)
1803 {
1804     assert(NB_LEVELS_TRACKED >= ZSTD_maxCLevel());
1805     memset(g_level_constraint, 0, sizeof(g_level_constraint));
1806     g_level_constraint[1].cSpeed_min = bytePerSec_level1;
1807     g_level_constraint[1].dSpeed_min = 0;
1808     g_level_constraint[1].windowLog_max = 19;
1809     g_level_constraint[1].strategy_max = ZSTD_fast;
1810 
1811     /* establish speed objectives (relative to level 1) */
1812     {   int l;
1813         for (l=2; l<=NB_LEVELS_TRACKED; l++) {
1814             g_level_constraint[l].cSpeed_min = (g_level_constraint[l-1].cSpeed_min * 49) / 64;
1815             g_level_constraint[l].dSpeed_min = 0;
1816             g_level_constraint[l].windowLog_max = (l<20) ? 23 : l+5;   /* only --ultra levels >= 20 can use windowlog > 23 */
1817             g_level_constraint[l].strategy_max = ZSTD_STRATEGY_MAX;
1818     }   }
1819 }
1820 
BMK_seed(winnerInfo_t * winners,const paramValues_t params,const buffers_t buf,const contexts_t ctx)1821 static int BMK_seed(winnerInfo_t* winners,
1822                     const paramValues_t params,
1823                     const buffers_t buf,
1824                     const contexts_t ctx)
1825 {
1826     BMK_benchResult_t testResult;
1827     int better = 0;
1828     int cLevel;
1829 
1830     BMK_benchParam(&testResult, buf, ctx, params);
1831 
1832     for (cLevel = 1; cLevel <= NB_LEVELS_TRACKED; cLevel++) {
1833 
1834         if (testResult.cSpeed < g_level_constraint[cLevel].cSpeed_min)
1835             continue;   /* not fast enough for this level */
1836         if (testResult.dSpeed < g_level_constraint[cLevel].dSpeed_min)
1837             continue;   /* not fast enough for this level */
1838         if (params.vals[wlog_ind] > g_level_constraint[cLevel].windowLog_max)
1839             continue;   /* too much memory for this level */
1840         if (params.vals[strt_ind] > (U32)g_level_constraint[cLevel].strategy_max)
1841             continue;   /* forbidden strategy for this level */
1842         if (winners[cLevel].result.cSize==0) {
1843             /* first solution for this cLevel */
1844             winners[cLevel].result = testResult;
1845             winners[cLevel].params = params;
1846             BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);
1847             better = 1;
1848             continue;
1849         }
1850 
1851         if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) {
1852             /* Validate solution is "good enough" */
1853             double W_ratio = (double)buf.srcSize / testResult.cSize;
1854             double O_ratio = (double)buf.srcSize / winners[cLevel].result.cSize;
1855             double W_ratioNote = log (W_ratio);
1856             double O_ratioNote = log (O_ratio);
1857             size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB);
1858             size_t O_DMemUsed = (1 << winners[cLevel].params.vals[wlog_ind]) + (16 KB);
1859             double W_DMemUsed_note = W_ratioNote * ( 40 + 9*cLevel) - log((double)W_DMemUsed);
1860             double O_DMemUsed_note = O_ratioNote * ( 40 + 9*cLevel) - log((double)O_DMemUsed);
1861 
1862             size_t W_CMemUsed = ((size_t)1 << params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(params));
1863             size_t O_CMemUsed = ((size_t)1 << winners[cLevel].params.vals[wlog_ind]) + ZSTD_estimateCCtxSize_usingCParams(pvalsToCParams(winners[cLevel].params));
1864             double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
1865             double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);
1866 
1867             double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log(testResult.cSpeed);
1868             double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed);
1869 
1870             double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log(testResult.dSpeed);
1871             double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed);
1872 
1873             if (W_DMemUsed_note < O_DMemUsed_note) {
1874                 /* uses too much Decompression memory for too little benefit */
1875                 if (W_ratio > O_ratio)
1876                 DISPLAYLEVEL(3, "Decompression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
1877                          W_ratio, (double)(W_DMemUsed) / 1024 / 1024,
1878                          O_ratio, (double)(O_DMemUsed) / 1024 / 1024,   cLevel);
1879                 continue;
1880             }
1881             if (W_CMemUsed_note < O_CMemUsed_note) {
1882                 /* uses too much memory for compression for too little benefit */
1883                 if (W_ratio > O_ratio)
1884                 DISPLAYLEVEL(3, "Compression Memory : %5.3f @ %4.1f MB  vs  %5.3f @ %4.1f MB   : not enough for level %i\n",
1885                          W_ratio, (double)(W_CMemUsed) / 1024 / 1024,
1886                          O_ratio, (double)(O_CMemUsed) / 1024 / 1024,
1887                          cLevel);
1888                 continue;
1889             }
1890             if (W_CSpeed_note   < O_CSpeed_note  ) {
1891                 /* too large compression speed difference for the compression benefit */
1892                 if (W_ratio > O_ratio)
1893                 DISPLAYLEVEL(3, "Compression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
1894                          W_ratio, (double)testResult.cSpeed / MB_UNIT,
1895                          O_ratio, (double)winners[cLevel].result.cSpeed / MB_UNIT,
1896                          cLevel);
1897                 continue;
1898             }
1899             if (W_DSpeed_note   < O_DSpeed_note  ) {
1900                 /* too large decompression speed difference for the compression benefit */
1901                 if (W_ratio > O_ratio)
1902                 DISPLAYLEVEL(3, "Decompression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
1903                          W_ratio, (double)testResult.dSpeed / MB_UNIT,
1904                          O_ratio, (double)winners[cLevel].result.dSpeed / MB_UNIT,
1905                          cLevel);
1906                 continue;
1907             }
1908 
1909             if (W_ratio < O_ratio)
1910                 DISPLAYLEVEL(3, "Solution %4.3f selected over %4.3f at level %i, due to better secondary statistics \n",
1911                                 W_ratio, O_ratio, cLevel);
1912 
1913             winners[cLevel].result = testResult;
1914             winners[cLevel].params = params;
1915             BMK_print_cLevelEntry(stdout, cLevel, params, testResult, buf.srcSize);
1916 
1917             better = 1;
1918     }   }
1919 
1920     return better;
1921 }
1922 
1923 /*-************************************
1924 *  Compression Level Table Generation Functions
1925 **************************************/
1926 
1927 #define PARAMTABLELOG   25
1928 #define PARAMTABLESIZE (1<<PARAMTABLELOG)
1929 #define PARAMTABLEMASK (PARAMTABLESIZE-1)
1930 static BYTE g_alreadyTested[PARAMTABLESIZE] = {0};   /* init to zero */
1931 
NB_TESTS_PLAYED(paramValues_t p)1932 static BYTE* NB_TESTS_PLAYED(paramValues_t p)
1933 {
1934     ZSTD_compressionParameters const cParams = pvalsToCParams(sanitizeParams(p));
1935     unsigned long long const h64 = XXH64(&cParams, sizeof(cParams), 0);
1936     return &g_alreadyTested[(h64 >> 3) & PARAMTABLEMASK];
1937 }
1938 
playAround(FILE * f,winnerInfo_t * winners,paramValues_t p,const buffers_t buf,const contexts_t ctx)1939 static void playAround(FILE* f,
1940                        winnerInfo_t* winners,
1941                        paramValues_t p,
1942                        const buffers_t buf, const contexts_t ctx)
1943 {
1944     int nbVariations = 0;
1945     UTIL_time_t const clockStart = UTIL_getTime();
1946 
1947     while (UTIL_clockSpanMicro(clockStart) < g_maxVariationTime) {
1948         if (nbVariations++ > g_maxNbVariations) break;
1949 
1950         do {
1951             int i;
1952             for(i = 0; i < 4; i++) {
1953                 paramVaryOnce(FUZ_rand(&g_rand) % (strt_ind + 1),
1954                               ((FUZ_rand(&g_rand) & 1) << 1) - 1,
1955                               &p);
1956             }
1957         } while (!paramValid(p));
1958 
1959         /* exclude faster if already played params */
1960         if (FUZ_rand(&g_rand) & ((1 << *NB_TESTS_PLAYED(p))-1))
1961             continue;
1962 
1963         /* test */
1964         {   BYTE* const b = NB_TESTS_PLAYED(p);
1965             (*b)++;
1966         }
1967         if (!BMK_seed(winners, p, buf, ctx)) continue;
1968 
1969         /* improvement found => search more */
1970         BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
1971         playAround(f, winners, p, buf, ctx);
1972     }
1973 
1974 }
1975 
1976 static void
BMK_selectRandomStart(FILE * f,winnerInfo_t * winners,const buffers_t buf,const contexts_t ctx)1977 BMK_selectRandomStart( FILE* f,
1978                        winnerInfo_t* winners,
1979                        const buffers_t buf, const contexts_t ctx)
1980 {
1981     U32 const id = FUZ_rand(&g_rand) % (NB_LEVELS_TRACKED+1);
1982     if ((id==0) || (winners[id].params.vals[wlog_ind]==0)) {
1983         /* use some random entry */
1984         paramValues_t const p = adjustParams(cParamsToPVals(pvalsToCParams(randomParams())), /* defaults nonCompression parameters */
1985                                              buf.srcSize, 0);
1986         playAround(f, winners, p, buf, ctx);
1987     } else {
1988         playAround(f, winners, winners[id].params, buf, ctx);
1989     }
1990 }
1991 
1992 
1993 /* BMK_generate_cLevelTable() :
1994  * test a large number of configurations
1995  * and distribute them across compression levels according to speed conditions.
1996  * display and save all intermediate results into rfName = "grillResults.txt".
1997  * the function automatically stops after g_timeLimit_s.
1998  * this function cannot error, it directly exit() in case of problem.
1999  */
BMK_generate_cLevelTable(const buffers_t buf,const contexts_t ctx)2000 static void BMK_generate_cLevelTable(const buffers_t buf, const contexts_t ctx)
2001 {
2002     paramValues_t params;
2003     winnerInfo_t winners[NB_LEVELS_TRACKED+1];
2004     const char* const rfName = "grillResults.txt";
2005     FILE* const f = fopen(rfName, "w");
2006 
2007     /* init */
2008     assert(g_singleRun==0);
2009     memset(winners, 0, sizeof(winners));
2010     if (f==NULL) { DISPLAY("error opening %s \n", rfName); exit(1); }
2011 
2012     if (g_target) {
2013         BMK_init_level_constraints(g_target * MB_UNIT);
2014     } else {
2015         /* baseline config for level 1 */
2016         paramValues_t const l1params = cParamsToPVals(ZSTD_getCParams(1, buf.maxBlockSize, ctx.dictSize));
2017         BMK_benchResult_t testResult;
2018         BMK_benchParam(&testResult, buf, ctx, l1params);
2019         BMK_init_level_constraints((int)((testResult.cSpeed * 31) / 32));
2020     }
2021 
2022     /* populate initial solution */
2023     {   const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2024         int i;
2025         for (i=0; i<=maxSeeds; i++) {
2026             params = cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, 0));
2027             BMK_seed(winners, params, buf, ctx);
2028     }   }
2029     BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2030 
2031     /* start tests */
2032     {   const UTIL_time_t grillStart = UTIL_getTime();
2033         do {
2034             BMK_selectRandomStart(f, winners, buf, ctx);
2035         } while (BMK_timeSpan_s(grillStart) < g_timeLimit_s);
2036     }
2037 
2038     /* end summary */
2039     BMK_saveAndPrint_cLevelTable(f, winners, buf.srcSize);
2040     DISPLAY("grillParams operations completed \n");
2041 
2042     /* clean up*/
2043     fclose(f);
2044 }
2045 
2046 
2047 /*-************************************
2048 *  Single Benchmark Functions
2049 **************************************/
2050 
2051 static int
benchOnce(const buffers_t buf,const contexts_t ctx,const int cLevel)2052 benchOnce(const buffers_t buf, const contexts_t ctx, const int cLevel)
2053 {
2054     BMK_benchResult_t testResult;
2055     g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevel, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2056 
2057     if (BMK_benchParam(&testResult, buf, ctx, g_params)) {
2058         DISPLAY("Error during benchmarking\n");
2059         return 1;
2060     }
2061 
2062     BMK_printWinner(stdout, CUSTOM_LEVEL, testResult, g_params, buf.srcSize);
2063 
2064     return 0;
2065 }
2066 
benchSample(double compressibility,int cLevel)2067 static int benchSample(double compressibility, int cLevel)
2068 {
2069     const char* const name = "Sample 10MB";
2070     size_t const benchedSize = 10 MB;
2071     void* const srcBuffer = malloc(benchedSize);
2072     int ret = 0;
2073 
2074     buffers_t buf;
2075     contexts_t ctx;
2076 
2077     if(srcBuffer == NULL) {
2078         DISPLAY("Out of Memory\n");
2079         return 2;
2080     }
2081 
2082     RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
2083 
2084     if(createBuffersFromMemory(&buf, srcBuffer, 1, &benchedSize)) {
2085         DISPLAY("Buffer Creation Error\n");
2086         free(srcBuffer);
2087         return 3;
2088     }
2089 
2090     if(createContexts(&ctx, NULL)) {
2091         DISPLAY("Context Creation Error\n");
2092         freeBuffers(buf);
2093         return 1;
2094     }
2095 
2096     /* bench */
2097     DISPLAY("\r%79s\r", "");
2098     DISPLAY("using %s %i%%: \n", name, (int)(compressibility*100));
2099 
2100     if(g_singleRun) {
2101         ret = benchOnce(buf, ctx, cLevel);
2102     } else {
2103         BMK_generate_cLevelTable(buf, ctx);
2104     }
2105 
2106     freeBuffers(buf);
2107     freeContexts(ctx);
2108 
2109     return ret;
2110 }
2111 
2112 /* benchFiles() :
2113  * note: while this function takes a table of filenames,
2114  * in practice, only the first filename will be used */
benchFiles(const char ** fileNamesTable,int nbFiles,const char * dictFileName,int cLevel)2115 static int benchFiles(const char** fileNamesTable, int nbFiles,
2116                       const char* dictFileName, int cLevel)
2117 {
2118     buffers_t buf;
2119     contexts_t ctx;
2120     int ret = 0;
2121 
2122     if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2123         DISPLAY("unable to load files\n");
2124         return 1;
2125     }
2126 
2127     if (createContexts(&ctx, dictFileName)) {
2128         DISPLAY("unable to load dictionary\n");
2129         freeBuffers(buf);
2130         return 2;
2131     }
2132 
2133     DISPLAY("\r%79s\r", "");
2134     if (nbFiles == 1) {
2135         DISPLAY("using %s : \n", fileNamesTable[0]);
2136     } else {
2137         DISPLAY("using %d Files : \n", nbFiles);
2138     }
2139 
2140     if (g_singleRun) {
2141         ret = benchOnce(buf, ctx, cLevel);
2142     } else {
2143         BMK_generate_cLevelTable(buf, ctx);
2144     }
2145 
2146     freeBuffers(buf);
2147     freeContexts(ctx);
2148     return ret;
2149 }
2150 
2151 
2152 /*-************************************
2153 *  Local Optimization Functions
2154 **************************************/
2155 
2156 /* One iteration of hill climbing. Specifically, it first tries all
2157  * valid parameter configurations w/ manhattan distance 1 and picks the best one
2158  * failing that, it progressively tries candidates further and further away (up to #dim + 2)
2159  * if it finds a candidate exceeding winnerInfo, it will repeat. Otherwise, it will stop the
2160  * current stage of hill climbing.
2161  * Each iteration of hill climbing proceeds in 2 'phases'. Phase 1 climbs according to
2162  * the resultScore function, which is effectively a linear increase in reward until it reaches
2163  * the constraint-satisfying value, it which point any excess results in only logarithmic reward.
2164  * This aims to find some constraint-satisfying point.
2165  * Phase 2 optimizes in accordance with what the original function sets out to maximize, with
2166  * all feasible solutions valued over all infeasible solutions.
2167  */
2168 
2169 /* sanitize all params here.
2170  * all generation after random should be sanitized. (maybe sanitize random)
2171  */
climbOnce(const constraint_t target,memoTable_t * mtAll,const buffers_t buf,const contexts_t ctx,const paramValues_t init)2172 static winnerInfo_t climbOnce(const constraint_t target,
2173                 memoTable_t* mtAll,
2174                 const buffers_t buf, const contexts_t ctx,
2175                 const paramValues_t init)
2176 {
2177     /*
2178      * cparam - currently considered 'center'
2179      * candidate - params to benchmark/results
2180      * winner - best option found so far.
2181      */
2182     paramValues_t cparam = init;
2183     winnerInfo_t candidateInfo, winnerInfo;
2184     int better = 1;
2185     int feas = 0;
2186 
2187     winnerInfo = initWinnerInfo(init);
2188     candidateInfo = winnerInfo;
2189 
2190     {   winnerInfo_t bestFeasible1 = initWinnerInfo(cparam);
2191         DEBUGOUTPUT("Climb Part 1\n");
2192         while(better) {
2193             int offset;
2194             size_t i, dist;
2195             const size_t varLen = mtAll[cparam.vals[strt_ind]].varLen;
2196             better = 0;
2197             DEBUGOUTPUT("Start\n");
2198             cparam = winnerInfo.params;
2199             candidateInfo.params = cparam;
2200              /* all dist-1 candidates */
2201             for (i = 0; i < varLen; i++) {
2202                 for (offset = -1; offset <= 1; offset += 2) {
2203                     CHECKTIME(winnerInfo);
2204                     candidateInfo.params = cparam;
2205                     paramVaryOnce(mtAll[cparam.vals[strt_ind]].varArray[i],
2206                                   offset,
2207                                   &candidateInfo.params);
2208 
2209                     if(paramValid(candidateInfo.params)) {
2210                         int res;
2211                         res = benchMemo(&candidateInfo.result, buf, ctx,
2212                             sanitizeParams(candidateInfo.params), target, &winnerInfo.result, mtAll, feas);
2213                         DEBUGOUTPUT("Res: %d\n", res);
2214                         if(res == BETTER_RESULT) { /* synonymous with better when called w/ infeasibleBM */
2215                             winnerInfo = candidateInfo;
2216                             better = 1;
2217                             if(compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2218                                 bestFeasible1 = winnerInfo;
2219                             }
2220                         }
2221                     }
2222                 }  /* for (offset = -1; offset <= 1; offset += 2) */
2223             }   /* for (i = 0; i < varLen; i++) */
2224 
2225             if(better) {
2226                 continue;
2227             }
2228 
2229             for (dist = 2; dist < varLen + 2; dist++) { /* varLen is # dimensions */
2230                 for (i = 0; i < (1ULL << varLen) / varLen + 2; i++) {
2231                     int res;
2232                     CHECKTIME(winnerInfo);
2233                     candidateInfo.params = cparam;
2234                     /* param error checking already done here */
2235                     paramVariation(&candidateInfo.params, mtAll, (U32)dist);
2236 
2237                     res = benchMemo(&candidateInfo.result,
2238                                 buf, ctx,
2239                                 sanitizeParams(candidateInfo.params), target,
2240                                 &winnerInfo.result, mtAll, feas);
2241                     DEBUGOUTPUT("Res: %d\n", res);
2242                     if (res == BETTER_RESULT) { /* synonymous with better in this case*/
2243                         winnerInfo = candidateInfo;
2244                         better = 1;
2245                         if (compareResultLT(bestFeasible1.result, winnerInfo.result, target, buf.srcSize)) {
2246                             bestFeasible1 = winnerInfo;
2247                         }
2248                         break;
2249                     }
2250                 }
2251 
2252                 if (better) {
2253                     break;
2254                 }
2255             }   /* for(dist = 2; dist < varLen + 2; dist++) */
2256 
2257             if (!better) { /* infeas -> feas -> stop */
2258                 if (feas) return winnerInfo;
2259                 feas = 1;
2260                 better = 1;
2261                 winnerInfo = bestFeasible1; /* note with change, bestFeasible may not necessarily be feasible, but if one has been benchmarked, it will be. */
2262                 DEBUGOUTPUT("Climb Part 2\n");
2263             }
2264         }
2265         winnerInfo = bestFeasible1;
2266     }
2267 
2268     return winnerInfo;
2269 }
2270 
2271 /* Optimizes for a fixed strategy */
2272 
2273 /* flexible parameters: iterations of failed climbing (or if we do non-random, maybe this is when everything is close to visited)
2274    weight more on visit for bad results, less on good results/more on later results / ones with more failures.
2275    allocate memoTable here.
2276  */
2277 static winnerInfo_t
optimizeFixedStrategy(const buffers_t buf,const contexts_t ctx,const constraint_t target,paramValues_t paramTarget,const ZSTD_strategy strat,memoTable_t * memoTableArray,const int tries)2278 optimizeFixedStrategy(const buffers_t buf, const contexts_t ctx,
2279                       const constraint_t target, paramValues_t paramTarget,
2280                       const ZSTD_strategy strat,
2281                       memoTable_t* memoTableArray, const int tries)
2282 {
2283     int i = 0;
2284 
2285     paramValues_t init;
2286     winnerInfo_t winnerInfo, candidateInfo;
2287     winnerInfo = initWinnerInfo(emptyParams());
2288     /* so climb is given the right fixed strategy */
2289     paramTarget.vals[strt_ind] = strat;
2290     /* to pass ZSTD_checkCParams */
2291     paramTarget = cParamUnsetMin(paramTarget);
2292 
2293     init = paramTarget;
2294 
2295     for(i = 0; i < tries; i++) {
2296         DEBUGOUTPUT("Restart\n");
2297         do {
2298             randomConstrainedParams(&init, memoTableArray, strat);
2299         } while(redundantParams(init, target, buf.maxBlockSize));
2300         candidateInfo = climbOnce(target, memoTableArray, buf, ctx, init);
2301         if (compareResultLT(winnerInfo.result, candidateInfo.result, target, buf.srcSize)) {
2302             winnerInfo = candidateInfo;
2303             BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winnerInfo.result, winnerInfo.params, target, buf.srcSize);
2304             i = 0;
2305             continue;
2306         }
2307         CHECKTIME(winnerInfo);
2308         i++;
2309     }
2310     return winnerInfo;
2311 }
2312 
2313 /* goes best, best-1, best+1, best-2, ... */
2314 /* return 0 if nothing remaining */
nextStrategy(const int currentStrategy,const int bestStrategy)2315 static int nextStrategy(const int currentStrategy, const int bestStrategy)
2316 {
2317     if(bestStrategy <= currentStrategy) {
2318         int candidate = 2 * bestStrategy - currentStrategy - 1;
2319         if(candidate < 1) {
2320             candidate = currentStrategy + 1;
2321             if(candidate > (int)ZSTD_STRATEGY_MAX) {
2322                 return 0;
2323             } else {
2324                 return candidate;
2325             }
2326         } else {
2327             return candidate;
2328         }
2329     } else { /* bestStrategy >= currentStrategy */
2330         int candidate = 2 * bestStrategy - currentStrategy;
2331         if(candidate > (int)ZSTD_STRATEGY_MAX) {
2332             candidate = currentStrategy - 1;
2333             if(candidate < 1) {
2334                 return 0;
2335             } else {
2336                 return candidate;
2337             }
2338         } else {
2339             return candidate;
2340         }
2341     }
2342 }
2343 
2344 /* experiment with playing with this and decay value */
2345 
2346 /* main fn called when using --optimize */
2347 /* Does strategy selection by benchmarking default compression levels
2348  * then optimizes by strategy, starting with the best one and moving
2349  * progressively moving further away by number
2350  * args:
2351  * fileNamesTable - list of files to benchmark
2352  * nbFiles - length of fileNamesTable
2353  * dictFileName - name of dictionary file if one, else NULL
2354  * target - performance constraints (cSpeed, dSpeed, cMem)
2355  * paramTarget - parameter constraints (i.e. restriction search space to where strategy = ZSTD_fast)
2356  * cLevel - compression level to exceed (all solutions must be > lvl in cSpeed + ratio)
2357  */
2358 
2359 static unsigned g_maxTries = 5;
2360 #define TRY_DECAY 1
2361 
2362 static int
optimizeForSize(const char * const * const fileNamesTable,const size_t nbFiles,const char * dictFileName,constraint_t target,paramValues_t paramTarget,const int cLevelOpt,const int cLevelRun,const U32 memoTableLog)2363 optimizeForSize(const char* const * const fileNamesTable, const size_t nbFiles,
2364                 const char* dictFileName,
2365                 constraint_t target, paramValues_t paramTarget,
2366                 const int cLevelOpt, const int cLevelRun,
2367                 const U32 memoTableLog)
2368 {
2369     varInds_t varArray [NUM_PARAMS];
2370     int ret = 0;
2371     const size_t varLen = variableParams(paramTarget, varArray, dictFileName != NULL);
2372     winnerInfo_t winner = initWinnerInfo(emptyParams());
2373     memoTable_t* allMT = NULL;
2374     paramValues_t paramBase;
2375     contexts_t ctx;
2376     buffers_t buf;
2377     g_time = UTIL_getTime();
2378 
2379     if (createBuffers(&buf, fileNamesTable, nbFiles)) {
2380         DISPLAY("unable to load files\n");
2381         return 1;
2382     }
2383 
2384     if (createContexts(&ctx, dictFileName)) {
2385         DISPLAY("unable to load dictionary\n");
2386         freeBuffers(buf);
2387         return 2;
2388     }
2389 
2390     if (nbFiles == 1) {
2391         DISPLAYLEVEL(2, "Loading %s...       \r", fileNamesTable[0]);
2392     } else {
2393         DISPLAYLEVEL(2, "Loading %lu Files...       \r", (unsigned long)nbFiles);
2394     }
2395 
2396     /* sanitize paramTarget */
2397     optimizerAdjustInput(&paramTarget, buf.maxBlockSize);
2398     paramBase = cParamUnsetMin(paramTarget);
2399 
2400     allMT = createMemoTableArray(paramTarget, varArray, varLen, memoTableLog);
2401 
2402     if (!allMT) {
2403         DISPLAY("MemoTable Init Error\n");
2404         ret = 2;
2405         goto _cleanUp;
2406     }
2407 
2408     /* default strictnesses */
2409     if (g_strictness == PARAM_UNSET) {
2410         if(g_optmode) {
2411             g_strictness = 100;
2412         } else {
2413             g_strictness = 90;
2414         }
2415     } else {
2416         if(0 >= g_strictness || g_strictness > 100) {
2417             DISPLAY("Strictness Outside of Bounds\n");
2418             ret = 4;
2419             goto _cleanUp;
2420         }
2421     }
2422 
2423     /* use level'ing mode instead of normal target mode */
2424     if (g_optmode) {
2425         winner.params = cParamsToPVals(ZSTD_getCParams(cLevelOpt, buf.maxBlockSize, ctx.dictSize));
2426         if(BMK_benchParam(&winner.result, buf, ctx, winner.params)) {
2427             ret = 3;
2428             goto _cleanUp;
2429         }
2430 
2431         g_lvltarget = winner.result;
2432         g_lvltarget.cSpeed = (g_lvltarget.cSpeed * g_strictness) / 100;
2433         g_lvltarget.dSpeed = (g_lvltarget.dSpeed * g_strictness) / 100;
2434         g_lvltarget.cSize = (g_lvltarget.cSize * 100) / g_strictness;
2435 
2436         target.cSpeed = (U32)g_lvltarget.cSpeed;
2437         target.dSpeed = (U32)g_lvltarget.dSpeed;
2438 
2439         BMK_printWinnerOpt(stdout, cLevelOpt, winner.result, winner.params, target, buf.srcSize);
2440     }
2441 
2442     /* Don't want it to return anything worse than the best known result */
2443     if (g_singleRun) {
2444         BMK_benchResult_t res;
2445         g_params = adjustParams(overwriteParams(cParamsToPVals(ZSTD_getCParams(cLevelRun, buf.maxBlockSize, ctx.dictSize)), g_params), buf.maxBlockSize, ctx.dictSize);
2446         if (BMK_benchParam(&res, buf, ctx, g_params)) {
2447             ret = 45;
2448             goto _cleanUp;
2449         }
2450         if(compareResultLT(winner.result, res, relaxTarget(target), buf.srcSize)) {
2451             winner.result = res;
2452             winner.params = g_params;
2453         }
2454     }
2455 
2456     /* bench */
2457     DISPLAYLEVEL(2, "\r%79s\r", "");
2458     if(nbFiles == 1) {
2459         DISPLAYLEVEL(2, "optimizing for %s", fileNamesTable[0]);
2460     } else {
2461         DISPLAYLEVEL(2, "optimizing for %lu Files", (unsigned long)nbFiles);
2462     }
2463 
2464     if(target.cSpeed != 0) { DISPLAYLEVEL(2," - limit compression speed %u MB/s", (unsigned)(target.cSpeed >> 20)); }
2465     if(target.dSpeed != 0) { DISPLAYLEVEL(2, " - limit decompression speed %u MB/s", (unsigned)(target.dSpeed >> 20)); }
2466     if(target.cMem != (U32)-1) { DISPLAYLEVEL(2, " - limit memory %u MB", (unsigned)(target.cMem >> 20)); }
2467 
2468     DISPLAYLEVEL(2, "\n");
2469     init_clockGranularity();
2470 
2471     {   paramValues_t CParams;
2472 
2473         /* find best solution from default params */
2474         {   const int maxSeeds = g_noSeed ? 1 : ZSTD_maxCLevel();
2475             DEBUGOUTPUT("Strategy Selection\n");
2476             if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2477                 BMK_benchResult_t candidate;
2478                 int i;
2479                 for (i=1; i<=maxSeeds; i++) {
2480                     int ec;
2481                     CParams = overwriteParams(cParamsToPVals(ZSTD_getCParams(i, buf.maxBlockSize, ctx.dictSize)), paramTarget);
2482                     ec = BMK_benchParam(&candidate, buf, ctx, CParams);
2483                     BMK_printWinnerOpt(stdout, i, candidate, CParams, target, buf.srcSize);
2484 
2485                     if(!ec && compareResultLT(winner.result, candidate, relaxTarget(target), buf.srcSize)) {
2486                         winner.result = candidate;
2487                         winner.params = CParams;
2488                     }
2489 
2490                     CHECKTIMEGT(ret, 0, _displayCleanUp); /* if pass time limit, stop */
2491                     /* if the current params are too slow, just stop. */
2492                     if(target.cSpeed > candidate.cSpeed * 3 / 2) { break; }
2493                 }
2494 
2495                 BMK_printWinnerOpt(stdout, CUSTOM_LEVEL, winner.result, winner.params, target, buf.srcSize);
2496             }
2497         }
2498 
2499         DEBUGOUTPUT("Real Opt\n");
2500         /* start 'real' optimization */
2501         {   int bestStrategy = (int)winner.params.vals[strt_ind];
2502             if (paramTarget.vals[strt_ind] == PARAM_UNSET) {
2503                 int st = bestStrategy;
2504                 int tries = g_maxTries;
2505 
2506                 /* one iterations of hill climbing with the level-defined parameters. */
2507                 {   winnerInfo_t const w1 = climbOnce(target, allMT, buf, ctx, winner.params);
2508                     if (compareResultLT(winner.result, w1.result, target, buf.srcSize)) {
2509                         winner = w1;
2510                     }
2511                     CHECKTIMEGT(ret, 0, _displayCleanUp);
2512                 }
2513 
2514                 while(st && tries > 0) {
2515                     winnerInfo_t wc;
2516                     DEBUGOUTPUT("StrategySwitch: %s\n", g_stratName[st]);
2517 
2518                     wc = optimizeFixedStrategy(buf, ctx, target, paramBase, st, allMT, tries);
2519 
2520                     if(compareResultLT(winner.result, wc.result, target, buf.srcSize)) {
2521                         winner = wc;
2522                         tries = g_maxTries;
2523                         bestStrategy = st;
2524                     } else {
2525                         st = nextStrategy(st, bestStrategy);
2526                         tries -= TRY_DECAY;
2527                     }
2528                     CHECKTIMEGT(ret, 0, _displayCleanUp);
2529                 }
2530             } else {
2531                 winner = optimizeFixedStrategy(buf, ctx, target, paramBase, paramTarget.vals[strt_ind], allMT, g_maxTries);
2532             }
2533 
2534         }
2535 
2536         /* no solution found */
2537         if(winner.result.cSize == (size_t)-1) {
2538             ret = 1;
2539             DISPLAY("No feasible solution found\n");
2540             goto _cleanUp;
2541         }
2542 
2543         /* end summary */
2544 _displayCleanUp:
2545         if (g_displayLevel >= 0) {
2546             BMK_displayOneResult(stdout, winner, buf.srcSize);
2547         }
2548         BMK_paramValues_into_commandLine(stdout, winner.params);
2549         DISPLAYLEVEL(1, "grillParams size - optimizer completed \n");
2550     }
2551 
2552 _cleanUp:
2553     freeContexts(ctx);
2554     freeBuffers(buf);
2555     freeMemoTableArray(allMT);
2556     return ret;
2557 }
2558 
2559 /*-************************************
2560 *  CLI parsing functions
2561 **************************************/
2562 
2563 /** longCommandWArg() :
2564  *  check if *stringPtr is the same as longCommand.
2565  *  If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
2566  * @return 0 and doesn't modify *stringPtr otherwise.
2567  * from zstdcli.c
2568  */
longCommandWArg(const char ** stringPtr,const char * longCommand)2569 static int longCommandWArg(const char** stringPtr, const char* longCommand)
2570 {
2571     size_t const comSize = strlen(longCommand);
2572     int const result = !strncmp(*stringPtr, longCommand, comSize);
2573     if (result) *stringPtr += comSize;
2574     return result;
2575 }
2576 
errorOut(const char * msg)2577 static void errorOut(const char* msg)
2578 {
2579     DISPLAY("%s \n", msg); exit(1);
2580 }
2581 
2582 /*! readU32FromChar() :
2583  * @return : unsigned integer value read from input in `char` format.
2584  *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
2585  *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
2586  *  Note : function will exit() program if digit sequence overflows */
readU32FromChar(const char ** stringPtr)2587 static unsigned readU32FromChar(const char** stringPtr)
2588 {
2589     const char errorMsg[] = "error: numeric value too large";
2590     unsigned sign = 1;
2591     unsigned result = 0;
2592     if(**stringPtr == '-') { sign = (unsigned)-1; (*stringPtr)++; }
2593     while ((**stringPtr >='0') && (**stringPtr <='9')) {
2594         unsigned const max = (((unsigned)(-1)) / 10) - 1;
2595         if (result > max) errorOut(errorMsg);
2596         result *= 10;
2597         assert(**stringPtr >= '0');
2598         result += (unsigned)(**stringPtr - '0');
2599         (*stringPtr)++ ;
2600     }
2601     if ((**stringPtr=='K') || (**stringPtr=='M')) {
2602         unsigned const maxK = ((unsigned)(-1)) >> 10;
2603         if (result > maxK) errorOut(errorMsg);
2604         result <<= 10;
2605         if (**stringPtr=='M') {
2606             if (result > maxK) errorOut(errorMsg);
2607             result <<= 10;
2608         }
2609         (*stringPtr)++;  /* skip `K` or `M` */
2610         if (**stringPtr=='i') (*stringPtr)++;
2611         if (**stringPtr=='B') (*stringPtr)++;
2612     }
2613     return result * sign;
2614 }
2615 
readDoubleFromChar(const char ** stringPtr)2616 static double readDoubleFromChar(const char** stringPtr)
2617 {
2618     double result = 0, divide = 10;
2619     while ((**stringPtr >='0') && (**stringPtr <='9')) {
2620         result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
2621     }
2622     if(**stringPtr!='.') {
2623         return result;
2624     }
2625     (*stringPtr)++;
2626     while ((**stringPtr >='0') && (**stringPtr <='9')) {
2627         result += (double)(**stringPtr - '0') / divide, divide *= 10, (*stringPtr)++ ;
2628     }
2629     return result;
2630 }
2631 
usage(const char * exename)2632 static int usage(const char* exename)
2633 {
2634     DISPLAY( "Usage :\n");
2635     DISPLAY( "      %s [arg] file\n", exename);
2636     DISPLAY( "Arguments :\n");
2637     DISPLAY( " file : path to the file used as reference (if none, generates a compressible sample)\n");
2638     DISPLAY( " -H/-h  : Help (this text + advanced options)\n");
2639     return 0;
2640 }
2641 
usage_advanced(void)2642 static int usage_advanced(void)
2643 {
2644     DISPLAY( "\nAdvanced options :\n");
2645     DISPLAY( " -T#          : set level 1 speed objective \n");
2646     DISPLAY( " -B#          : cut input into blocks of size # (default : single block) \n");
2647     DISPLAY( " --optimize=  : same as -O with more verbose syntax (see README.md)\n");
2648     DISPLAY( " -S           : Single run \n");
2649     DISPLAY( " --zstd       : Single run, parameter selection same as zstdcli \n");
2650     DISPLAY( " -P#          : generated sample compressibility (default : %.1f%%) \n", COMPRESSIBILITY_DEFAULT * 100);
2651     DISPLAY( " -t#          : Caps runtime of operation in seconds (default : %u seconds (%.1f hours)) \n",
2652                                 (unsigned)g_timeLimit_s, (double)g_timeLimit_s / 3600);
2653     DISPLAY( " -v           : Prints Benchmarking output\n");
2654     DISPLAY( " -D           : Next argument dictionary file\n");
2655     DISPLAY( " -s           : Seperate Files\n");
2656     return 0;
2657 }
2658 
badusage(const char * exename)2659 static int badusage(const char* exename)
2660 {
2661     DISPLAY("Wrong parameters\n");
2662     usage(exename);
2663     return 1;
2664 }
2665 
2666 #define PARSE_SUB_ARGS(stringLong, stringShort, variable) { \
2667     if ( longCommandWArg(&argument, stringLong)             \
2668       || longCommandWArg(&argument, stringShort) ) {        \
2669           variable = readU32FromChar(&argument);            \
2670           if (argument[0]==',') {                           \
2671               argument++; continue;                         \
2672           } else break;                                     \
2673 }   }
2674 
2675 /* 1 if successful parse, 0 otherwise */
parse_params(const char ** argptr,paramValues_t * pv)2676 static int parse_params(const char** argptr, paramValues_t* pv) {
2677     int matched = 0;
2678     const char* argOrig = *argptr;
2679     varInds_t v;
2680     for(v = 0; v < NUM_PARAMS; v++) {
2681         if ( longCommandWArg(argptr,g_shortParamNames[v])
2682           || longCommandWArg(argptr, g_paramNames[v]) ) {
2683             if(**argptr == '=') {
2684                 (*argptr)++;
2685                 pv->vals[v] = readU32FromChar(argptr);
2686                 matched = 1;
2687                 break;
2688             }
2689         }
2690         /* reset and try again */
2691         *argptr = argOrig;
2692     }
2693     return matched;
2694 }
2695 
2696 /*-************************************
2697 *  Main
2698 **************************************/
2699 
main(int argc,const char ** argv)2700 int main(int argc, const char** argv)
2701 {
2702     int i,
2703         filenamesStart=0,
2704         result;
2705     const char* exename=argv[0];
2706     const char* input_filename = NULL;
2707     const char* dictFileName = NULL;
2708     U32 main_pause = 0;
2709     int cLevelOpt = 0, cLevelRun = 0;
2710     int seperateFiles = 0;
2711     double compressibility = COMPRESSIBILITY_DEFAULT;
2712     U32 memoTableLog = PARAM_UNSET;
2713     constraint_t target = { 0, 0, (U32)-1 };
2714 
2715     paramValues_t paramTarget = emptyParams();
2716     g_params = emptyParams();
2717 
2718     assert(argc>=1);   /* for exename */
2719 
2720     for(i=1; i<argc; i++) {
2721         const char* argument = argv[i];
2722         DEBUGOUTPUT("%d: %s\n", i, argument);
2723         assert(argument != NULL);
2724 
2725         if(!strcmp(argument,"--no-seed")) { g_noSeed = 1; continue; }
2726 
2727         if (longCommandWArg(&argument, "--optimize=")) {
2728             g_optimizer = 1;
2729             for ( ; ;) {
2730                 if(parse_params(&argument, &paramTarget)) { if(argument[0] == ',') { argument++; continue; } else break; }
2731                 PARSE_SUB_ARGS("compressionSpeed=" ,  "cSpeed=", target.cSpeed);
2732                 PARSE_SUB_ARGS("decompressionSpeed=", "dSpeed=", target.dSpeed);
2733                 PARSE_SUB_ARGS("compressionMemory=" , "cMem=", target.cMem);
2734                 PARSE_SUB_ARGS("strict=", "stc=", g_strictness);
2735                 PARSE_SUB_ARGS("maxTries=", "tries=", g_maxTries);
2736                 PARSE_SUB_ARGS("memoLimitLog=", "memLog=", memoTableLog);
2737                 if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelOpt = (int)readU32FromChar(&argument); g_optmode = 1; if (argument[0]==',') { argument++; continue; } else break; }
2738                 if (longCommandWArg(&argument, "speedForRatio=") || longCommandWArg(&argument, "speedRatio=")) { g_ratioMultiplier = readDoubleFromChar(&argument); if (argument[0]==',') { argument++; continue; } else break; }
2739 
2740                 DISPLAY("invalid optimization parameter \n");
2741                 return 1;
2742             }
2743 
2744             if (argument[0] != 0) {
2745                 DISPLAY("invalid --optimize= format\n");
2746                 return 1; /* check the end of string */
2747             }
2748             continue;
2749         } else if (longCommandWArg(&argument, "--zstd=")) {
2750         /* Decode command (note : aggregated commands are allowed) */
2751             g_singleRun = 1;
2752             for ( ; ;) {
2753                 if(parse_params(&argument, &g_params)) { if(argument[0] == ',') { argument++; continue; } else break; }
2754                 if (longCommandWArg(&argument, "level=") || longCommandWArg(&argument, "lvl=")) { cLevelRun = (int)readU32FromChar(&argument); g_params = emptyParams(); if (argument[0]==',') { argument++; continue; } else break; }
2755 
2756                 DISPLAY("invalid compression parameter \n");
2757                 return 1;
2758             }
2759 
2760             if (argument[0] != 0) {
2761                 DISPLAY("invalid --zstd= format\n");
2762                 return 1; /* check the end of string */
2763             }
2764             continue;
2765             /* if not return, success */
2766 
2767         } else if (longCommandWArg(&argument, "--display=")) {
2768             /* Decode command (note : aggregated commands are allowed) */
2769             memset(g_silenceParams, 1, sizeof(g_silenceParams));
2770             for ( ; ;) {
2771                 int found = 0;
2772                 varInds_t v;
2773                 for(v = 0; v < NUM_PARAMS; v++) {
2774                     if(longCommandWArg(&argument, g_shortParamNames[v]) || longCommandWArg(&argument, g_paramNames[v])) {
2775                         g_silenceParams[v] = 0;
2776                         found = 1;
2777                     }
2778                 }
2779                 if(longCommandWArg(&argument, "compressionParameters") || longCommandWArg(&argument, "cParams")) {
2780                     for(v = 0; v <= strt_ind; v++) {
2781                         g_silenceParams[v] = 0;
2782                     }
2783                     found = 1;
2784                 }
2785 
2786 
2787                 if(found) {
2788                     if(argument[0]==',') {
2789                         continue;
2790                     } else {
2791                         break;
2792                     }
2793                 }
2794                 DISPLAY("invalid parameter name parameter \n");
2795                 return 1;
2796             }
2797 
2798             if (argument[0] != 0) {
2799                 DISPLAY("invalid --display format\n");
2800                 return 1; /* check the end of string */
2801             }
2802             continue;
2803         } else if (argument[0]=='-') {
2804             argument++;
2805 
2806             while (argument[0]!=0) {
2807 
2808                 switch(argument[0])
2809                 {
2810                     /* Display help on usage */
2811                 case 'h' :
2812                 case 'H': usage(exename); usage_advanced(); return 0;
2813 
2814                     /* Pause at the end (hidden option) */
2815                 case 'p': main_pause = 1; argument++; break;
2816 
2817                     /* Sample compressibility (when no file provided) */
2818                 case 'P':
2819                     argument++;
2820                     {   U32 const proba32 = readU32FromChar(&argument);
2821                         compressibility = (double)proba32 / 100.;
2822                     }
2823                     break;
2824 
2825                     /* Run Single conf */
2826                 case 'S':
2827                     g_singleRun = 1;
2828                     argument++;
2829                     for ( ; ; ) {
2830                         switch(*argument)
2831                         {
2832                         case 'w':
2833                             argument++;
2834                             g_params.vals[wlog_ind] = readU32FromChar(&argument);
2835                             continue;
2836                         case 'c':
2837                             argument++;
2838                             g_params.vals[clog_ind] = readU32FromChar(&argument);
2839                             continue;
2840                         case 'h':
2841                             argument++;
2842                             g_params.vals[hlog_ind] = readU32FromChar(&argument);
2843                             continue;
2844                         case 's':
2845                             argument++;
2846                             g_params.vals[slog_ind] = readU32FromChar(&argument);
2847                             continue;
2848                         case 'l':  /* search length */
2849                             argument++;
2850                             g_params.vals[mml_ind] = readU32FromChar(&argument);
2851                             continue;
2852                         case 't':  /* target length */
2853                             argument++;
2854                             g_params.vals[tlen_ind] = readU32FromChar(&argument);
2855                             continue;
2856                         case 'S':  /* strategy */
2857                             argument++;
2858                             g_params.vals[strt_ind] = readU32FromChar(&argument);
2859                             continue;
2860                         case 'f':  /* forceAttachDict */
2861                             argument++;
2862                             g_params.vals[fadt_ind] = readU32FromChar(&argument);
2863                             continue;
2864                         case 'L':
2865                             {   argument++;
2866                                 cLevelRun = (int)readU32FromChar(&argument);
2867                                 g_params = emptyParams();
2868                                 continue;
2869                             }
2870                         default : ;
2871                         }
2872                         break;
2873                     }
2874 
2875                     break;
2876 
2877                     /* target level1 speed objective, in MB/s */
2878                 case 'T':
2879                     argument++;
2880                     g_target = readU32FromChar(&argument);
2881                     break;
2882 
2883                     /* cut input into blocks */
2884                 case 'B':
2885                     argument++;
2886                     g_blockSize = readU32FromChar(&argument);
2887                     DISPLAY("using %u KB block size \n", (unsigned)(g_blockSize>>10));
2888                     break;
2889 
2890                     /* caps runtime (in seconds) */
2891                 case 't':
2892                     argument++;
2893                     g_timeLimit_s = readU32FromChar(&argument);
2894                     break;
2895 
2896                 case 's':
2897                     argument++;
2898                     seperateFiles = 1;
2899                     break;
2900 
2901                 case 'q':
2902                     while (argument[0] == 'q') { argument++; g_displayLevel--; }
2903                     break;
2904 
2905                 case 'v':
2906                     while (argument[0] == 'v') { argument++; g_displayLevel++; }
2907                     break;
2908 
2909                 /* load dictionary file (only applicable for optimizer rn) */
2910                 case 'D':
2911                     if(i == argc - 1) { /* last argument, return error. */
2912                         DISPLAY("Dictionary file expected but not given : %d\n", i);
2913                         return 1;
2914                     } else {
2915                         i++;
2916                         dictFileName = argv[i];
2917                         argument += strlen(argument);
2918                     }
2919                     break;
2920 
2921                     /* Unknown command */
2922                 default : return badusage(exename);
2923                 }
2924             }
2925             continue;
2926         }   /* if (argument[0]=='-') */
2927 
2928         /* first provided filename is input */
2929         if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
2930     }
2931 
2932     /* Welcome message */
2933     DISPLAYLEVEL(2, WELCOME_MESSAGE);
2934 
2935     if (filenamesStart==0) {
2936         if (g_optimizer) {
2937             DISPLAY("Optimizer Expects File\n");
2938             return 1;
2939         } else {
2940             result = benchSample(compressibility, cLevelRun);
2941         }
2942     } else {
2943         if(seperateFiles) {
2944             for(i = 0; i < argc - filenamesStart; i++) {
2945                 if (g_optimizer) {
2946                     result = optimizeForSize(argv+filenamesStart + i, 1, dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2947                     if(result) { DISPLAY("Error on File %d", i); return result; }
2948                 } else {
2949                     result = benchFiles(argv+filenamesStart + i, 1, dictFileName, cLevelRun);
2950                     if(result) { DISPLAY("Error on File %d", i); return result; }
2951                 }
2952             }
2953         } else {
2954             if (g_optimizer) {
2955                 assert(filenamesStart < argc);
2956                 result = optimizeForSize(argv+filenamesStart, (size_t)(argc-filenamesStart), dictFileName, target, paramTarget, cLevelOpt, cLevelRun, memoTableLog);
2957             } else {
2958                 result = benchFiles(argv+filenamesStart, argc-filenamesStart, dictFileName, cLevelRun);
2959             }
2960         }
2961     }
2962 
2963     if (main_pause) { int unused; printf("press enter...\n"); unused = getchar(); (void)unused; }
2964 
2965     return result;
2966 }
2967