• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * xxhsum - Command line interface for xxhash algorithms
3  * Copyright (C) 2013-2020 Yann Collet
4  *
5  * GPL v2 License
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * You can contact the author at:
22  *   - xxHash homepage: https://www.xxhash.com
23  *   - xxHash source repository: https://github.com/Cyan4973/xxHash
24  */
25 
26 /*
27  * xxhsum:
28  * Provides hash value of a file content, or a list of files, or stdin
29  * Display convention is Big Endian, for both 32 and 64 bits algorithms
30  */
31 
32 /* Transitional headers */
33 #include "cli/xsum_config.h"
34 #include "cli/xsum_arch.h"
35 #include "cli/xsum_os_specific.h"
36 #include "cli/xsum_output.h"
37 #include "cli/xsum_sanity_check.h"
38 #ifdef XXH_INLINE_ALL
39 #  include "cli/xsum_os_specific.c"
40 #  include "cli/xsum_output.c"
41 #  include "cli/xsum_sanity_check.c"
42 #endif
43 
44 /* ************************************
45  *  Includes
46  **************************************/
47 #include <limits.h>
48 #include <stdlib.h>     /* malloc, calloc, free, exit */
49 #include <string.h>     /* strcmp, memcpy */
50 #include <stdio.h>      /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
51 #include <sys/types.h>  /* stat, stat64, _stat64 */
52 #include <sys/stat.h>   /* stat, stat64, _stat64 */
53 #include <time.h>       /* clock_t, clock, CLOCKS_PER_SEC */
54 #include <assert.h>     /* assert */
55 #include <errno.h>      /* errno */
56 
57 #define XXH_STATIC_LINKING_ONLY   /* *_state_t */
58 #include "xxhash.h"
59 
60 #ifdef XXHSUM_DISPATCH
61 #  include "xxh_x86dispatch.h"
62 #endif
63 
XSUM_isLittleEndian(void)64 static unsigned XSUM_isLittleEndian(void)
65 {
66     const union { XSUM_U32 u; XSUM_U8 c[4]; } one = { 1 };   /* don't use static: performance detrimental  */
67     return one.c[0];
68 }
69 
/* Pointer width of the build, in bits. */
static const int g_nbBits = (int)(sizeof(void*)*8);

/* Human-readable endianness labels; ENDIAN_NAME picks one at run time. */
static const char g_lename[] = "little endian";
static const char g_bename[] = "big endian";
#define ENDIAN_NAME (XSUM_isLittleEndian() ? g_lename : g_bename)

static const char author[] = "Yann Collet";

/*
 * Both macros expand to a printf-style argument list (format string followed
 * by its arguments), so they are meant to be spliced into a variadic call.
 */
#define WELCOME_MESSAGE(exename) "%s %s by %s \n", exename, XSUM_PROGRAM_VERSION, author
#define FULL_WELCOME_MESSAGE(exename) \
    "%s %s by %s \n" \
    "compiled as %i-bit %s %s with " XSUM_CC_VERSION_FMT " \n", \
    exename, XSUM_PROGRAM_VERSION, author, \
    g_nbBits, XSUM_ARCH, ENDIAN_NAME, XSUM_CC_VERSION
80 
81 #define KB *( 1<<10)
82 #define MB *( 1<<20)
83 #define GB *(1U<<30)
84 
85 static size_t XSUM_DEFAULT_SAMPLE_SIZE = 100 KB;
86 #define NBLOOPS    3                              /* Default number of benchmark iterations */
87 #define TIMELOOP_S 1
88 #define TIMELOOP  (TIMELOOP_S * CLOCKS_PER_SEC)   /* target timing per iteration */
89 #define TIMELOOP_MIN (TIMELOOP / 2)               /* minimum timing to validate a result */
90 #define XXHSUM32_DEFAULT_SEED 0                   /* Default seed for algo_xxh32 */
91 #define XXHSUM64_DEFAULT_SEED 0                   /* Default seed for algo_xxh64 */
92 
93 #define MAX_MEM    (2 GB - 64 MB)
94 
/* Sentinel file name standing for standard input. */
static const char stdinName[] = "-";

/* Hash algorithm selector; the numeric values index the XSUM_algo* tables. */
typedef enum {
    algo_xxh32  = 0,
    algo_xxh64  = 1,
    algo_xxh128 = 2
} AlgoSelected;

/* Algorithm used when none is requested; needed by main() & XSUM_usage(). */
static AlgoSelected g_defaultAlgo = algo_xxh64;

/*
 * Initial line buffer size:
 *   <16 hex char> <SPC> <SPC> <filename> <'\0'>
 * where '4096' is the typical Linux PATH_MAX configuration.
 */
#define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1)

/* Longest line that will be accepted. */
#define MAX_LINE_LENGTH (32 KB)
105 
106 
107 /* ************************************
108  *  Display macros
109  **************************************/
110 
111 
112 /* ************************************
113  *  Local variables
114  **************************************/
115 static XSUM_U32 g_nbIterations = NBLOOPS;
116 
117 
118 /* ************************************
119  *  Benchmark Functions
120  **************************************/
/*
 * Returns the number of clock() ticks elapsed since `start`.
 * Per the original note, the subtraction is expected to stay valid across a
 * counter wrap; typical maximum span is about 30 minutes.
 */
static clock_t XSUM_clockSpan( clock_t start )
{
    clock_t const now = clock();
    return now - start;
}
125 
/*
 * XSUM_findMaxMem():
 * Probes how much memory can actually be allocated for a workload of
 * `requiredMem` bytes.
 *
 * The request is rounded up to the next 64 MB boundary, padded with two extra
 * steps and capped at MAX_MEM. It is then shrunk (by 64 MB steps, then by
 * halving) until a trial malloc() succeeds. One more step is given back so
 * that some memory stays available for the rest of the program.
 *
 * @return a size believed to be allocatable, or 0 if even the smallest
 *         trial allocation failed.
 */
static size_t XSUM_findMaxMem(XSUM_U64 requiredMem)
{
    size_t const step = 64 MB;
    void* testmem = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2*step;
    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;

    while (!testmem) {
        if (requiredMem > step) requiredMem -= step;
        else requiredMem >>= 1;
        /* malloc(0) is allowed to return NULL: without this guard the loop
         * would spin forever once the request has shrunk to nothing. */
        if (requiredMem == 0) return 0;
        testmem = malloc ((size_t)requiredMem);
    }
    free (testmem);

    /* keep some space available */
    if (requiredMem > step) requiredMem -= step;
    else requiredMem >>= 1;

    return (size_t)requiredMem;
}
148 
/*
 * XSUM_strcatDup():
 * Builds a freshly allocated, NUL-terminated string holding `s1` immediately
 * followed by `s2` (strdup-like ownership: the caller frees the result).
 * Neither argument may be NULL.
 *
 * @return the new string, or NULL when the allocation fails.
 */
static char* XSUM_strcatDup(const char* s1, const char* s2)
{
    size_t headLen;
    size_t tailLen;
    char*  joined;

    assert(s1 != NULL);
    assert(s2 != NULL);

    headLen = strlen(s1);
    tailLen = strlen(s2);
    joined  = (char*)malloc(headLen + tailLen + 1);
    if (joined == NULL) return NULL;

    memcpy(joined, s1, headLen);                 /* head, without its terminator */
    memcpy(joined + headLen, s2, tailLen + 1);   /* tail, terminator included */
    return joined;
}
169 
170 
171 /*
172  * A secret buffer used for benchmarking XXH3's withSecret variants.
173  *
174  * In order for the bench to be realistic, the secret buffer would need to be
175  * pre-generated.
176  *
177  * Adding a pointer to the parameter list would be messy.
178  */
179 static XSUM_U8 g_benchSecretBuf[XXH3_SECRET_SIZE_MIN];
180 
181 /*
182  * Wrappers for the benchmark.
183  *
184  * If you would like to add other hashes to the bench, create a wrapper and add
185  * it to the g_hashesToBench table. It will automatically be added.
186  */
187 typedef XSUM_U32 (*hashFunction)(const void* buffer, size_t bufferSize, XSUM_U32 seed);
188 
/* Bench wrapper: one-shot XXH32 of `buffer`, using `seed`. */
static XSUM_U32 localXXH32(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XSUM_U32 const digest = XXH32(buffer, bufferSize, seed);
    return digest;
}
/* Bench wrapper: one-shot XXH64 of `buffer`, using `seed`; the result is truncated to 32 bits. */
static XSUM_U32 localXXH64(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const digest = XXH64(buffer, bufferSize, seed);
    return (XSUM_U32)digest;
}
/* Bench wrapper: one-shot unseeded XXH3 64-bit (`seed` is ignored); truncated to 32 bits. */
static XSUM_U32 localXXH3_64b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const digest = XXH3_64bits(buffer, bufferSize);
    (void)seed;
    return (XSUM_U32)digest;
}
/* Bench wrapper: one-shot XXH3 64-bit with `seed`; truncated to 32 bits. */
static XSUM_U32 localXXH3_64b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const digest = XXH3_64bits_withSeed(buffer, bufferSize, seed);
    return (XSUM_U32)digest;
}
/* Bench wrapper: one-shot XXH3 64-bit keyed with g_benchSecretBuf (`seed` is ignored); truncated to 32 bits. */
static XSUM_U32 localXXH3_64b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const digest =
        XXH3_64bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (XSUM_U32)digest;
}
/* Bench wrapper: one-shot unseeded XXH3 128-bit (`seed` is ignored); returns the low 64 bits truncated to 32. */
static XSUM_U32 localXXH3_128b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH128_hash_t const digest = XXH3_128bits(buffer, bufferSize);
    (void)seed;
    return (XSUM_U32)(digest.low64);
}
/* Bench wrapper: one-shot XXH3 128-bit with `seed`; returns the low 64 bits truncated to 32. */
static XSUM_U32 localXXH3_128b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH128_hash_t const digest = XXH3_128bits_withSeed(buffer, bufferSize, seed);
    return (XSUM_U32)(digest.low64);
}
/* Bench wrapper: one-shot XXH3 128-bit keyed with g_benchSecretBuf (`seed` is ignored); low 64 bits truncated to 32. */
static XSUM_U32 localXXH3_128b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH128_hash_t const digest =
        XXH3_128bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (XSUM_U32)(digest.low64);
}
/*
 * Bench wrapper: XXH3 64-bit through the streaming API (reset, a single
 * update with the whole buffer, digest). `seed` is ignored; the digest is
 * truncated to 32 bits.
 */
static XSUM_U32 localXXH3_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t hashState;
    XXH64_hash_t digest;
    (void)seed;

    XXH3_64bits_reset(&hashState);
    XXH3_64bits_update(&hashState, buffer, bufferSize);
    digest = XXH3_64bits_digest(&hashState);
    return (XSUM_U32)digest;
}
/*
 * Bench wrapper: seeded XXH3 64-bit through the streaming API.
 * The stack state goes through XXH3_INITSTATE() before the seeded reset.
 * The digest is truncated to 32 bits.
 */
static XSUM_U32 localXXH3_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t hashState;
    XXH64_hash_t digest;

    XXH3_INITSTATE(&hashState);
    XXH3_64bits_reset_withSeed(&hashState, (XXH64_hash_t)seed);
    XXH3_64bits_update(&hashState, buffer, bufferSize);
    digest = XXH3_64bits_digest(&hashState);
    return (XSUM_U32)digest;
}
/*
 * Bench wrapper: XXH3 128-bit through the streaming API (reset, a single
 * update with the whole buffer, digest). `seed` is ignored; the low 64 bits
 * of the digest are truncated to 32.
 */
static XSUM_U32 localXXH128_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t  hashState;
    XXH128_hash_t digest;
    (void)seed;

    XXH3_128bits_reset(&hashState);
    XXH3_128bits_update(&hashState, buffer, bufferSize);
    digest = XXH3_128bits_digest(&hashState);
    return (XSUM_U32)(digest.low64);
}
/*
 * Bench wrapper: seeded XXH3 128-bit through the streaming API.
 * The stack state goes through XXH3_INITSTATE() before the seeded reset.
 * The low 64 bits of the digest are truncated to 32.
 */
static XSUM_U32 localXXH128_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t  hashState;
    XXH128_hash_t digest;

    XXH3_INITSTATE(&hashState);
    XXH3_128bits_reset_withSeed(&hashState, (XXH64_hash_t)seed);
    XXH3_128bits_update(&hashState, buffer, bufferSize);
    digest = XXH3_128bits_digest(&hashState);
    return (XSUM_U32)(digest.low64);
}
257 
258 
259 typedef struct {
260     const char*  name;
261     hashFunction func;
262 } hashInfo;
263 
264 #define NB_HASHFUNC 12
265 static const hashInfo g_hashesToBench[NB_HASHFUNC] = {
266     { "XXH32",             &localXXH32 },
267     { "XXH64",             &localXXH64 },
268     { "XXH3_64b",          &localXXH3_64b },
269     { "XXH3_64b w/seed",   &localXXH3_64b_seeded },
270     { "XXH3_64b w/secret", &localXXH3_64b_secret },
271     { "XXH128",            &localXXH3_128b },
272     { "XXH128 w/seed",     &localXXH3_128b_seeded },
273     { "XXH128 w/secret",   &localXXH3_128b_secret },
274     { "XXH3_stream",       &localXXH3_stream },
275     { "XXH3_stream w/seed",&localXXH3_stream_seeded },
276     { "XXH128_stream",     &localXXH128_stream },
277     { "XXH128_stream w/seed",&localXXH128_stream_seeded },
278 };
279 
280 #define NB_TESTFUNC (1 + 2 * NB_HASHFUNC)
281 static char g_testIDs[NB_TESTFUNC] = { 0 };
282 static const char k_testIDs_default[NB_TESTFUNC] = { 0,
283         1 /*XXH32*/, 0,
284         1 /*XXH64*/, 0,
285         1 /*XXH3*/, 0, 0, 0, 0, 0,
286         1 /*XXH128*/ };
287 
288 #define HASHNAME_MAX 29
XSUM_benchHash(hashFunction h,const char * hName,int testID,const void * buffer,size_t bufferSize)289 static void XSUM_benchHash(hashFunction h, const char* hName, int testID,
290                           const void* buffer, size_t bufferSize)
291 {
292     XSUM_U32 nbh_perIteration = (XSUM_U32)((300 MB) / (bufferSize+1)) + 1;  /* first iteration conservatively aims for 300 MB/s */
293     unsigned iterationNb, nbIterations = g_nbIterations + !g_nbIterations /* min 1 */;
294     double fastestH = 100000000.;
295     assert(HASHNAME_MAX > 2);
296     XSUM_logVerbose(2, "\r%80s\r", "");       /* Clean display line */
297 
298     for (iterationNb = 1; iterationNb <= nbIterations; iterationNb++) {
299         XSUM_U32 r=0;
300         clock_t cStart;
301 
302         XSUM_logVerbose(2, "%2u-%-*.*s : %10u ->\r",
303                         iterationNb,
304                         HASHNAME_MAX, HASHNAME_MAX, hName,
305                         (unsigned)bufferSize);
306         cStart = clock();
307         while (clock() == cStart);   /* starts clock() at its exact beginning */
308         cStart = clock();
309 
310         {   XSUM_U32 u;
311             for (u=0; u<nbh_perIteration; u++)
312                 r += h(buffer, bufferSize, u);
313         }
314         if (r==0) XSUM_logVerbose(3,".\r");  /* do something with r to defeat compiler "optimizing" hash away */
315 
316         {   clock_t const nbTicks = XSUM_clockSpan(cStart);
317             double const ticksPerHash = ((double)nbTicks / TIMELOOP) / nbh_perIteration;
318             /*
319              * clock() is the only decent portable timer, but it isn't very
320              * precise.
321              *
322              * Sometimes, this lack of precision is enough that the benchmark
323              * finishes before there are enough ticks to get a meaningful result.
324              *
325              * For example, on a Core 2 Duo (without any sort of Turbo Boost),
326              * the imprecise timer caused peculiar results like so:
327              *
328              *    XXH3_64b                   4800.0 MB/s // conveniently even
329              *    XXH3_64b unaligned         4800.0 MB/s
330              *    XXH3_64b seeded            9600.0 MB/s // magical 2x speedup?!
331              *    XXH3_64b seeded unaligned  4800.0 MB/s
332              *
333              * If we sense a suspiciously low number of ticks, we increase the
334              * iterations until we can get something meaningful.
335              */
336             if (nbTicks < TIMELOOP_MIN) {
337                 /* Not enough time spent in benchmarking, risk of rounding bias */
338                 if (nbTicks == 0) { /* faster than resolution timer */
339                     nbh_perIteration *= 100;
340                 } else {
341                     /*
342                      * update nbh_perIteration so that the next round lasts
343                      * approximately 1 second.
344                      */
345                     double nbh_perSecond = (1 / ticksPerHash) + 1;
346                     if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20);   /* avoid overflow */
347                     nbh_perIteration = (XSUM_U32)nbh_perSecond;
348                 }
349                 /* g_nbIterations==0 => quick evaluation, no claim of accuracy */
350                 if (g_nbIterations>0) {
351                     iterationNb--;   /* new round for a more accurate speed evaluation */
352                     continue;
353                 }
354             }
355             if (ticksPerHash < fastestH) fastestH = ticksPerHash;
356             if (fastestH>0.) { /* avoid div by zero */
357                 XSUM_logVerbose(2, "%2u-%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
358                             iterationNb,
359                             HASHNAME_MAX, HASHNAME_MAX, hName,
360                             (unsigned)bufferSize,
361                             (double)1 / fastestH,
362                             ((double)bufferSize / (1 MB)) / fastestH);
363         }   }
364         {   double nbh_perSecond = (1 / fastestH) + 1;
365             if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20);   /* avoid overflow */
366             nbh_perIteration = (XSUM_U32)nbh_perSecond;
367         }
368     }
369     XSUM_logVerbose(1, "%2i#%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \n",
370                     testID,
371                     HASHNAME_MAX, HASHNAME_MAX, hName,
372                     (unsigned)bufferSize,
373                     (double)1 / fastestH,
374                     ((double)bufferSize / (1 MB)) / fastestH);
375     if (XSUM_logLevel<1)
376         XSUM_logVerbose(0, "%u, ", (unsigned)((double)1 / fastestH));
377 }
378 
379 
380 /*!
381  * XSUM_benchMem():
382  * buffer: Must be 16-byte aligned.
383  * The real allocated size of buffer is supposed to be >= (bufferSize+3).
384  * returns: 0 on success, 1 if error (invalid mode selected)
385  */
XSUM_benchMem(const void * buffer,size_t bufferSize)386 static void XSUM_benchMem(const void* buffer, size_t bufferSize)
387 {
388     assert((((size_t)buffer) & 15) == 0);  /* ensure alignment */
389     XSUM_fillTestBuffer(g_benchSecretBuf, sizeof(g_benchSecretBuf));
390     {   int i;
391         for (i = 1; i < NB_TESTFUNC; i++) {
392             int const hashFuncID = (i-1) / 2;
393             assert(g_hashesToBench[hashFuncID].name != NULL);
394             if (g_testIDs[i] == 0) continue;
395             /* aligned */
396             if ((i % 2) == 1) {
397                 XSUM_benchHash(g_hashesToBench[hashFuncID].func, g_hashesToBench[hashFuncID].name, i, buffer, bufferSize);
398             }
399             /* unaligned */
400             if ((i % 2) == 0) {
401                 /* Append "unaligned". */
402                 char* const hashNameBuf = XSUM_strcatDup(g_hashesToBench[hashFuncID].name, " unaligned");
403                 assert(hashNameBuf != NULL);
404                 XSUM_benchHash(g_hashesToBench[hashFuncID].func, hashNameBuf, i, ((const char*)buffer)+3, bufferSize);
405                 free(hashNameBuf);
406             }
407     }   }
408 }
409 
XSUM_selectBenchedSize(const char * fileName)410 static size_t XSUM_selectBenchedSize(const char* fileName)
411 {
412     XSUM_U64 const inFileSize = XSUM_getFileSize(fileName);
413     size_t benchedSize = (size_t) XSUM_findMaxMem(inFileSize);
414     if ((XSUM_U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
415     if (benchedSize < inFileSize) {
416         XSUM_log("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
417     }
418     return benchedSize;
419 }
420 
421 
/*
 * XSUM_benchFiles():
 * Benchmarks the enabled hashes on the content of each listed file.
 *
 * fileNamesTable : `nbFiles` non-NULL file names
 *
 * For each file, loads as much content as XSUM_selectBenchedSize() allows
 * into a 16-byte aligned buffer, then hands it to XSUM_benchMem().
 *
 * Returns 0. Any failure terminates the program:
 *   exit(11) file cannot be opened, exit(12) out of memory,
 *   exit(13) file cannot be read in full.
 */
static int XSUM_benchFiles(char*const* fileNamesTable, int nbFiles)
{
    int fileIdx;
    for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
        const char* const inFileName = fileNamesTable[fileIdx];
        FILE*  inFile;
        size_t benchedSize;
        char*  buffer;
        void*  alignedBuffer;
        assert(inFileName != NULL);

        /* Check the open result right away: any later call (stat, calloc...)
         * may overwrite errno before it is turned into a message. */
        inFile = XSUM_fopen( inFileName, "rb" );
        if (inFile==NULL){
            XSUM_log("Error: Could not open '%s': %s.\n", inFileName, strerror(errno));
            exit(11);
        }

        benchedSize = XSUM_selectBenchedSize(inFileName);
        /* +16 leaves room to align, +3 covers the unaligned runs of XSUM_benchMem() */
        buffer = (char*)calloc(benchedSize+16+3, 1);
        if(!buffer) {
            XSUM_log("\nError: Out of memory.\n");
            fclose(inFile);
            exit(12);
        }
        /* Only computed once buffer is known to be valid. */
        alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF);  /* align on next 16 bytes */

        /* Fill input buffer */
        {   size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
            int const readErrno = errno;   /* saved before fclose() can change it */
            fclose(inFile);
            if(readSize != benchedSize) {
                XSUM_log("\nError: Could not read '%s': %s.\n", inFileName, strerror(readErrno));
                free(buffer);
                exit(13);
        }   }

        /* bench */
        XSUM_benchMem(alignedBuffer, benchedSize);

        free(buffer);
    }
    return 0;
}
462 
463 
XSUM_benchInternal(size_t keySize)464 static int XSUM_benchInternal(size_t keySize)
465 {
466     void* const buffer = calloc(keySize+16+3, 1);
467     if (buffer == NULL) {
468         XSUM_log("\nError: Out of memory.\n");
469         exit(12);
470     }
471 
472     {   const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF);  /* align on next 16 bytes */
473 
474         /* bench */
475         XSUM_logVerbose(1, "Sample of ");
476         if (keySize > 10 KB) {
477             XSUM_logVerbose(1, "%u KB", (unsigned)(keySize >> 10));
478         } else {
479             XSUM_logVerbose(1, "%u bytes", (unsigned)keySize);
480         }
481         XSUM_logVerbose(1, "...        \n");
482 
483         XSUM_benchMem(alignedBuffer, keySize);
484         free(buffer);
485     }
486     return 0;
487 }
488 
489 /* ********************************************************
490 *  File Hashing
491 **********************************************************/
492 
493 /* for support of --little-endian display mode */
XSUM_display_LittleEndian(const void * ptr,size_t length)494 static void XSUM_display_LittleEndian(const void* ptr, size_t length)
495 {
496     const XSUM_U8* const p = (const XSUM_U8*)ptr;
497     size_t idx;
498     for (idx=length-1; idx<length; idx--)    /* intentional underflow to negative to detect end */
499         XSUM_output("%02x", p[idx]);
500 }
501 
XSUM_display_BigEndian(const void * ptr,size_t length)502 static void XSUM_display_BigEndian(const void* ptr, size_t length)
503 {
504     const XSUM_U8* const p = (const XSUM_U8*)ptr;
505     size_t idx;
506     for (idx=0; idx<length; idx++)
507         XSUM_output("%02x", p[idx]);
508 }
509 
510 typedef union {
511     XXH32_hash_t   xxh32;
512     XXH64_hash_t   xxh64;
513     XXH128_hash_t xxh128;
514 } Multihash;
515 
516 /*
517  * XSUM_hashStream:
518  * Reads data from `inFile`, generating an incremental hash of type hashType,
519  * using `buffer` of size `blockSize` for temporary storage.
520  */
521 static Multihash
XSUM_hashStream(FILE * inFile,AlgoSelected hashType,void * buffer,size_t blockSize)522 XSUM_hashStream(FILE* inFile,
523                 AlgoSelected hashType,
524                 void* buffer, size_t blockSize)
525 {
526     XXH32_state_t state32;
527     XXH64_state_t state64;
528     XXH3_state_t state128;
529 
530     /* Init */
531     (void)XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
532     (void)XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
533     (void)XXH3_128bits_reset(&state128);
534 
535     /* Load file & update hash */
536     {   size_t readSize;
537         while ((readSize = fread(buffer, 1, blockSize, inFile)) > 0) {
538             switch(hashType)
539             {
540             case algo_xxh32:
541                 (void)XXH32_update(&state32, buffer, readSize);
542                 break;
543             case algo_xxh64:
544                 (void)XXH64_update(&state64, buffer, readSize);
545                 break;
546             case algo_xxh128:
547                 (void)XXH3_128bits_update(&state128, buffer, readSize);
548                 break;
549             default:
550                 assert(0);
551             }
552         }
553         if (ferror(inFile)) {
554             XSUM_log("Error: a failure occurred reading the input file.\n");
555             exit(1);
556     }   }
557 
558     {   Multihash finalHash = {0};
559         switch(hashType)
560         {
561         case algo_xxh32:
562             finalHash.xxh32 = XXH32_digest(&state32);
563             break;
564         case algo_xxh64:
565             finalHash.xxh64 = XXH64_digest(&state64);
566             break;
567         case algo_xxh128:
568             finalHash.xxh128 = XXH3_128bits_digest(&state128);
569             break;
570         default:
571             assert(0);
572         }
573         return finalHash;
574     }
575 }
576 
/*
 * Per-algorithm tables, indexed by AlgoSelected
 * (algo_xxh32, algo_xxh64, algo_xxh128).
 */
static const char* XSUM_algoName[] = {      /* label used for big-endian BSD-style output */
    "XXH32",
    "XXH64",
    "XXH128"
};
static const char* XSUM_algoLE_name[] = {   /* label used for little-endian BSD-style output */
    "XXH32_LE",
    "XXH64_LE",
    "XXH128_LE"
};
static const size_t XSUM_algoLength[] = {   /* digest size in bytes */
    4,
    8,
    16
};

/* Number of elements of a true array (not valid on a pointer). */
#define XSUM_TABLE_ELT_SIZE(table)   (sizeof(table) / sizeof(*table))

/* Signature of a function printing `size` bytes of a canonical hash. */
typedef void (*XSUM_displayHash_f)(const void*, size_t);
585 
/*
 * XSUM_printLine_BSD_internal():
 * Emits one result line in BSD style: "<ALGO> (<filename>) = <hex digest>".
 *
 * filename      : name printed between parentheses
 * canonicalHash : digest bytes, XSUM_algoLength[hashType] of them
 * hashType      : selects the label and the digest length
 * algoString    : label table indexed by hashType
 * f_displayHash : routine printing the digest bytes
 */
static void XSUM_printLine_BSD_internal(const char* filename,
                                        const void* canonicalHash, const AlgoSelected hashType,
                                        const char* algoString[],
                                        XSUM_displayHash_f f_displayHash)
{
    /* Strictly below the element count: a value equal to the table size would
     * index one past the end. */
    assert(0 <= (int)hashType && (size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const char* const typeString = algoString[hashType];
        const size_t hashLength = XSUM_algoLength[hashType];
        XSUM_output("%s (%s) = ", typeString, filename);
        f_displayHash(canonicalHash, hashLength);
        XSUM_output("\n");
}   }
598 
XSUM_printLine_BSD_LE(const char * filename,const void * canonicalHash,const AlgoSelected hashType)599 static void XSUM_printLine_BSD_LE(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
600 {
601     XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoLE_name, XSUM_display_LittleEndian);
602 }
603 
XSUM_printLine_BSD(const char * filename,const void * canonicalHash,const AlgoSelected hashType)604 static void XSUM_printLine_BSD(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
605 {
606     XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoName, XSUM_display_BigEndian);
607 }
608 
/*
 * XSUM_printLine_GNU_internal():
 * Emits one result line in GNU style: "<hex digest>  <filename>".
 *
 * filename      : name printed after the digest
 * canonicalHash : digest bytes, XSUM_algoLength[hashType] of them
 * hashType      : selects the digest length
 * f_displayHash : routine printing the digest bytes
 */
static void XSUM_printLine_GNU_internal(const char* filename,
                               const void* canonicalHash, const AlgoSelected hashType,
                               XSUM_displayHash_f f_displayHash)
{
    /* Strictly below the element count: a value equal to the table size would
     * index one past the end. */
    assert(0 <= (int)hashType && (size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const size_t hashLength = XSUM_algoLength[hashType];
        f_displayHash(canonicalHash, hashLength);
        XSUM_output("  %s\n", filename);
}   }
618 
XSUM_printLine_GNU(const char * filename,const void * canonicalHash,const AlgoSelected hashType)619 static void XSUM_printLine_GNU(const char* filename,
620                                const void* canonicalHash, const AlgoSelected hashType)
621 {
622     XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, XSUM_display_BigEndian);
623 }
624 
XSUM_printLine_GNU_LE(const char * filename,const void * canonicalHash,const AlgoSelected hashType)625 static void XSUM_printLine_GNU_LE(const char* filename,
626                                   const void* canonicalHash, const AlgoSelected hashType)
627 {
628     XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, XSUM_display_LittleEndian);
629 }
630 
631 typedef enum { big_endian, little_endian} Display_endianess;
632 
633 typedef enum { display_gnu, display_bsd } Display_convention;
634 
635 typedef void (*XSUM_displayLine_f)(const char*, const void*, AlgoSelected);  /* line display signature */
636 
637 static XSUM_displayLine_f XSUM_kDisplayLine_fTable[2][2] = {
638     { XSUM_printLine_GNU, XSUM_printLine_GNU_LE },
639     { XSUM_printLine_BSD, XSUM_printLine_BSD_LE }
640 };
641 
XSUM_hashFile(const char * fileName,const AlgoSelected hashType,const Display_endianess displayEndianess,const Display_convention convention)642 static int XSUM_hashFile(const char* fileName,
643                          const AlgoSelected hashType,
644                          const Display_endianess displayEndianess,
645                          const Display_convention convention)
646 {
647     size_t const blockSize = 64 KB;
648     XSUM_displayLine_f const f_displayLine = XSUM_kDisplayLine_fTable[convention][displayEndianess];
649     FILE* inFile;
650     Multihash hashValue;
651     assert(displayEndianess==big_endian || displayEndianess==little_endian);
652     assert(convention==display_gnu || convention==display_bsd);
653 
654     /* Check file existence */
655     if (fileName == stdinName) {
656         inFile = stdin;
657         fileName = "stdin";
658         XSUM_setBinaryMode(stdin);
659     } else {
660         if (XSUM_isDirectory(fileName)) {
661             XSUM_log("xxhsum: %s: Is a directory \n", fileName);
662             return 1;
663         }
664         inFile = XSUM_fopen( fileName, "rb" );
665         if (inFile==NULL) {
666             XSUM_log("Error: Could not open '%s': %s. \n", fileName, strerror(errno));
667             return 1;
668     }   }
669 
670     /* Memory allocation & streaming */
671     {   void* const buffer = malloc(blockSize);
672         if (buffer == NULL) {
673             XSUM_log("\nError: Out of memory.\n");
674             fclose(inFile);
675             return 1;
676         }
677 
678         /* Stream file & update hash */
679         hashValue = XSUM_hashStream(inFile, hashType, buffer, blockSize);
680 
681         fclose(inFile);
682         free(buffer);
683     }
684 
685     /* display Hash value in selected format */
686     switch(hashType)
687     {
688     case algo_xxh32:
689         {   XXH32_canonical_t hcbe32;
690             (void)XXH32_canonicalFromHash(&hcbe32, hashValue.xxh32);
691             f_displayLine(fileName, &hcbe32, hashType);
692             break;
693         }
694     case algo_xxh64:
695         {   XXH64_canonical_t hcbe64;
696             (void)XXH64_canonicalFromHash(&hcbe64, hashValue.xxh64);
697             f_displayLine(fileName, &hcbe64, hashType);
698             break;
699         }
700     case algo_xxh128:
701         {   XXH128_canonical_t hcbe128;
702             (void)XXH128_canonicalFromHash(&hcbe128, hashValue.xxh128);
703             f_displayLine(fileName, &hcbe128, hashType);
704             break;
705         }
706     default:
707         assert(0);  /* not possible */
708     }
709 
710     return 0;
711 }
712 
713 
714 /*
715  * XSUM_hashFiles:
716  * If fnTotal==0, read from stdin instead.
717  */
XSUM_hashFiles(char * const * fnList,int fnTotal,AlgoSelected hashType,Display_endianess displayEndianess,Display_convention convention)718 static int XSUM_hashFiles(char*const * fnList, int fnTotal,
719                           AlgoSelected hashType,
720                           Display_endianess displayEndianess,
721                           Display_convention convention)
722 {
723     int fnNb;
724     int result = 0;
725 
726     if (fnTotal==0)
727         return XSUM_hashFile(stdinName, hashType, displayEndianess, convention);
728 
729     for (fnNb=0; fnNb<fnTotal; fnNb++)
730         result |= XSUM_hashFile(fnList[fnNb], hashType, displayEndianess, convention);
731     XSUM_logVerbose(2, "\r%70s\r", "");
732     return result;
733 }
734 
735 
736 typedef enum {
737     GetLine_ok,
738     GetLine_eof,
739     GetLine_exceedMaxLineLength,
740     GetLine_outOfMemory
741 } GetLineResult;
742 
743 typedef enum {
744     CanonicalFromString_ok,
745     CanonicalFromString_invalidFormat
746 } CanonicalFromStringResult;
747 
/* Outcome of parsing one line of a checksum file. */
typedef enum {
    ParseLine_ok            = 0,  /* line matched one of the accepted layouts */
    ParseLine_invalidFormat = 1   /* line could not be interpreted */
} ParseLineResult;
752 
/* Verification status of the file named by one checksum line. */
typedef enum {
    LineStatus_hashOk       = 0,  /* computed hash equals the listed hash */
    LineStatus_hashFailed   = 1,  /* computed hash differs from the listed hash */
    LineStatus_failedToOpen = 2   /* the listed file could not be opened */
} LineStatus;
758 
759 typedef union {
760     XXH32_canonical_t xxh32;
761     XXH64_canonical_t xxh64;
762     XXH128_canonical_t xxh128;
763 } Canonical;
764 
765 typedef struct {
766     Canonical   canonical;
767     const char* filename;
768     int         xxhBits;    /* canonical type: 32:xxh32, 64:xxh64, 128:xxh128 */
769 } ParsedLine;
770 
/* Counters accumulated while verifying one checksum file. */
typedef struct {
    unsigned long nProperlyFormattedLines;    /* lines that parsed successfully */
    unsigned long nImproperlyFormattedLines;  /* lines rejected by the parser */
    unsigned long nMismatchedChecksums;       /* files whose hash did not match */
    unsigned long nOpenOrReadFailures;        /* listed files that could not be opened */
    unsigned long nMixedFormatLines;          /* NOTE(review): not updated by the code visible here */
    int           quit;                       /* non-zero once processing must stop */
} ParseFileReport;
779 
780 typedef struct {
781     const char*     inFileName;
782     FILE*           inFile;
783     int             lineMax;
784     char*           lineBuf;
785     size_t          blockSize;
786     char*           blockBuf;
787     XSUM_U32             strictMode;
788     XSUM_U32             statusOnly;
789     XSUM_U32             warn;
790     XSUM_U32             quiet;
791     ParseFileReport report;
792 } ParseFileArg;
793 
794 
795 /*
796  * Reads a line from stream `inFile`.
797  * Returns GetLine_ok, if it reads line successfully.
798  * Returns GetLine_eof, if stream reaches EOF.
799  * Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH.
800  * Returns GetLine_outOfMemory, if line buffer memory allocation failed.
801  */
XSUM_getLine(char ** lineBuf,int * lineMax,FILE * inFile)802 static GetLineResult XSUM_getLine(char** lineBuf, int* lineMax, FILE* inFile)
803 {
804     GetLineResult result = GetLine_ok;
805     size_t len = 0;
806 
807     if ((*lineBuf == NULL) || (*lineMax<1)) {
808         free(*lineBuf);  /* in case it's != NULL */
809         *lineMax = 0;
810         *lineBuf = (char*)malloc(DEFAULT_LINE_LENGTH);
811         if(*lineBuf == NULL) return GetLine_outOfMemory;
812         *lineMax = DEFAULT_LINE_LENGTH;
813     }
814 
815     for (;;) {
816         const int c = fgetc(inFile);
817         if (c == EOF) {
818             /*
819              * If we meet EOF before first character, returns GetLine_eof,
820              * otherwise GetLine_ok.
821              */
822             if (len == 0) result = GetLine_eof;
823             break;
824         }
825 
826         /* Make enough space for len+1 (for final NUL) bytes. */
827         if (len+1 >= (size_t)*lineMax) {
828             char* newLineBuf = NULL;
829             size_t newBufSize = (size_t)*lineMax;
830 
831             newBufSize += (newBufSize/2) + 1; /* x 1.5 */
832             if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH;
833             if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength;
834 
835             newLineBuf = (char*) realloc(*lineBuf, newBufSize);
836             if (newLineBuf == NULL) return GetLine_outOfMemory;
837 
838             *lineBuf = newLineBuf;
839             *lineMax = (int)newBufSize;
840         }
841 
842         if (c == '\n') break;
843         (*lineBuf)[len++] = (char) c;
844     }
845 
846     (*lineBuf)[len] = '\0';
847     return result;
848 }
849 
850 
/*
 * Returns the numeric value (0..15) of hexadecimal digit `c`;
 * both upper-case and lower-case letters are accepted.
 * Returns -1 if the given character is not hexadecimal.
 */
static int charToHex(char c)
{
    if (c >= '0' && c <= '9') return (int)(c - '0');
    if (c >= 'A' && c <= 'F') return (int)(c - 'A') + 0x0a;
    if (c >= 'a' && c <= 'f') return (int)(c - 'a') + 0x0a;
    return -1;
}
867 
868 
869 /*
870  * Converts canonical ASCII hexadecimal string `hashStr`
871  * to the big endian binary representation in unsigned char array `dst`.
872  *
873  * Returns CanonicalFromString_invalidFormat if hashStr is not well formatted.
874  * Returns CanonicalFromString_ok if hashStr is parsed successfully.
875  */
XSUM_canonicalFromString(unsigned char * dst,size_t dstSize,const char * hashStr,int reverseBytes)876 static CanonicalFromStringResult XSUM_canonicalFromString(unsigned char* dst,
877                                                           size_t dstSize,
878                                                           const char* hashStr,
879                                                           int reverseBytes)
880 {
881     size_t i;
882     for (i = 0; i < dstSize; ++i) {
883         int h0, h1;
884         size_t j = reverseBytes ? dstSize - i - 1 : i;
885 
886         h0 = charToHex(hashStr[j*2 + 0]);
887         if (h0 < 0) return CanonicalFromString_invalidFormat;
888 
889         h1 = charToHex(hashStr[j*2 + 1]);
890         if (h1 < 0) return CanonicalFromString_invalidFormat;
891 
892         dst[i] = (unsigned char) ((h0 << 4) | h1);
893     }
894     return CanonicalFromString_ok;
895 }
896 
897 
898 /*
899  * Parse single line of xxHash checksum file.
900  * Returns ParseLine_invalidFormat if the line is not well formatted.
901  * Returns ParseLine_ok if the line is parsed successfully.
902  * And members of XSUM_parseLine will be filled by parsed values.
903  *
904  *  - line must be terminated with '\0' without a trailing newline.
905  *  - Since parsedLine.filename will point within given argument `line`,
906  *    users must keep `line`s content when they are using parsedLine.
907  *  - The line may be modified to carve up the information it contains.
908  *
909  * xxHash checksum lines should have the following format:
910  *
911  *      <8, 16, or 32 hexadecimal char> <space> <space> <filename...> <'\0'>
912  *
913  * or:
914  *
915  *      <algorithm> <' ('> <filename> <') = '> <hexstring> <'\0'>
916  */
XSUM_parseLine(ParsedLine * parsedLine,char * line,int rev)917 static ParseLineResult XSUM_parseLine(ParsedLine* parsedLine, char* line, int rev)
918 {
919     char* const firstSpace = strchr(line, ' ');
920     const char* hash_ptr;
921     size_t hash_len;
922 
923     parsedLine->filename = NULL;
924     parsedLine->xxhBits = 0;
925 
926     if (firstSpace == NULL || !firstSpace[1]) return ParseLine_invalidFormat;
927 
928     if (firstSpace[1] == '(') {
929         char* lastSpace = strrchr(line, ' ');
930         if (lastSpace - firstSpace < 5) return ParseLine_invalidFormat;
931         if (lastSpace[-1] != '=' || lastSpace[-2] != ' ' || lastSpace[-3] != ')') return ParseLine_invalidFormat;
932         lastSpace[-3] = '\0'; /* Terminate the filename */
933         *firstSpace = '\0';
934         rev = strstr(line, "_LE") != NULL; /* was output little-endian */
935         hash_ptr = lastSpace + 1;
936         hash_len = strlen(hash_ptr);
937         /* NOTE: This currently ignores the hash description at the start of the string.
938          * In the future we should parse it and verify that it matches the hash length.
939          * It could also be used to allow both XXH64 & XXH3_64bits to be differentiated. */
940     } else {
941         hash_ptr = line;
942         hash_len = (size_t)(firstSpace - line);
943     }
944 
945     switch (hash_len)
946     {
947     case 8:
948         {   XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
949             if (XSUM_canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), hash_ptr, rev)
950                 != CanonicalFromString_ok) {
951                 return ParseLine_invalidFormat;
952             }
953             parsedLine->xxhBits = 32;
954             break;
955         }
956 
957     case 16:
958         {   XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
959             if (XSUM_canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), hash_ptr, rev)
960                 != CanonicalFromString_ok) {
961                 return ParseLine_invalidFormat;
962             }
963             parsedLine->xxhBits = 64;
964             break;
965         }
966 
967     case 32:
968         {   XXH128_canonical_t* xxh128c = &parsedLine->canonical.xxh128;
969             if (XSUM_canonicalFromString(xxh128c->digest, sizeof(xxh128c->digest), hash_ptr, rev)
970                 != CanonicalFromString_ok) {
971                 return ParseLine_invalidFormat;
972             }
973             parsedLine->xxhBits = 128;
974             break;
975         }
976 
977     default:
978             return ParseLine_invalidFormat;
979             break;
980     }
981 
982     /* note : skipping second separation character, which can be anything,
983      * allowing insertion of custom markers such as '*' */
984     parsedLine->filename = firstSpace + 2;
985     return ParseLine_ok;
986 }
987 
988 
989 /*!
990  * Parse xxHash checksum file.
991  */
XSUM_parseFile1(ParseFileArg * XSUM_parseFileArg,int rev)992 static void XSUM_parseFile1(ParseFileArg* XSUM_parseFileArg, int rev)
993 {
994     const char* const inFileName = XSUM_parseFileArg->inFileName;
995     ParseFileReport* const report = &XSUM_parseFileArg->report;
996 
997     unsigned long lineNumber = 0;
998     memset(report, 0, sizeof(*report));
999 
1000     while (!report->quit) {
1001         LineStatus lineStatus = LineStatus_hashFailed;
1002         ParsedLine parsedLine;
1003         memset(&parsedLine, 0, sizeof(parsedLine));
1004 
1005         lineNumber++;
1006         if (lineNumber == 0) {
1007             /* This is unlikely happen, but md5sum.c has this error check. */
1008             XSUM_log("%s: Error: Too many checksum lines\n", inFileName);
1009             report->quit = 1;
1010             break;
1011         }
1012 
1013         {   GetLineResult const XSUM_getLineResult = XSUM_getLine(&XSUM_parseFileArg->lineBuf,
1014                                                         &XSUM_parseFileArg->lineMax,
1015                                                          XSUM_parseFileArg->inFile);
1016             if (XSUM_getLineResult != GetLine_ok) {
1017                 if (XSUM_getLineResult == GetLine_eof) break;
1018 
1019                 switch (XSUM_getLineResult)
1020                 {
1021                 case GetLine_ok:
1022                 case GetLine_eof:
1023                     /* These cases never happen.  See above XSUM_getLineResult related "if"s.
1024                        They exist just for make gcc's -Wswitch-enum happy. */
1025                     assert(0);
1026                     break;
1027 
1028                 default:
1029                     XSUM_log("%s:%lu: Error: Unknown error.\n", inFileName, lineNumber);
1030                     break;
1031 
1032                 case GetLine_exceedMaxLineLength:
1033                     XSUM_log("%s:%lu: Error: Line too long.\n", inFileName, lineNumber);
1034                     break;
1035 
1036                 case GetLine_outOfMemory:
1037                     XSUM_log("%s:%lu: Error: Out of memory.\n", inFileName, lineNumber);
1038                     break;
1039                 }
1040                 report->quit = 1;
1041                 break;
1042         }   }
1043 
1044         if (XSUM_parseLine(&parsedLine, XSUM_parseFileArg->lineBuf, rev) != ParseLine_ok) {
1045             report->nImproperlyFormattedLines++;
1046             if (XSUM_parseFileArg->warn) {
1047                 XSUM_log("%s:%lu: Error: Improperly formatted checksum line.\n",
1048                         inFileName, lineNumber);
1049             }
1050             continue;
1051         }
1052 
1053         report->nProperlyFormattedLines++;
1054 
1055         do {
1056             FILE* const fp = XSUM_fopen(parsedLine.filename, "rb");
1057             if (fp == NULL) {
1058                 lineStatus = LineStatus_failedToOpen;
1059                 break;
1060             }
1061             lineStatus = LineStatus_hashFailed;
1062             switch (parsedLine.xxhBits)
1063             {
1064             case 32:
1065                 {   Multihash const xxh = XSUM_hashStream(fp, algo_xxh32, XSUM_parseFileArg->blockBuf, XSUM_parseFileArg->blockSize);
1066                     if (xxh.xxh32 == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) {
1067                         lineStatus = LineStatus_hashOk;
1068                 }   }
1069                 break;
1070 
1071             case 64:
1072                 {   Multihash const xxh = XSUM_hashStream(fp, algo_xxh64, XSUM_parseFileArg->blockBuf, XSUM_parseFileArg->blockSize);
1073                     if (xxh.xxh64 == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) {
1074                         lineStatus = LineStatus_hashOk;
1075                 }   }
1076                 break;
1077 
1078             case 128:
1079                 {   Multihash const xxh = XSUM_hashStream(fp, algo_xxh128, XSUM_parseFileArg->blockBuf, XSUM_parseFileArg->blockSize);
1080                     if (XXH128_isEqual(xxh.xxh128, XXH128_hashFromCanonical(&parsedLine.canonical.xxh128))) {
1081                         lineStatus = LineStatus_hashOk;
1082                 }   }
1083                 break;
1084 
1085             default:
1086                 break;
1087             }
1088             fclose(fp);
1089         } while (0);
1090 
1091         switch (lineStatus)
1092         {
1093         default:
1094             XSUM_log("%s: Error: Unknown error.\n", inFileName);
1095             report->quit = 1;
1096             break;
1097 
1098         case LineStatus_failedToOpen:
1099             report->nOpenOrReadFailures++;
1100             if (!XSUM_parseFileArg->statusOnly) {
1101                 XSUM_output("%s:%lu: Could not open or read '%s': %s.\n",
1102                     inFileName, lineNumber, parsedLine.filename, strerror(errno));
1103             }
1104             break;
1105 
1106         case LineStatus_hashOk:
1107         case LineStatus_hashFailed:
1108             {   int b = 1;
1109                 if (lineStatus == LineStatus_hashOk) {
1110                     /* If --quiet is specified, don't display "OK" */
1111                     if (XSUM_parseFileArg->quiet) b = 0;
1112                 } else {
1113                     report->nMismatchedChecksums++;
1114                 }
1115 
1116                 if (b && !XSUM_parseFileArg->statusOnly) {
1117                     XSUM_output("%s: %s\n", parsedLine.filename
1118                         , lineStatus == LineStatus_hashOk ? "OK" : "FAILED");
1119             }   }
1120             break;
1121         }
1122     }   /* while (!report->quit) */
1123 }
1124 
1125 
1126 /*  Parse xxHash checksum file.
1127  *  Returns 1, if all procedures were succeeded.
1128  *  Returns 0, if any procedures was failed.
1129  *
1130  *  If strictMode != 0, return error code if any line is invalid.
1131  *  If statusOnly != 0, don't generate any output.
1132  *  If warn != 0, print a warning message to stderr.
1133  *  If quiet != 0, suppress "OK" line.
1134  *
1135  *  "All procedures are succeeded" means:
1136  *    - Checksum file contains at least one line and less than SIZE_T_MAX lines.
1137  *    - All files are properly opened and read.
1138  *    - All hash values match with its content.
1139  *    - (strict mode) All lines in checksum file are consistent and well formatted.
1140  */
XSUM_checkFile(const char * inFileName,const Display_endianess displayEndianess,XSUM_U32 strictMode,XSUM_U32 statusOnly,XSUM_U32 warn,XSUM_U32 quiet)1141 static int XSUM_checkFile(const char* inFileName,
1142                           const Display_endianess displayEndianess,
1143                           XSUM_U32 strictMode,
1144                           XSUM_U32 statusOnly,
1145                           XSUM_U32 warn,
1146                           XSUM_U32 quiet)
1147 {
1148     int result = 0;
1149     FILE* inFile = NULL;
1150     ParseFileArg XSUM_parseFileArgBody;
1151     ParseFileArg* const XSUM_parseFileArg = &XSUM_parseFileArgBody;
1152     ParseFileReport* const report = &XSUM_parseFileArg->report;
1153 
1154     /* note: stdinName is special constant pointer.  It is not a string. */
1155     if (inFileName == stdinName) {
1156         /*
1157          * Note: Since we expect text input for xxhash -c mode,
1158          * we don't set binary mode for stdin.
1159          */
1160         inFileName = "stdin";
1161         inFile = stdin;
1162     } else {
1163         inFile = XSUM_fopen( inFileName, "rt" );
1164     }
1165 
1166     if (inFile == NULL) {
1167         XSUM_log("Error: Could not open '%s': %s\n", inFileName, strerror(errno));
1168         return 0;
1169     }
1170 
1171     XSUM_parseFileArg->inFileName  = inFileName;
1172     XSUM_parseFileArg->inFile      = inFile;
1173     XSUM_parseFileArg->lineMax     = DEFAULT_LINE_LENGTH;
1174     XSUM_parseFileArg->lineBuf     = (char*) malloc((size_t)XSUM_parseFileArg->lineMax);
1175     XSUM_parseFileArg->blockSize   = 64 * 1024;
1176     XSUM_parseFileArg->blockBuf    = (char*) malloc(XSUM_parseFileArg->blockSize);
1177     XSUM_parseFileArg->strictMode  = strictMode;
1178     XSUM_parseFileArg->statusOnly  = statusOnly;
1179     XSUM_parseFileArg->warn        = warn;
1180     XSUM_parseFileArg->quiet       = quiet;
1181 
1182     if ( (XSUM_parseFileArg->lineBuf == NULL)
1183       || (XSUM_parseFileArg->blockBuf == NULL) ) {
1184         XSUM_log("Error: : memory allocation failed \n");
1185         exit(1);
1186     }
1187     XSUM_parseFile1(XSUM_parseFileArg, displayEndianess != big_endian);
1188 
1189     free(XSUM_parseFileArg->blockBuf);
1190     free(XSUM_parseFileArg->lineBuf);
1191 
1192     if (inFile != stdin) fclose(inFile);
1193 
1194     /* Show error/warning messages.  All messages are copied from md5sum.c
1195      */
1196     if (report->nProperlyFormattedLines == 0) {
1197         XSUM_log("%s: no properly formatted xxHash checksum lines found\n", inFileName);
1198     } else if (!statusOnly) {
1199         if (report->nImproperlyFormattedLines) {
1200             XSUM_output("%lu %s improperly formatted\n"
1201                 , report->nImproperlyFormattedLines
1202                 , report->nImproperlyFormattedLines == 1 ? "line is" : "lines are");
1203         }
1204         if (report->nOpenOrReadFailures) {
1205             XSUM_output("%lu listed %s could not be read\n"
1206                 , report->nOpenOrReadFailures
1207                 , report->nOpenOrReadFailures == 1 ? "file" : "files");
1208         }
1209         if (report->nMismatchedChecksums) {
1210             XSUM_output("%lu computed %s did NOT match\n"
1211                 , report->nMismatchedChecksums
1212                 , report->nMismatchedChecksums == 1 ? "checksum" : "checksums");
1213     }   }
1214 
1215     /* Result (exit) code logic is copied from
1216      * gnu coreutils/src/md5sum.c digest_check() */
1217     result =   report->nProperlyFormattedLines != 0
1218             && report->nMismatchedChecksums == 0
1219             && report->nOpenOrReadFailures == 0
1220             && (!strictMode || report->nImproperlyFormattedLines == 0)
1221             && report->quit == 0;
1222     return result;
1223 }
1224 
1225 
/*
 * Verifies every checksum file named in `fnList` (`fnTotal` entries) with
 * XSUM_checkFile(), forwarding all option flags unchanged.
 * When fnTotal==0, the checksum list is read from stdin.
 *
 * @return 0 when every XSUM_checkFile() call succeeded, 1 otherwise.
 */
static int XSUM_checkFiles(char*const* fnList, int fnTotal,
                           const Display_endianess displayEndianess,
                           XSUM_U32 strictMode,
                           XSUM_U32 statusOnly,
                           XSUM_U32 warn,
                           XSUM_U32 quiet)
{
    int allPassed = 1;

    if (fnTotal == 0) {
        /* stdinName is a special pointer value, not a string. */
        allPassed &= XSUM_checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet);
    } else {
        int idx = 0;
        while (idx < fnTotal) {
            allPassed &= XSUM_checkFile(fnList[idx], displayEndianess, strictMode, statusOnly, warn, quiet);
            idx++;
        }
    }

    if (allPassed) return 0;
    return 1;
}
1246 
1247 
1248 /* ********************************************************
1249 *  Main
1250 **********************************************************/
1251 
XSUM_usage(const char * exename)1252 static int XSUM_usage(const char* exename)
1253 {
1254     XSUM_log( WELCOME_MESSAGE(exename) );
1255     XSUM_log( "Print or verify checksums using fast non-cryptographic algorithm xxHash \n\n" );
1256     XSUM_log( "Usage: %s [options] [files] \n\n", exename);
1257     XSUM_log( "When no filename provided or when '-' is provided, uses stdin as input. \n");
1258     XSUM_log( "Options: \n");
1259     XSUM_log( "  -H#         algorithm selection: 0,1,2 or 32,64,128 (default: %i) \n", (int)g_defaultAlgo);
1260     XSUM_log( "  -c, --check read xxHash checksum from [files] and check them \n");
1261     XSUM_log( "  -h, --help  display a long help page about advanced options \n");
1262     return 0;
1263 }
1264 
1265 
XSUM_usage_advanced(const char * exename)1266 static int XSUM_usage_advanced(const char* exename)
1267 {
1268     XSUM_usage(exename);
1269     XSUM_log( "Advanced :\n");
1270     XSUM_log( "  -V, --version        Display version information \n");
1271     XSUM_log( "      --tag            Produce BSD-style checksum lines \n");
1272     XSUM_log( "      --little-endian  Checksum values use little endian convention (default: big endian) \n");
1273     XSUM_log( "  -b                   Run benchmark \n");
1274     XSUM_log( "  -b#                  Bench only algorithm variant # \n");
1275     XSUM_log( "  -i#                  Number of times to run the benchmark (default: %u) \n", (unsigned)g_nbIterations);
1276     XSUM_log( "  -q, --quiet          Don't display version header in benchmark mode \n");
1277     XSUM_log( "\n");
1278     XSUM_log( "The following four options are useful only when verifying checksums (-c): \n");
1279     XSUM_log( "  -q, --quiet          Don't print OK for each successfully verified file \n");
1280     XSUM_log( "      --status         Don't output anything, status code shows success \n");
1281     XSUM_log( "      --strict         Exit non-zero for improperly formatted checksum lines \n");
1282     XSUM_log( "      --warn           Warn about improperly formatted checksum lines \n");
1283     return 0;
1284 }
1285 
/*
 * Reports a command line error: logs "Wrong parameters", then the short help.
 * Always returns 1, so callers can return the result as exit status.
 */
static int XSUM_badusage(const char* exename)
{
    XSUM_log("Wrong parameters\n\n");
    (void)XSUM_usage(exename);
    return 1;
}
1292 
/*
 * Logs `msg` (followed by a space and a newline), then terminates the
 * process with exit status 1. Does not return.
 */
static void errorOut(const char* msg)
{
    XSUM_log("%s \n", msg);
    exit(1);
}
1298 
/*
 * Returns the last component of `path`: the part following the last '/',
 * then, within that part, following the last '\\' (windows).
 * The result points inside `path`; nothing is allocated.
 *
 * A NULL `path` (possible for argv[0] when a program is started with an
 * empty argument vector) yields an empty string instead of a crash.
 */
static const char* XSUM_lastNameFromPath(const char* path)
{
    const char* name = path;
    const char* sep;

    if (path == NULL) return "";

    /* search each separator once and reuse the result */
    sep = strrchr(name, '/');
    if (sep != NULL) name = sep + 1;

    sep = strrchr(name, '\\');   /* windows */
    if (sep != NULL) name = sep + 1;

    return name;
}
1306 
1307 /*!
1308  * XSUM_readU32FromCharChecked():
1309  * @return 0 if success, and store the result in *value.
1310  * Allows and interprets K, KB, KiB, M, MB and MiB suffix.
1311  * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
1312  * @return 1 if an overflow error occurs
1313  */
XSUM_readU32FromCharChecked(const char ** stringPtr,XSUM_U32 * value)1314 static int XSUM_readU32FromCharChecked(const char** stringPtr, XSUM_U32* value)
1315 {
1316     static const XSUM_U32 max = (((XSUM_U32)(-1)) / 10) - 1;
1317     XSUM_U32 result = 0;
1318     while ((**stringPtr >='0') && (**stringPtr <='9')) {
1319         if (result > max) return 1; /* overflow error */
1320         result *= 10;
1321         result += (XSUM_U32)(**stringPtr - '0');
1322         (*stringPtr)++ ;
1323     }
1324     if ((**stringPtr=='K') || (**stringPtr=='M')) {
1325         XSUM_U32 const maxK = ((XSUM_U32)(-1)) >> 10;
1326         if (result > maxK) return 1; /* overflow error */
1327         result <<= 10;
1328         if (**stringPtr=='M') {
1329             if (result > maxK) return 1; /* overflow error */
1330             result <<= 10;
1331         }
1332         (*stringPtr)++;  /* skip `K` or `M` */
1333         if (**stringPtr=='i') (*stringPtr)++;
1334         if (**stringPtr=='B') (*stringPtr)++;
1335     }
1336     *value = result;
1337     return 0;
1338 }
1339 
1340 /*!
1341  * XSUM_readU32FromChar():
1342  * @return: unsigned integer value read from input in `char` format.
1343  *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
1344  *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
1345  *  Note: function will exit() program if digit sequence overflows
1346  */
XSUM_readU32FromChar(const char ** stringPtr)1347 static XSUM_U32 XSUM_readU32FromChar(const char** stringPtr) {
1348     XSUM_U32 result;
1349     if (XSUM_readU32FromCharChecked(stringPtr, &result)) {
1350         static const char errorMsg[] = "Error: numeric value too large";
1351         errorOut(errorMsg);
1352     }
1353     return result;
1354 }
1355 
XSUM_main(int argc,char * argv[])1356 XSUM_API int XSUM_main(int argc, char* argv[])
1357 {
1358     int i, filenamesStart = 0;
1359     const char* const exename = XSUM_lastNameFromPath(argv[0]);
1360     XSUM_U32 benchmarkMode = 0;
1361     XSUM_U32 fileCheckMode = 0;
1362     XSUM_U32 strictMode    = 0;
1363     XSUM_U32 statusOnly    = 0;
1364     XSUM_U32 warn          = 0;
1365     int explicitStdin = 0;
1366     XSUM_U32 selectBenchIDs= 0;  /* 0 == use default k_testIDs_default, kBenchAll == bench all */
1367     static const XSUM_U32 kBenchAll = 99;
1368     size_t keySize    = XSUM_DEFAULT_SAMPLE_SIZE;
1369     AlgoSelected algo     = g_defaultAlgo;
1370     Display_endianess displayEndianess = big_endian;
1371     Display_convention convention = display_gnu;
1372 
1373     /* special case: xxhNNsum default to NN bits checksum */
1374     if (strstr(exename,  "xxh32sum") != NULL) algo = g_defaultAlgo = algo_xxh32;
1375     if (strstr(exename,  "xxh64sum") != NULL) algo = g_defaultAlgo = algo_xxh64;
1376     if (strstr(exename, "xxh128sum") != NULL) algo = g_defaultAlgo = algo_xxh128;
1377 
1378     for (i=1; i<argc; i++) {
1379         const char* argument = argv[i];
1380         assert(argument != NULL);
1381 
1382         if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; }
1383         if (!strcmp(argument, "--benchmark-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
1384         if (!strcmp(argument, "--bench-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
1385         if (!strcmp(argument, "--quiet")) { XSUM_logLevel--; continue; }
1386         if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; }
1387         if (!strcmp(argument, "--strict")) { strictMode = 1; continue; }
1388         if (!strcmp(argument, "--status")) { statusOnly = 1; continue; }
1389         if (!strcmp(argument, "--warn")) { warn = 1; continue; }
1390         if (!strcmp(argument, "--help")) { return XSUM_usage_advanced(exename); }
1391         if (!strcmp(argument, "--version")) { XSUM_log(FULL_WELCOME_MESSAGE(exename)); XSUM_sanityCheck(); return 0; }
1392         if (!strcmp(argument, "--tag")) { convention = display_bsd; continue; }
1393 
1394         if (!strcmp(argument, "--")) {
1395             if (filenamesStart==0 && i!=argc-1) filenamesStart=i+1; /* only supports a continuous list of filenames */
1396             break;  /* treat rest of arguments as strictly file names */
1397         }
1398         if (*argument != '-') {
1399             if (filenamesStart==0) filenamesStart=i;   /* only supports a continuous list of filenames */
1400             break;  /* treat rest of arguments as strictly file names */
1401         }
1402 
1403         /* command selection */
1404         argument++;   /* note: *argument=='-' */
1405         if (*argument == 0) explicitStdin = 1;
1406 
1407         while (*argument != 0) {
1408             switch(*argument)
1409             {
1410             /* Display version */
1411             case 'V':
1412                 XSUM_log(FULL_WELCOME_MESSAGE(exename)); return 0;
1413 
1414             /* Display help on XSUM_usage */
1415             case 'h':
1416                 return XSUM_usage_advanced(exename);
1417 
1418             /* select hash algorithm */
1419             case 'H': argument++;
1420                 switch(XSUM_readU32FromChar(&argument)) {
1421                     case 0 :
1422                     case 32: algo = algo_xxh32; break;
1423                     case 1 :
1424                     case 64: algo = algo_xxh64; break;
1425                     case 2 :
1426                     case 128: algo = algo_xxh128; break;
1427                     default:
1428                         return XSUM_badusage(exename);
1429                 }
1430                 break;
1431 
1432             /* File check mode */
1433             case 'c':
1434                 fileCheckMode=1;
1435                 argument++;
1436                 break;
1437 
1438             /* Warning mode (file check mode only, alias of "--warning") */
1439             case 'w':
1440                 warn=1;
1441                 argument++;
1442                 break;
1443 
1444             /* Trigger benchmark mode */
1445             case 'b':
1446                 argument++;
1447                 benchmarkMode = 1;
1448                 do {
1449                     if (*argument == ',') argument++;
1450                     selectBenchIDs = XSUM_readU32FromChar(&argument); /* select one specific test */
1451                     if (selectBenchIDs < NB_TESTFUNC) {
1452                         g_testIDs[selectBenchIDs] = 1;
1453                     } else
1454                         selectBenchIDs = kBenchAll;
1455                 } while (*argument == ',');
1456                 break;
1457 
1458             /* Modify Nb Iterations (benchmark only) */
1459             case 'i':
1460                 argument++;
1461                 g_nbIterations = XSUM_readU32FromChar(&argument);
1462                 break;
1463 
1464             /* Modify Block size (benchmark only) */
1465             case 'B':
1466                 argument++;
1467                 keySize = XSUM_readU32FromChar(&argument);
1468                 break;
1469 
1470             /* Modify verbosity of benchmark output (hidden option) */
1471             case 'q':
1472                 argument++;
1473                 XSUM_logLevel--;
1474                 break;
1475 
1476             default:
1477                 return XSUM_badusage(exename);
1478             }
1479         }
1480     }   /* for(i=1; i<argc; i++) */
1481 
1482     /* Check benchmark mode */
1483     if (benchmarkMode) {
1484         XSUM_logVerbose(2, FULL_WELCOME_MESSAGE(exename) );
1485         XSUM_sanityCheck();
1486         if (selectBenchIDs == 0) memcpy(g_testIDs, k_testIDs_default, sizeof(g_testIDs));
1487         if (selectBenchIDs == kBenchAll) memset(g_testIDs, 1, sizeof(g_testIDs));
1488         if (filenamesStart==0) return XSUM_benchInternal(keySize);
1489         return XSUM_benchFiles(argv+filenamesStart, argc-filenamesStart);
1490     }
1491 
1492     /* Check if input is defined as console; trigger an error in this case */
1493     if ( (filenamesStart==0) && XSUM_isConsole(stdin) && !explicitStdin)
1494         return XSUM_badusage(exename);
1495 
1496     if (filenamesStart==0) filenamesStart = argc;
1497     if (fileCheckMode) {
1498         return XSUM_checkFiles(argv+filenamesStart, argc-filenamesStart,
1499                           displayEndianess, strictMode, statusOnly, warn, (XSUM_logLevel < 2) /*quiet*/);
1500     } else {
1501         return XSUM_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess, convention);
1502     }
1503 }
1504