1 /*
2 * xxhsum - Command line interface for xxhash algorithms
3 * Copyright (C) 2013-2020 Yann Collet
4 *
5 * GPL v2 License
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * You can contact the author at:
22 * - xxHash homepage: https://www.xxhash.com
23 * - xxHash source repository: https://github.com/Cyan4973/xxHash
24 */
25
26 /*
27 * xxhsum:
28 * Provides hash value of a file content, or a list of files, or stdin
29 * Display convention is Big Endian, for both 32 and 64 bits algorithms
30 */
31
32 /* Transitional headers */
33 #include "cli/xsum_config.h"
34 #include "cli/xsum_arch.h"
35 #include "cli/xsum_os_specific.h"
36 #include "cli/xsum_output.h"
37 #include "cli/xsum_sanity_check.h"
38 #ifdef XXH_INLINE_ALL
39 # include "cli/xsum_os_specific.c"
40 # include "cli/xsum_output.c"
41 # include "cli/xsum_sanity_check.c"
42 #endif
43
44 /* ************************************
45 * Includes
46 **************************************/
47 #include <limits.h>
48 #include <stdlib.h> /* malloc, calloc, free, exit */
49 #include <string.h> /* strcmp, memcpy */
50 #include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
51 #include <sys/types.h> /* stat, stat64, _stat64 */
52 #include <sys/stat.h> /* stat, stat64, _stat64 */
53 #include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
54 #include <assert.h> /* assert */
55 #include <errno.h> /* errno */
56
57 #define XXH_STATIC_LINKING_ONLY /* *_state_t */
58 #include "xxhash.h"
59
60 #ifdef XXHSUM_DISPATCH
61 # include "xxh_x86dispatch.h"
62 #endif
63
XSUM_isLittleEndian(void)64 static unsigned XSUM_isLittleEndian(void)
65 {
66 const union { XSUM_U32 u; XSUM_U8 c[4]; } one = { 1 }; /* don't use static: performance detrimental */
67 return one.c[0];
68 }
69
70 static const int g_nbBits = (int)(sizeof(void*)*8);
71 static const char g_lename[] = "little endian";
72 static const char g_bename[] = "big endian";
73 #define ENDIAN_NAME (XSUM_isLittleEndian() ? g_lename : g_bename)
74 static const char author[] = "Yann Collet";
75 #define WELCOME_MESSAGE(exename) "%s %s by %s \n", exename, XSUM_PROGRAM_VERSION, author
76 #define FULL_WELCOME_MESSAGE(exename) "%s %s by %s \n" \
77 "compiled as %i-bit %s %s with " XSUM_CC_VERSION_FMT " \n", \
78 exename, XSUM_PROGRAM_VERSION, author, \
79 g_nbBits, XSUM_ARCH, ENDIAN_NAME, XSUM_CC_VERSION
80
81 #define KB *( 1<<10)
82 #define MB *( 1<<20)
83 #define GB *(1U<<30)
84
85 static size_t XSUM_DEFAULT_SAMPLE_SIZE = 100 KB;
86 #define NBLOOPS 3 /* Default number of benchmark iterations */
87 #define TIMELOOP_S 1
88 #define TIMELOOP (TIMELOOP_S * CLOCKS_PER_SEC) /* target timing per iteration */
89 #define TIMELOOP_MIN (TIMELOOP / 2) /* minimum timing to validate a result */
90 #define XXHSUM32_DEFAULT_SEED 0 /* Default seed for algo_xxh32 */
91 #define XXHSUM64_DEFAULT_SEED 0 /* Default seed for algo_xxh64 */
92
93 #define MAX_MEM (2 GB - 64 MB)
94
95 static const char stdinName[] = "-";
96 typedef enum { algo_xxh32=0, algo_xxh64=1, algo_xxh128=2 } AlgoSelected;
97 static AlgoSelected g_defaultAlgo = algo_xxh64; /* required within main() & XSUM_usage() */
98
99 /* <16 hex char> <SPC> <SPC> <filename> <'\0'>
100 * '4096' is typical Linux PATH_MAX configuration. */
101 #define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1)
102
103 /* Maximum acceptable line length. */
104 #define MAX_LINE_LENGTH (32 KB)
105
106
107 /* ************************************
108 * Display macros
109 **************************************/
110
111
112 /* ************************************
113 * Local variables
114 **************************************/
115 static XSUM_U32 g_nbIterations = NBLOOPS;
116
117
118 /* ************************************
119 * Benchmark Functions
120 **************************************/
/*
 * XSUM_clockSpan():
 * returns: the number of clock() ticks elapsed since `start`,
 *          where `start` is a value previously obtained from clock().
 */
static clock_t XSUM_clockSpan( clock_t start )
{
    clock_t const now = clock();
    /* original author's note: works even if overflow; typical max span ~ 30 mn */
    return now - start;
}
125
/*
 * XSUM_findMaxMem():
 * Probes, through trial malloc() calls, how much memory can be obtained
 * for a buffer of about `requiredMem` bytes.
 *
 * The request is first rounded up to the next 64 MB boundary, padded by
 * two more 64 MB steps and capped at MAX_MEM. It is then lowered (one step
 * at a time, then by halving once below one step) until an allocation
 * succeeds. The probe allocation is released, and the size is lowered once
 * more to keep some space available.
 *
 * returns: the selected size in bytes,
 *          or 0 when the size shrank to nothing without any allocation succeeding.
 */
static size_t XSUM_findMaxMem(XSUM_U64 requiredMem)
{
    size_t const step = 64 MB;
    void* testmem = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2*step;
    if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;

    while (testmem == NULL) {
        if (requiredMem > step) requiredMem -= step;
        else requiredMem >>= 1;
        /*
         * Nothing left to try. Without this exit the loop would spin
         * forever on platforms where malloc(0) returns NULL.
         */
        if (requiredMem == 0) return 0;
        testmem = malloc ((size_t)requiredMem);
    }
    free (testmem);

    /* keep some space available */
    if (requiredMem > step) requiredMem -= step;
    else requiredMem >>= 1;

    return (size_t)requiredMem;
}
148
/*
 * XSUM_strcatDup():
 * Builds a newly allocated string made of `s1` immediately followed by `s2`.
 * Both arguments must be non-NULL (checked with assert).
 * returns: the NUL-terminated concatenation, or NULL if malloc() failed.
 *          The caller owns the result and must free() it.
 */
static char* XSUM_strcatDup(const char* s1, const char* s2)
{
    size_t headLen;
    size_t tailLen;
    char* joined;

    assert(s1 != NULL);
    assert(s2 != NULL);

    headLen = strlen(s1);
    tailLen = strlen(s2);
    joined = (char*)malloc(headLen + tailLen + 1);
    if (joined == NULL) return NULL;

    memcpy(joined, s1, headLen);                /* s1, terminator excluded */
    memcpy(joined + headLen, s2, tailLen + 1);  /* s2, terminator included */
    return joined;
}
169
170
171 /*
172 * A secret buffer used for benchmarking XXH3's withSecret variants.
173 *
174 * In order for the bench to be realistic, the secret buffer would need to be
175 * pre-generated.
176 *
177 * Adding a pointer to the parameter list would be messy.
178 */
179 static XSUM_U8 g_benchSecretBuf[XXH3_SECRET_SIZE_MIN];
180
181 /*
182 * Wrappers for the benchmark.
183 *
184 * If you would like to add other hashes to the bench, create a wrapper and add
185 * it to the g_hashesToBench table. It will automatically be added.
186 */
187 typedef XSUM_U32 (*hashFunction)(const void* buffer, size_t bufferSize, XSUM_U32 seed);
188
/* Benchmark wrapper: one-shot XXH32 of the buffer, using `seed`. */
static XSUM_U32 localXXH32(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XSUM_U32 const h32 = XXH32(buffer, bufferSize, seed);
    return h32;
}
/* Benchmark wrapper: one-shot XXH64 of the buffer, using `seed`; result converted to XSUM_U32. */
static XSUM_U32 localXXH64(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const h64 = XXH64(buffer, bufferSize, seed);
    return (XSUM_U32)h64;
}
/* Benchmark wrapper: one-shot XXH3_64bits; `seed` is ignored. Result converted to XSUM_U32. */
static XSUM_U32 localXXH3_64b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const h64 = XXH3_64bits(buffer, bufferSize);
    (void)seed;
    return (XSUM_U32)h64;
}
/* Benchmark wrapper: one-shot XXH3_64bits_withSeed, using `seed`. Result converted to XSUM_U32. */
static XSUM_U32 localXXH3_64b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const h64 = XXH3_64bits_withSeed(buffer, bufferSize, seed);
    return (XSUM_U32)h64;
}
/*
 * Benchmark wrapper: one-shot XXH3_64bits_withSecret, keyed with the whole
 * g_benchSecretBuf; `seed` is ignored. Result converted to XSUM_U32.
 */
static XSUM_U32 localXXH3_64b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH64_hash_t const h64 =
        XXH3_64bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (XSUM_U32)h64;
}
/* Benchmark wrapper: one-shot XXH3_128bits; `seed` is ignored. Returns the low64 half converted to XSUM_U32. */
static XSUM_U32 localXXH3_128b(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH128_hash_t const h128 = XXH3_128bits(buffer, bufferSize);
    (void)seed;
    return (XSUM_U32)(h128.low64);
}
/* Benchmark wrapper: one-shot XXH3_128bits_withSeed, using `seed`. Returns the low64 half converted to XSUM_U32. */
static XSUM_U32 localXXH3_128b_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH128_hash_t const h128 = XXH3_128bits_withSeed(buffer, bufferSize, seed);
    return (XSUM_U32)(h128.low64);
}
/*
 * Benchmark wrapper: one-shot XXH3_128bits_withSecret, keyed with the whole
 * g_benchSecretBuf; `seed` is ignored. Returns the low64 half converted to XSUM_U32.
 */
static XSUM_U32 localXXH3_128b_secret(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH128_hash_t const h128 =
        XXH3_128bits_withSecret(buffer, bufferSize, g_benchSecretBuf, sizeof(g_benchSecretBuf));
    (void)seed;
    return (XSUM_U32)(h128.low64);
}
/*
 * Benchmark wrapper: XXH3 64-bit streaming API (reset, one update, digest)
 * on a stack-allocated state; `seed` is ignored. Result converted to XSUM_U32.
 */
static XSUM_U32 localXXH3_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t streamState;
    XXH64_hash_t digest;
    (void)seed;
    XXH3_64bits_reset(&streamState);
    XXH3_64bits_update(&streamState, buffer, bufferSize);
    digest = XXH3_64bits_digest(&streamState);
    return (XSUM_U32)digest;
}
/*
 * Benchmark wrapper: XXH3 64-bit streaming API with a seeded reset.
 * The stack-allocated state goes through XXH3_INITSTATE() before the reset.
 * Result converted to XSUM_U32.
 */
static XSUM_U32 localXXH3_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t streamState;
    XXH64_hash_t digest;
    XXH3_INITSTATE(&streamState);
    XXH3_64bits_reset_withSeed(&streamState, (XXH64_hash_t)seed);
    XXH3_64bits_update(&streamState, buffer, bufferSize);
    digest = XXH3_64bits_digest(&streamState);
    return (XSUM_U32)digest;
}
/*
 * Benchmark wrapper: XXH3 128-bit streaming API (reset, one update, digest)
 * on a stack-allocated state; `seed` is ignored.
 * Returns the low64 half of the digest converted to XSUM_U32.
 */
static XSUM_U32 localXXH128_stream(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t streamState;
    XXH128_hash_t digest;
    (void)seed;
    XXH3_128bits_reset(&streamState);
    XXH3_128bits_update(&streamState, buffer, bufferSize);
    digest = XXH3_128bits_digest(&streamState);
    return (XSUM_U32)(digest.low64);
}
/*
 * Benchmark wrapper: XXH3 128-bit streaming API with a seeded reset.
 * The stack-allocated state goes through XXH3_INITSTATE() before the reset.
 * Returns the low64 half of the digest converted to XSUM_U32.
 */
static XSUM_U32 localXXH128_stream_seeded(const void* buffer, size_t bufferSize, XSUM_U32 seed)
{
    XXH3_state_t streamState;
    XXH128_hash_t digest;
    XXH3_INITSTATE(&streamState);
    XXH3_128bits_reset_withSeed(&streamState, (XXH64_hash_t)seed);
    XXH3_128bits_update(&streamState, buffer, bufferSize);
    digest = XXH3_128bits_digest(&streamState);
    return (XSUM_U32)(digest.low64);
}
257
258
259 typedef struct {
260 const char* name;
261 hashFunction func;
262 } hashInfo;
263
264 #define NB_HASHFUNC 12
265 static const hashInfo g_hashesToBench[NB_HASHFUNC] = {
266 { "XXH32", &localXXH32 },
267 { "XXH64", &localXXH64 },
268 { "XXH3_64b", &localXXH3_64b },
269 { "XXH3_64b w/seed", &localXXH3_64b_seeded },
270 { "XXH3_64b w/secret", &localXXH3_64b_secret },
271 { "XXH128", &localXXH3_128b },
272 { "XXH128 w/seed", &localXXH3_128b_seeded },
273 { "XXH128 w/secret", &localXXH3_128b_secret },
274 { "XXH3_stream", &localXXH3_stream },
275 { "XXH3_stream w/seed",&localXXH3_stream_seeded },
276 { "XXH128_stream", &localXXH128_stream },
277 { "XXH128_stream w/seed",&localXXH128_stream_seeded },
278 };
279
280 #define NB_TESTFUNC (1 + 2 * NB_HASHFUNC)
281 static char g_testIDs[NB_TESTFUNC] = { 0 };
282 static const char k_testIDs_default[NB_TESTFUNC] = { 0,
283 1 /*XXH32*/, 0,
284 1 /*XXH64*/, 0,
285 1 /*XXH3*/, 0, 0, 0, 0, 0,
286 1 /*XXH128*/ };
287
288 #define HASHNAME_MAX 29
/*
 * XSUM_benchHash():
 * Measures how fast hash `h` processes `bufferSize` bytes of `buffer`
 * and logs the best speed observed, labelled with `hName`.
 *
 * h:      wrapper of the hash being timed; within a round it is called
 *         with seeds 0, 1, 2, ... and the results are accumulated.
 * hName:  label, padded/truncated to HASHNAME_MAX characters on output.
 * testID: number printed in front of the final result line.
 *
 * The number of counted rounds is g_nbIterations, with a minimum of 1.
 * When g_nbIterations is non-zero, a round that lasted fewer than
 * TIMELOOP_MIN ticks is not counted: the batch size is adjusted and the
 * round is run again.
 */
static void XSUM_benchHash(hashFunction h, const char* hName, int testID,
                           const void* buffer, size_t bufferSize)
{
    double const rateCap = (double)(4000U<<20);   /* avoid overflow when converting a rate to XSUM_U32 */
    XSUM_U32 hashesPerRound = (XSUM_U32)((300 MB) / (bufferSize+1)) + 1;  /* first iteration conservatively aims for 300 MB/s */
    unsigned const totalRounds = g_nbIterations + !g_nbIterations;   /* min 1 */
    unsigned round;
    double bestSecPerHash = 100000000.;
    assert(HASHNAME_MAX > 2);
    XSUM_logVerbose(2, "\r%80s\r", "");       /* Clean display line */

    for (round = 1; round <= totalRounds; round++) {
        XSUM_U32 sink = 0;
        XSUM_U32 seed;
        clock_t tickStart;
        clock_t elapsed;
        double secPerHash;

        XSUM_logVerbose(2, "%2u-%-*.*s : %10u ->\r",
                        round,
                        HASHNAME_MAX, HASHNAME_MAX, hName,
                        (unsigned)bufferSize);

        /* wait for the tick to change, so that timing starts at a tick boundary */
        tickStart = clock();
        while (clock() == tickStart);
        tickStart = clock();

        for (seed = 0; seed < hashesPerRound; seed++)
            sink += h(buffer, bufferSize, seed);
        if (sink == 0) XSUM_logVerbose(3,".\r");  /* use the accumulated value so the hash calls cannot be optimized away */

        elapsed = XSUM_clockSpan(tickStart);
        /* computed with the batch size that was actually timed */
        secPerHash = ((double)elapsed / TIMELOOP) / hashesPerRound;

        /*
         * clock() is portable but not very precise. When a round finishes
         * within too few ticks, rounding dominates and the reported speeds
         * become suspiciously "even" (the original authors observed
         * 4800.0 MB/s next to 9600.0 MB/s for near-identical variants on a
         * Core 2 Duo). In that case the batch is resized so that the next
         * round lasts long enough to be meaningful.
         */
        if (elapsed < TIMELOOP_MIN) {
            /* Not enough time spent in benchmarking, risk of rounding bias */
            if (elapsed == 0) {
                /* faster than resolution timer */
                hashesPerRound *= 100;
            } else {
                /* aim for a next round of approximately 1 second */
                double rate = (1 / secPerHash) + 1;
                if (rate > rateCap) rate = rateCap;
                hashesPerRound = (XSUM_U32)rate;
            }
            /* g_nbIterations==0 => quick evaluation, no claim of accuracy */
            if (g_nbIterations>0) {
                round--;   /* new round for a more accurate speed evaluation */
                continue;
            }
        }

        if (secPerHash < bestSecPerHash) bestSecPerHash = secPerHash;
        if (bestSecPerHash>0.) {   /* avoid div by zero */
            XSUM_logVerbose(2, "%2u-%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
                            round,
                            HASHNAME_MAX, HASHNAME_MAX, hName,
                            (unsigned)bufferSize,
                            (double)1 / bestSecPerHash,
                            ((double)bufferSize / (1 MB)) / bestSecPerHash);
        }

        /* size the next round from the best speed seen so far */
        {   double rate = (1 / bestSecPerHash) + 1;
            if (rate > rateCap) rate = rateCap;
            hashesPerRound = (XSUM_U32)rate;
        }
    }

    XSUM_logVerbose(1, "%2i#%-*.*s : %10u -> %8.0f it/s (%7.1f MB/s) \n",
                    testID,
                    HASHNAME_MAX, HASHNAME_MAX, hName,
                    (unsigned)bufferSize,
                    (double)1 / bestSecPerHash,
                    ((double)bufferSize / (1 MB)) / bestSecPerHash);
    if (XSUM_logLevel<1)
        XSUM_logVerbose(0, "%u, ", (unsigned)((double)1 / bestSecPerHash));
}
378
379
380 /*!
381 * XSUM_benchMem():
382 * buffer: Must be 16-byte aligned.
383 * The real allocated size of buffer is supposed to be >= (bufferSize+3).
384 * returns: 0 on success, 1 if error (invalid mode selected)
385 */
XSUM_benchMem(const void * buffer,size_t bufferSize)386 static void XSUM_benchMem(const void* buffer, size_t bufferSize)
387 {
388 assert((((size_t)buffer) & 15) == 0); /* ensure alignment */
389 XSUM_fillTestBuffer(g_benchSecretBuf, sizeof(g_benchSecretBuf));
390 { int i;
391 for (i = 1; i < NB_TESTFUNC; i++) {
392 int const hashFuncID = (i-1) / 2;
393 assert(g_hashesToBench[hashFuncID].name != NULL);
394 if (g_testIDs[i] == 0) continue;
395 /* aligned */
396 if ((i % 2) == 1) {
397 XSUM_benchHash(g_hashesToBench[hashFuncID].func, g_hashesToBench[hashFuncID].name, i, buffer, bufferSize);
398 }
399 /* unaligned */
400 if ((i % 2) == 0) {
401 /* Append "unaligned". */
402 char* const hashNameBuf = XSUM_strcatDup(g_hashesToBench[hashFuncID].name, " unaligned");
403 assert(hashNameBuf != NULL);
404 XSUM_benchHash(g_hashesToBench[hashFuncID].func, hashNameBuf, i, ((const char*)buffer)+3, bufferSize);
405 free(hashNameBuf);
406 }
407 } }
408 }
409
XSUM_selectBenchedSize(const char * fileName)410 static size_t XSUM_selectBenchedSize(const char* fileName)
411 {
412 XSUM_U64 const inFileSize = XSUM_getFileSize(fileName);
413 size_t benchedSize = (size_t) XSUM_findMaxMem(inFileSize);
414 if ((XSUM_U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
415 if (benchedSize < inFileSize) {
416 XSUM_log("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
417 }
418 return benchedSize;
419 }
420
421
/*
 * XSUM_benchFiles():
 * Benchmarks the enabled hashes, using the content of each listed file as
 * input.
 *
 * For every file: open it, select the size to benchmark, allocate a buffer
 * with 16+3 spare bytes (16-byte alignment, plus the +3 offset used by the
 * unaligned runs), load the content, then call XSUM_benchMem().
 *
 * fileNamesTable: array of `nbFiles` non-NULL file names.
 * returns: 0. Any failure terminates the process, with exit code
 *          11 (cannot open), 12 (out of memory) or 13 (could not read).
 */
static int XSUM_benchFiles(char*const* fileNamesTable, int nbFiles)
{
    int fileIdx;
    for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
        const char* const inFileName = fileNamesTable[fileIdx];
        assert(inFileName != NULL);

        {   FILE* const inFile = XSUM_fopen( inFileName, "rb" );
            /* Report an open failure immediately, before any other call
             * gets a chance to overwrite errno. */
            if (inFile==NULL){
                XSUM_log("Error: Could not open '%s': %s.\n", inFileName, strerror(errno));
                exit(11);
            }

            {   size_t const benchedSize = XSUM_selectBenchedSize(inFileName);
                char* const buffer = (char*)calloc(benchedSize+16+3, 1);
                if(!buffer) {
                    XSUM_log("\nError: Out of memory.\n");
                    fclose(inFile);
                    exit(12);
                }

                /* buffer is known to be valid here: safe to derive the aligned pointer */
                {   void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF);  /* align on next 16 bytes */

                    /* Fill input buffer */
                    size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
                    fclose(inFile);
                    if(readSize != benchedSize) {
                        XSUM_log("\nError: Could not read '%s': %s.\n", inFileName, strerror(errno));
                        free(buffer);
                        exit(13);
                    }

                    /* bench */
                    XSUM_benchMem(alignedBuffer, benchedSize);
                }

                free(buffer);
        }   }
    }
    return 0;
}
462
463
XSUM_benchInternal(size_t keySize)464 static int XSUM_benchInternal(size_t keySize)
465 {
466 void* const buffer = calloc(keySize+16+3, 1);
467 if (buffer == NULL) {
468 XSUM_log("\nError: Out of memory.\n");
469 exit(12);
470 }
471
472 { const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
473
474 /* bench */
475 XSUM_logVerbose(1, "Sample of ");
476 if (keySize > 10 KB) {
477 XSUM_logVerbose(1, "%u KB", (unsigned)(keySize >> 10));
478 } else {
479 XSUM_logVerbose(1, "%u bytes", (unsigned)keySize);
480 }
481 XSUM_logVerbose(1, "... \n");
482
483 XSUM_benchMem(alignedBuffer, keySize);
484 free(buffer);
485 }
486 return 0;
487 }
488
489 /* ********************************************************
490 * File Hashing
491 **********************************************************/
492
493 /* for support of --little-endian display mode */
XSUM_display_LittleEndian(const void * ptr,size_t length)494 static void XSUM_display_LittleEndian(const void* ptr, size_t length)
495 {
496 const XSUM_U8* const p = (const XSUM_U8*)ptr;
497 size_t idx;
498 for (idx=length-1; idx<length; idx--) /* intentional underflow to negative to detect end */
499 XSUM_output("%02x", p[idx]);
500 }
501
XSUM_display_BigEndian(const void * ptr,size_t length)502 static void XSUM_display_BigEndian(const void* ptr, size_t length)
503 {
504 const XSUM_U8* const p = (const XSUM_U8*)ptr;
505 size_t idx;
506 for (idx=0; idx<length; idx++)
507 XSUM_output("%02x", p[idx]);
508 }
509
510 typedef union {
511 XXH32_hash_t xxh32;
512 XXH64_hash_t xxh64;
513 XXH128_hash_t xxh128;
514 } Multihash;
515
516 /*
517 * XSUM_hashStream:
518 * Reads data from `inFile`, generating an incremental hash of type hashType,
519 * using `buffer` of size `blockSize` for temporary storage.
520 */
521 static Multihash
XSUM_hashStream(FILE * inFile,AlgoSelected hashType,void * buffer,size_t blockSize)522 XSUM_hashStream(FILE* inFile,
523 AlgoSelected hashType,
524 void* buffer, size_t blockSize)
525 {
526 XXH32_state_t state32;
527 XXH64_state_t state64;
528 XXH3_state_t state128;
529
530 /* Init */
531 (void)XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
532 (void)XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
533 (void)XXH3_128bits_reset(&state128);
534
535 /* Load file & update hash */
536 { size_t readSize;
537 while ((readSize = fread(buffer, 1, blockSize, inFile)) > 0) {
538 switch(hashType)
539 {
540 case algo_xxh32:
541 (void)XXH32_update(&state32, buffer, readSize);
542 break;
543 case algo_xxh64:
544 (void)XXH64_update(&state64, buffer, readSize);
545 break;
546 case algo_xxh128:
547 (void)XXH3_128bits_update(&state128, buffer, readSize);
548 break;
549 default:
550 assert(0);
551 }
552 }
553 if (ferror(inFile)) {
554 XSUM_log("Error: a failure occurred reading the input file.\n");
555 exit(1);
556 } }
557
558 { Multihash finalHash = {0};
559 switch(hashType)
560 {
561 case algo_xxh32:
562 finalHash.xxh32 = XXH32_digest(&state32);
563 break;
564 case algo_xxh64:
565 finalHash.xxh64 = XXH64_digest(&state64);
566 break;
567 case algo_xxh128:
568 finalHash.xxh128 = XXH3_128bits_digest(&state128);
569 break;
570 default:
571 assert(0);
572 }
573 return finalHash;
574 }
575 }
576
/*
 * Per-algorithm tables, indexed by AlgoSelected:
 * algo_xxh32, algo_xxh64, algo_xxh128
 */
static const char* XSUM_algoName[]    = { "XXH32", "XXH64", "XXH128" };          /* tag for big-endian BSD lines */
static const char* XSUM_algoLE_name[] = { "XXH32_LE", "XXH64_LE", "XXH128_LE" }; /* tag for little-endian BSD lines */
static const size_t XSUM_algoLength[] = { 4, 8, 16 };                            /* length passed to the display function */

/* Number of elements of an array (must be given an array, not a pointer) */
#define XSUM_TABLE_ELT_SIZE(table)   (sizeof(table) / sizeof(*(table)))

/* display function signature: (bytes to print, number of bytes) */
typedef void (*XSUM_displayHash_f)(const void*, size_t);
585
/*
 * XSUM_printLine_BSD_internal():
 * Outputs one result line in BSD style:
 *     <algo tag> (<filename>) = <hash in hex>\n
 *
 * canonicalHash: bytes of the hash to print.
 * hashType:      selects the tag in `algoString` and the number of bytes
 *                printed (XSUM_algoLength).
 * algoString:    table of tags, indexed by hashType.
 * f_displayHash: routine that prints the hash bytes.
 */
static void XSUM_printLine_BSD_internal(const char* filename,
                                        const void* canonicalHash, const AlgoSelected hashType,
                                        const char* algoString[],
                                        XSUM_displayHash_f f_displayHash)
{
    /* valid indices stop one short of the element count (strict bound);
     * the cast also rejects negative values */
    assert((size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const char* const typeString = algoString[hashType];
        const size_t hashLength = XSUM_algoLength[hashType];
        XSUM_output("%s (%s) = ", typeString, filename);
        f_displayHash(canonicalHash, hashLength);
        XSUM_output("\n");
    }
}
598
XSUM_printLine_BSD_LE(const char * filename,const void * canonicalHash,const AlgoSelected hashType)599 static void XSUM_printLine_BSD_LE(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
600 {
601 XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoLE_name, XSUM_display_LittleEndian);
602 }
603
XSUM_printLine_BSD(const char * filename,const void * canonicalHash,const AlgoSelected hashType)604 static void XSUM_printLine_BSD(const char* filename, const void* canonicalHash, const AlgoSelected hashType)
605 {
606 XSUM_printLine_BSD_internal(filename, canonicalHash, hashType, XSUM_algoName, XSUM_display_BigEndian);
607 }
608
/*
 * XSUM_printLine_GNU_internal():
 * Outputs one result line in GNU style: the hash in hex, then the
 * separator and file name given by the " %s\n" format.
 *
 * canonicalHash: bytes of the hash to print.
 * hashType:      selects the number of bytes printed (XSUM_algoLength).
 * f_displayHash: routine that prints the hash bytes.
 */
static void XSUM_printLine_GNU_internal(const char* filename,
                                        const void* canonicalHash, const AlgoSelected hashType,
                                        XSUM_displayHash_f f_displayHash)
{
    /* valid indices stop one short of the element count (strict bound);
     * the cast also rejects negative values */
    assert((size_t)hashType < XSUM_TABLE_ELT_SIZE(XSUM_algoName));
    {   const size_t hashLength = XSUM_algoLength[hashType];
        f_displayHash(canonicalHash, hashLength);
        XSUM_output(" %s\n", filename);
    }
}
618
XSUM_printLine_GNU(const char * filename,const void * canonicalHash,const AlgoSelected hashType)619 static void XSUM_printLine_GNU(const char* filename,
620 const void* canonicalHash, const AlgoSelected hashType)
621 {
622 XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, XSUM_display_BigEndian);
623 }
624
XSUM_printLine_GNU_LE(const char * filename,const void * canonicalHash,const AlgoSelected hashType)625 static void XSUM_printLine_GNU_LE(const char* filename,
626 const void* canonicalHash, const AlgoSelected hashType)
627 {
628 XSUM_printLine_GNU_internal(filename, canonicalHash, hashType, XSUM_display_LittleEndian);
629 }
630
631 typedef enum { big_endian, little_endian} Display_endianess;
632
633 typedef enum { display_gnu, display_bsd } Display_convention;
634
635 typedef void (*XSUM_displayLine_f)(const char*, const void*, AlgoSelected); /* line display signature */
636
637 static XSUM_displayLine_f XSUM_kDisplayLine_fTable[2][2] = {
638 { XSUM_printLine_GNU, XSUM_printLine_GNU_LE },
639 { XSUM_printLine_BSD, XSUM_printLine_BSD_LE }
640 };
641
XSUM_hashFile(const char * fileName,const AlgoSelected hashType,const Display_endianess displayEndianess,const Display_convention convention)642 static int XSUM_hashFile(const char* fileName,
643 const AlgoSelected hashType,
644 const Display_endianess displayEndianess,
645 const Display_convention convention)
646 {
647 size_t const blockSize = 64 KB;
648 XSUM_displayLine_f const f_displayLine = XSUM_kDisplayLine_fTable[convention][displayEndianess];
649 FILE* inFile;
650 Multihash hashValue;
651 assert(displayEndianess==big_endian || displayEndianess==little_endian);
652 assert(convention==display_gnu || convention==display_bsd);
653
654 /* Check file existence */
655 if (fileName == stdinName) {
656 inFile = stdin;
657 fileName = "stdin";
658 XSUM_setBinaryMode(stdin);
659 } else {
660 if (XSUM_isDirectory(fileName)) {
661 XSUM_log("xxhsum: %s: Is a directory \n", fileName);
662 return 1;
663 }
664 inFile = XSUM_fopen( fileName, "rb" );
665 if (inFile==NULL) {
666 XSUM_log("Error: Could not open '%s': %s. \n", fileName, strerror(errno));
667 return 1;
668 } }
669
670 /* Memory allocation & streaming */
671 { void* const buffer = malloc(blockSize);
672 if (buffer == NULL) {
673 XSUM_log("\nError: Out of memory.\n");
674 fclose(inFile);
675 return 1;
676 }
677
678 /* Stream file & update hash */
679 hashValue = XSUM_hashStream(inFile, hashType, buffer, blockSize);
680
681 fclose(inFile);
682 free(buffer);
683 }
684
685 /* display Hash value in selected format */
686 switch(hashType)
687 {
688 case algo_xxh32:
689 { XXH32_canonical_t hcbe32;
690 (void)XXH32_canonicalFromHash(&hcbe32, hashValue.xxh32);
691 f_displayLine(fileName, &hcbe32, hashType);
692 break;
693 }
694 case algo_xxh64:
695 { XXH64_canonical_t hcbe64;
696 (void)XXH64_canonicalFromHash(&hcbe64, hashValue.xxh64);
697 f_displayLine(fileName, &hcbe64, hashType);
698 break;
699 }
700 case algo_xxh128:
701 { XXH128_canonical_t hcbe128;
702 (void)XXH128_canonicalFromHash(&hcbe128, hashValue.xxh128);
703 f_displayLine(fileName, &hcbe128, hashType);
704 break;
705 }
706 default:
707 assert(0); /* not possible */
708 }
709
710 return 0;
711 }
712
713
714 /*
715 * XSUM_hashFiles:
716 * If fnTotal==0, read from stdin instead.
717 */
XSUM_hashFiles(char * const * fnList,int fnTotal,AlgoSelected hashType,Display_endianess displayEndianess,Display_convention convention)718 static int XSUM_hashFiles(char*const * fnList, int fnTotal,
719 AlgoSelected hashType,
720 Display_endianess displayEndianess,
721 Display_convention convention)
722 {
723 int fnNb;
724 int result = 0;
725
726 if (fnTotal==0)
727 return XSUM_hashFile(stdinName, hashType, displayEndianess, convention);
728
729 for (fnNb=0; fnNb<fnTotal; fnNb++)
730 result |= XSUM_hashFile(fnList[fnNb], hashType, displayEndianess, convention);
731 XSUM_logVerbose(2, "\r%70s\r", "");
732 return result;
733 }
734
735
/* Outcome of XSUM_getLine() */
typedef enum {
    GetLine_ok,                    /* a line was read */
    GetLine_eof,                   /* end of stream met before any character */
    GetLine_exceedMaxLineLength,   /* line longer than MAX_LINE_LENGTH */
    GetLine_outOfMemory            /* line buffer could not be (re)allocated */
} GetLineResult;

/* Outcome of XSUM_canonicalFromString() */
typedef enum {
    CanonicalFromString_ok,
    CanonicalFromString_invalidFormat
} CanonicalFromStringResult;

/* Outcome of XSUM_parseLine() */
typedef enum {
    ParseLine_ok,
    ParseLine_invalidFormat
} ParseLineResult;

/* Status of one checksum line */
typedef enum {
    LineStatus_hashOk,
    LineStatus_hashFailed,
    LineStatus_failedToOpen
} LineStatus;
758
759 typedef union {
760 XXH32_canonical_t xxh32;
761 XXH64_canonical_t xxh64;
762 XXH128_canonical_t xxh128;
763 } Canonical;
764
765 typedef struct {
766 Canonical canonical;
767 const char* filename;
768 int xxhBits; /* canonical type: 32:xxh32, 64:xxh64, 128:xxh128 */
769 } ParsedLine;
770
771 typedef struct {
772 unsigned long nProperlyFormattedLines;
773 unsigned long nImproperlyFormattedLines;
774 unsigned long nMismatchedChecksums;
775 unsigned long nOpenOrReadFailures;
776 unsigned long nMixedFormatLines;
777 int quit;
778 } ParseFileReport;
779
780 typedef struct {
781 const char* inFileName;
782 FILE* inFile;
783 int lineMax;
784 char* lineBuf;
785 size_t blockSize;
786 char* blockBuf;
787 XSUM_U32 strictMode;
788 XSUM_U32 statusOnly;
789 XSUM_U32 warn;
790 XSUM_U32 quiet;
791 ParseFileReport report;
792 } ParseFileArg;
793
794
795 /*
796 * Reads a line from stream `inFile`.
797 * Returns GetLine_ok, if it reads line successfully.
798 * Returns GetLine_eof, if stream reaches EOF.
799 * Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH.
800 * Returns GetLine_outOfMemory, if line buffer memory allocation failed.
801 */
XSUM_getLine(char ** lineBuf,int * lineMax,FILE * inFile)802 static GetLineResult XSUM_getLine(char** lineBuf, int* lineMax, FILE* inFile)
803 {
804 GetLineResult result = GetLine_ok;
805 size_t len = 0;
806
807 if ((*lineBuf == NULL) || (*lineMax<1)) {
808 free(*lineBuf); /* in case it's != NULL */
809 *lineMax = 0;
810 *lineBuf = (char*)malloc(DEFAULT_LINE_LENGTH);
811 if(*lineBuf == NULL) return GetLine_outOfMemory;
812 *lineMax = DEFAULT_LINE_LENGTH;
813 }
814
815 for (;;) {
816 const int c = fgetc(inFile);
817 if (c == EOF) {
818 /*
819 * If we meet EOF before first character, returns GetLine_eof,
820 * otherwise GetLine_ok.
821 */
822 if (len == 0) result = GetLine_eof;
823 break;
824 }
825
826 /* Make enough space for len+1 (for final NUL) bytes. */
827 if (len+1 >= (size_t)*lineMax) {
828 char* newLineBuf = NULL;
829 size_t newBufSize = (size_t)*lineMax;
830
831 newBufSize += (newBufSize/2) + 1; /* x 1.5 */
832 if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH;
833 if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength;
834
835 newLineBuf = (char*) realloc(*lineBuf, newBufSize);
836 if (newLineBuf == NULL) return GetLine_outOfMemory;
837
838 *lineBuf = newLineBuf;
839 *lineMax = (int)newBufSize;
840 }
841
842 if (c == '\n') break;
843 (*lineBuf)[len++] = (char) c;
844 }
845
846 (*lineBuf)[len] = '\0';
847 return result;
848 }
849
850
/*
 * charToHex():
 * Converts one hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f') to its value.
 * returns: 0..15, or -1 if `c` is not a hexadecimal digit.
 */
static int charToHex(char c)
{
    if (c >= '0' && c <= '9') return (int) (c - '0');
    if (c >= 'A' && c <= 'F') return (int) (c - 'A') + 0x0a;
    if (c >= 'a' && c <= 'f') return (int) (c - 'a') + 0x0a;
    return -1;
}
867
868
869 /*
870 * Converts canonical ASCII hexadecimal string `hashStr`
871 * to the big endian binary representation in unsigned char array `dst`.
872 *
873 * Returns CanonicalFromString_invalidFormat if hashStr is not well formatted.
874 * Returns CanonicalFromString_ok if hashStr is parsed successfully.
875 */
XSUM_canonicalFromString(unsigned char * dst,size_t dstSize,const char * hashStr,int reverseBytes)876 static CanonicalFromStringResult XSUM_canonicalFromString(unsigned char* dst,
877 size_t dstSize,
878 const char* hashStr,
879 int reverseBytes)
880 {
881 size_t i;
882 for (i = 0; i < dstSize; ++i) {
883 int h0, h1;
884 size_t j = reverseBytes ? dstSize - i - 1 : i;
885
886 h0 = charToHex(hashStr[j*2 + 0]);
887 if (h0 < 0) return CanonicalFromString_invalidFormat;
888
889 h1 = charToHex(hashStr[j*2 + 1]);
890 if (h1 < 0) return CanonicalFromString_invalidFormat;
891
892 dst[i] = (unsigned char) ((h0 << 4) | h1);
893 }
894 return CanonicalFromString_ok;
895 }
896
897
898 /*
899 * Parse single line of xxHash checksum file.
900 * Returns ParseLine_invalidFormat if the line is not well formatted.
901 * Returns ParseLine_ok if the line is parsed successfully.
902 * And members of XSUM_parseLine will be filled by parsed values.
903 *
904 * - line must be terminated with '\0' without a trailing newline.
905 * - Since parsedLine.filename will point within given argument `line`,
906 * users must keep `line`s content when they are using parsedLine.
907 * - The line may be modified to carve up the information it contains.
908 *
909 * xxHash checksum lines should have the following format:
910 *
911 * <8, 16, or 32 hexadecimal char> <space> <space> <filename...> <'\0'>
912 *
913 * or:
914 *
915 * <algorithm> <' ('> <filename> <') = '> <hexstring> <'\0'>
916 */
XSUM_parseLine(ParsedLine * parsedLine,char * line,int rev)917 static ParseLineResult XSUM_parseLine(ParsedLine* parsedLine, char* line, int rev)
918 {
919 char* const firstSpace = strchr(line, ' ');
920 const char* hash_ptr;
921 size_t hash_len;
922
923 parsedLine->filename = NULL;
924 parsedLine->xxhBits = 0;
925
926 if (firstSpace == NULL || !firstSpace[1]) return ParseLine_invalidFormat;
927
928 if (firstSpace[1] == '(') {
929 char* lastSpace = strrchr(line, ' ');
930 if (lastSpace - firstSpace < 5) return ParseLine_invalidFormat;
931 if (lastSpace[-1] != '=' || lastSpace[-2] != ' ' || lastSpace[-3] != ')') return ParseLine_invalidFormat;
932 lastSpace[-3] = '\0'; /* Terminate the filename */
933 *firstSpace = '\0';
934 rev = strstr(line, "_LE") != NULL; /* was output little-endian */
935 hash_ptr = lastSpace + 1;
936 hash_len = strlen(hash_ptr);
937 /* NOTE: This currently ignores the hash description at the start of the string.
938 * In the future we should parse it and verify that it matches the hash length.
939 * It could also be used to allow both XXH64 & XXH3_64bits to be differentiated. */
940 } else {
941 hash_ptr = line;
942 hash_len = (size_t)(firstSpace - line);
943 }
944
945 switch (hash_len)
946 {
947 case 8:
948 { XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
949 if (XSUM_canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), hash_ptr, rev)
950 != CanonicalFromString_ok) {
951 return ParseLine_invalidFormat;
952 }
953 parsedLine->xxhBits = 32;
954 break;
955 }
956
957 case 16:
958 { XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
959 if (XSUM_canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), hash_ptr, rev)
960 != CanonicalFromString_ok) {
961 return ParseLine_invalidFormat;
962 }
963 parsedLine->xxhBits = 64;
964 break;
965 }
966
967 case 32:
968 { XXH128_canonical_t* xxh128c = &parsedLine->canonical.xxh128;
969 if (XSUM_canonicalFromString(xxh128c->digest, sizeof(xxh128c->digest), hash_ptr, rev)
970 != CanonicalFromString_ok) {
971 return ParseLine_invalidFormat;
972 }
973 parsedLine->xxhBits = 128;
974 break;
975 }
976
977 default:
978 return ParseLine_invalidFormat;
979 break;
980 }
981
982 /* note : skipping second separation character, which can be anything,
983 * allowing insertion of custom markers such as '*' */
984 parsedLine->filename = firstSpace + 2;
985 return ParseLine_ok;
986 }
987
988
989 /*!
990 * Parse xxHash checksum file.
991 */
XSUM_parseFile1(ParseFileArg * XSUM_parseFileArg,int rev)992 static void XSUM_parseFile1(ParseFileArg* XSUM_parseFileArg, int rev)
993 {
994 const char* const inFileName = XSUM_parseFileArg->inFileName;
995 ParseFileReport* const report = &XSUM_parseFileArg->report;
996
997 unsigned long lineNumber = 0;
998 memset(report, 0, sizeof(*report));
999
1000 while (!report->quit) {
1001 LineStatus lineStatus = LineStatus_hashFailed;
1002 ParsedLine parsedLine;
1003 memset(&parsedLine, 0, sizeof(parsedLine));
1004
1005 lineNumber++;
1006 if (lineNumber == 0) {
1007 /* This is unlikely happen, but md5sum.c has this error check. */
1008 XSUM_log("%s: Error: Too many checksum lines\n", inFileName);
1009 report->quit = 1;
1010 break;
1011 }
1012
1013 { GetLineResult const XSUM_getLineResult = XSUM_getLine(&XSUM_parseFileArg->lineBuf,
1014 &XSUM_parseFileArg->lineMax,
1015 XSUM_parseFileArg->inFile);
1016 if (XSUM_getLineResult != GetLine_ok) {
1017 if (XSUM_getLineResult == GetLine_eof) break;
1018
1019 switch (XSUM_getLineResult)
1020 {
1021 case GetLine_ok:
1022 case GetLine_eof:
1023 /* These cases never happen. See above XSUM_getLineResult related "if"s.
1024 They exist just for make gcc's -Wswitch-enum happy. */
1025 assert(0);
1026 break;
1027
1028 default:
1029 XSUM_log("%s:%lu: Error: Unknown error.\n", inFileName, lineNumber);
1030 break;
1031
1032 case GetLine_exceedMaxLineLength:
1033 XSUM_log("%s:%lu: Error: Line too long.\n", inFileName, lineNumber);
1034 break;
1035
1036 case GetLine_outOfMemory:
1037 XSUM_log("%s:%lu: Error: Out of memory.\n", inFileName, lineNumber);
1038 break;
1039 }
1040 report->quit = 1;
1041 break;
1042 } }
1043
1044 if (XSUM_parseLine(&parsedLine, XSUM_parseFileArg->lineBuf, rev) != ParseLine_ok) {
1045 report->nImproperlyFormattedLines++;
1046 if (XSUM_parseFileArg->warn) {
1047 XSUM_log("%s:%lu: Error: Improperly formatted checksum line.\n",
1048 inFileName, lineNumber);
1049 }
1050 continue;
1051 }
1052
1053 report->nProperlyFormattedLines++;
1054
1055 do {
1056 FILE* const fp = XSUM_fopen(parsedLine.filename, "rb");
1057 if (fp == NULL) {
1058 lineStatus = LineStatus_failedToOpen;
1059 break;
1060 }
1061 lineStatus = LineStatus_hashFailed;
1062 switch (parsedLine.xxhBits)
1063 {
1064 case 32:
1065 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh32, XSUM_parseFileArg->blockBuf, XSUM_parseFileArg->blockSize);
1066 if (xxh.xxh32 == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) {
1067 lineStatus = LineStatus_hashOk;
1068 } }
1069 break;
1070
1071 case 64:
1072 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh64, XSUM_parseFileArg->blockBuf, XSUM_parseFileArg->blockSize);
1073 if (xxh.xxh64 == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) {
1074 lineStatus = LineStatus_hashOk;
1075 } }
1076 break;
1077
1078 case 128:
1079 { Multihash const xxh = XSUM_hashStream(fp, algo_xxh128, XSUM_parseFileArg->blockBuf, XSUM_parseFileArg->blockSize);
1080 if (XXH128_isEqual(xxh.xxh128, XXH128_hashFromCanonical(&parsedLine.canonical.xxh128))) {
1081 lineStatus = LineStatus_hashOk;
1082 } }
1083 break;
1084
1085 default:
1086 break;
1087 }
1088 fclose(fp);
1089 } while (0);
1090
1091 switch (lineStatus)
1092 {
1093 default:
1094 XSUM_log("%s: Error: Unknown error.\n", inFileName);
1095 report->quit = 1;
1096 break;
1097
1098 case LineStatus_failedToOpen:
1099 report->nOpenOrReadFailures++;
1100 if (!XSUM_parseFileArg->statusOnly) {
1101 XSUM_output("%s:%lu: Could not open or read '%s': %s.\n",
1102 inFileName, lineNumber, parsedLine.filename, strerror(errno));
1103 }
1104 break;
1105
1106 case LineStatus_hashOk:
1107 case LineStatus_hashFailed:
1108 { int b = 1;
1109 if (lineStatus == LineStatus_hashOk) {
1110 /* If --quiet is specified, don't display "OK" */
1111 if (XSUM_parseFileArg->quiet) b = 0;
1112 } else {
1113 report->nMismatchedChecksums++;
1114 }
1115
1116 if (b && !XSUM_parseFileArg->statusOnly) {
1117 XSUM_output("%s: %s\n", parsedLine.filename
1118 , lineStatus == LineStatus_hashOk ? "OK" : "FAILED");
1119 } }
1120 break;
1121 }
1122 } /* while (!report->quit) */
1123 }
1124
1125
1126 /* Parse xxHash checksum file.
1127 * Returns 1, if all procedures were succeeded.
1128 * Returns 0, if any procedures was failed.
1129 *
1130 * If strictMode != 0, return error code if any line is invalid.
1131 * If statusOnly != 0, don't generate any output.
1132 * If warn != 0, print a warning message to stderr.
1133 * If quiet != 0, suppress "OK" line.
1134 *
1135 * "All procedures are succeeded" means:
1136 * - Checksum file contains at least one line and less than SIZE_T_MAX lines.
1137 * - All files are properly opened and read.
1138 * - All hash values match with its content.
1139 * - (strict mode) All lines in checksum file are consistent and well formatted.
1140 */
XSUM_checkFile(const char * inFileName,const Display_endianess displayEndianess,XSUM_U32 strictMode,XSUM_U32 statusOnly,XSUM_U32 warn,XSUM_U32 quiet)1141 static int XSUM_checkFile(const char* inFileName,
1142 const Display_endianess displayEndianess,
1143 XSUM_U32 strictMode,
1144 XSUM_U32 statusOnly,
1145 XSUM_U32 warn,
1146 XSUM_U32 quiet)
1147 {
1148 int result = 0;
1149 FILE* inFile = NULL;
1150 ParseFileArg XSUM_parseFileArgBody;
1151 ParseFileArg* const XSUM_parseFileArg = &XSUM_parseFileArgBody;
1152 ParseFileReport* const report = &XSUM_parseFileArg->report;
1153
1154 /* note: stdinName is special constant pointer. It is not a string. */
1155 if (inFileName == stdinName) {
1156 /*
1157 * Note: Since we expect text input for xxhash -c mode,
1158 * we don't set binary mode for stdin.
1159 */
1160 inFileName = "stdin";
1161 inFile = stdin;
1162 } else {
1163 inFile = XSUM_fopen( inFileName, "rt" );
1164 }
1165
1166 if (inFile == NULL) {
1167 XSUM_log("Error: Could not open '%s': %s\n", inFileName, strerror(errno));
1168 return 0;
1169 }
1170
1171 XSUM_parseFileArg->inFileName = inFileName;
1172 XSUM_parseFileArg->inFile = inFile;
1173 XSUM_parseFileArg->lineMax = DEFAULT_LINE_LENGTH;
1174 XSUM_parseFileArg->lineBuf = (char*) malloc((size_t)XSUM_parseFileArg->lineMax);
1175 XSUM_parseFileArg->blockSize = 64 * 1024;
1176 XSUM_parseFileArg->blockBuf = (char*) malloc(XSUM_parseFileArg->blockSize);
1177 XSUM_parseFileArg->strictMode = strictMode;
1178 XSUM_parseFileArg->statusOnly = statusOnly;
1179 XSUM_parseFileArg->warn = warn;
1180 XSUM_parseFileArg->quiet = quiet;
1181
1182 if ( (XSUM_parseFileArg->lineBuf == NULL)
1183 || (XSUM_parseFileArg->blockBuf == NULL) ) {
1184 XSUM_log("Error: : memory allocation failed \n");
1185 exit(1);
1186 }
1187 XSUM_parseFile1(XSUM_parseFileArg, displayEndianess != big_endian);
1188
1189 free(XSUM_parseFileArg->blockBuf);
1190 free(XSUM_parseFileArg->lineBuf);
1191
1192 if (inFile != stdin) fclose(inFile);
1193
1194 /* Show error/warning messages. All messages are copied from md5sum.c
1195 */
1196 if (report->nProperlyFormattedLines == 0) {
1197 XSUM_log("%s: no properly formatted xxHash checksum lines found\n", inFileName);
1198 } else if (!statusOnly) {
1199 if (report->nImproperlyFormattedLines) {
1200 XSUM_output("%lu %s improperly formatted\n"
1201 , report->nImproperlyFormattedLines
1202 , report->nImproperlyFormattedLines == 1 ? "line is" : "lines are");
1203 }
1204 if (report->nOpenOrReadFailures) {
1205 XSUM_output("%lu listed %s could not be read\n"
1206 , report->nOpenOrReadFailures
1207 , report->nOpenOrReadFailures == 1 ? "file" : "files");
1208 }
1209 if (report->nMismatchedChecksums) {
1210 XSUM_output("%lu computed %s did NOT match\n"
1211 , report->nMismatchedChecksums
1212 , report->nMismatchedChecksums == 1 ? "checksum" : "checksums");
1213 } }
1214
1215 /* Result (exit) code logic is copied from
1216 * gnu coreutils/src/md5sum.c digest_check() */
1217 result = report->nProperlyFormattedLines != 0
1218 && report->nMismatchedChecksums == 0
1219 && report->nOpenOrReadFailures == 0
1220 && (!strictMode || report->nImproperlyFormattedLines == 0)
1221 && report->quit == 0;
1222 return result;
1223 }
1224
1225
/*
 * Runs XSUM_checkFile() on each of the `fnTotal` checksum files of `fnList`,
 * or on stdin when the list is empty.
 * Every file is processed, even after an earlier one failed.
 *
 * Returns 0 if all checksum files verified successfully, 1 otherwise
 * (value suitable as a process exit code).
 */
static int XSUM_checkFiles(char*const* fnList, int fnTotal,
                           const Display_endianess displayEndianess,
                           XSUM_U32 strictMode,
                           XSUM_U32 statusOnly,
                           XSUM_U32 warn,
                           XSUM_U32 quiet)
{
    int allPassed = 1;
    int fileIdx;

    if (fnTotal == 0) {
        /* No file listed: read the checksum list from stdin.
         * note: stdinName is not a string. It's a special pointer. */
        if (!XSUM_checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet)) {
            allPassed = 0;
        }
        return allPassed ? 0 : 1;
    }

    for (fileIdx = 0; fileIdx < fnTotal; fileIdx++) {
        if (!XSUM_checkFile(fnList[fileIdx], displayEndianess, strictMode, statusOnly, warn, quiet)) {
            allPassed = 0;
        }
    }
    return allPassed ? 0 : 1;
}
1246
1247
1248 /* ********************************************************
1249 * Main
1250 **********************************************************/
1251
XSUM_usage(const char * exename)1252 static int XSUM_usage(const char* exename)
1253 {
1254 XSUM_log( WELCOME_MESSAGE(exename) );
1255 XSUM_log( "Print or verify checksums using fast non-cryptographic algorithm xxHash \n\n" );
1256 XSUM_log( "Usage: %s [options] [files] \n\n", exename);
1257 XSUM_log( "When no filename provided or when '-' is provided, uses stdin as input. \n");
1258 XSUM_log( "Options: \n");
1259 XSUM_log( " -H# algorithm selection: 0,1,2 or 32,64,128 (default: %i) \n", (int)g_defaultAlgo);
1260 XSUM_log( " -c, --check read xxHash checksum from [files] and check them \n");
1261 XSUM_log( " -h, --help display a long help page about advanced options \n");
1262 return 0;
1263 }
1264
1265
XSUM_usage_advanced(const char * exename)1266 static int XSUM_usage_advanced(const char* exename)
1267 {
1268 XSUM_usage(exename);
1269 XSUM_log( "Advanced :\n");
1270 XSUM_log( " -V, --version Display version information \n");
1271 XSUM_log( " --tag Produce BSD-style checksum lines \n");
1272 XSUM_log( " --little-endian Checksum values use little endian convention (default: big endian) \n");
1273 XSUM_log( " -b Run benchmark \n");
1274 XSUM_log( " -b# Bench only algorithm variant # \n");
1275 XSUM_log( " -i# Number of times to run the benchmark (default: %u) \n", (unsigned)g_nbIterations);
1276 XSUM_log( " -q, --quiet Don't display version header in benchmark mode \n");
1277 XSUM_log( "\n");
1278 XSUM_log( "The following four options are useful only when verifying checksums (-c): \n");
1279 XSUM_log( " -q, --quiet Don't print OK for each successfully verified file \n");
1280 XSUM_log( " --status Don't output anything, status code shows success \n");
1281 XSUM_log( " --strict Exit non-zero for improperly formatted checksum lines \n");
1282 XSUM_log( " --warn Warn about improperly formatted checksum lines \n");
1283 return 0;
1284 }
1285
/*
 * Reports an invalid command line: prints "Wrong parameters"
 * followed by the short help text.
 * Always returns 1, so the result can be used directly as a failure exit code.
 */
static int XSUM_badusage(const char* exename)
{
    XSUM_log("Wrong parameters\n\n");
    (void) XSUM_usage(exename);
    return 1;
}
1292
/*
 * Prints `msg` through XSUM_log(), then terminates the program
 * with exit code 1. Does not return.
 */
static void errorOut(const char* msg)
{
    XSUM_log("%s \n", msg);
    exit(1);
}
1298
/*
 * Returns the last component of `path`, i.e. the part following the last
 * '/' and then, within that remainder, the last '\\' (windows separator).
 * The returned pointer points inside `path`; no copy is made.
 * When `path` contains no separator, `path` itself is returned.
 */
static const char* XSUM_lastNameFromPath(const char* path)
{
    const char* base = path;
    const char* sep;

    sep = strrchr(base, '/');
    if (sep != NULL) base = sep + 1;

    sep = strrchr(base, '\\');   /* windows */
    if (sep != NULL) base = sep + 1;

    return base;
}
1306
1307 /*!
1308 * XSUM_readU32FromCharChecked():
1309 * @return 0 if success, and store the result in *value.
1310 * Allows and interprets K, KB, KiB, M, MB and MiB suffix.
1311 * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
1312 * @return 1 if an overflow error occurs
1313 */
XSUM_readU32FromCharChecked(const char ** stringPtr,XSUM_U32 * value)1314 static int XSUM_readU32FromCharChecked(const char** stringPtr, XSUM_U32* value)
1315 {
1316 static const XSUM_U32 max = (((XSUM_U32)(-1)) / 10) - 1;
1317 XSUM_U32 result = 0;
1318 while ((**stringPtr >='0') && (**stringPtr <='9')) {
1319 if (result > max) return 1; /* overflow error */
1320 result *= 10;
1321 result += (XSUM_U32)(**stringPtr - '0');
1322 (*stringPtr)++ ;
1323 }
1324 if ((**stringPtr=='K') || (**stringPtr=='M')) {
1325 XSUM_U32 const maxK = ((XSUM_U32)(-1)) >> 10;
1326 if (result > maxK) return 1; /* overflow error */
1327 result <<= 10;
1328 if (**stringPtr=='M') {
1329 if (result > maxK) return 1; /* overflow error */
1330 result <<= 10;
1331 }
1332 (*stringPtr)++; /* skip `K` or `M` */
1333 if (**stringPtr=='i') (*stringPtr)++;
1334 if (**stringPtr=='B') (*stringPtr)++;
1335 }
1336 *value = result;
1337 return 0;
1338 }
1339
1340 /*!
1341 * XSUM_readU32FromChar():
1342 * @return: unsigned integer value read from input in `char` format.
1343 * allows and interprets K, KB, KiB, M, MB and MiB suffix.
1344 * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
1345 * Note: function will exit() program if digit sequence overflows
1346 */
XSUM_readU32FromChar(const char ** stringPtr)1347 static XSUM_U32 XSUM_readU32FromChar(const char** stringPtr) {
1348 XSUM_U32 result;
1349 if (XSUM_readU32FromCharChecked(stringPtr, &result)) {
1350 static const char errorMsg[] = "Error: numeric value too large";
1351 errorOut(errorMsg);
1352 }
1353 return result;
1354 }
1355
XSUM_main(int argc,char * argv[])1356 XSUM_API int XSUM_main(int argc, char* argv[])
1357 {
1358 int i, filenamesStart = 0;
1359 const char* const exename = XSUM_lastNameFromPath(argv[0]);
1360 XSUM_U32 benchmarkMode = 0;
1361 XSUM_U32 fileCheckMode = 0;
1362 XSUM_U32 strictMode = 0;
1363 XSUM_U32 statusOnly = 0;
1364 XSUM_U32 warn = 0;
1365 int explicitStdin = 0;
1366 XSUM_U32 selectBenchIDs= 0; /* 0 == use default k_testIDs_default, kBenchAll == bench all */
1367 static const XSUM_U32 kBenchAll = 99;
1368 size_t keySize = XSUM_DEFAULT_SAMPLE_SIZE;
1369 AlgoSelected algo = g_defaultAlgo;
1370 Display_endianess displayEndianess = big_endian;
1371 Display_convention convention = display_gnu;
1372
1373 /* special case: xxhNNsum default to NN bits checksum */
1374 if (strstr(exename, "xxh32sum") != NULL) algo = g_defaultAlgo = algo_xxh32;
1375 if (strstr(exename, "xxh64sum") != NULL) algo = g_defaultAlgo = algo_xxh64;
1376 if (strstr(exename, "xxh128sum") != NULL) algo = g_defaultAlgo = algo_xxh128;
1377
1378 for (i=1; i<argc; i++) {
1379 const char* argument = argv[i];
1380 assert(argument != NULL);
1381
1382 if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; }
1383 if (!strcmp(argument, "--benchmark-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
1384 if (!strcmp(argument, "--bench-all")) { benchmarkMode = 1; selectBenchIDs = kBenchAll; continue; }
1385 if (!strcmp(argument, "--quiet")) { XSUM_logLevel--; continue; }
1386 if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; }
1387 if (!strcmp(argument, "--strict")) { strictMode = 1; continue; }
1388 if (!strcmp(argument, "--status")) { statusOnly = 1; continue; }
1389 if (!strcmp(argument, "--warn")) { warn = 1; continue; }
1390 if (!strcmp(argument, "--help")) { return XSUM_usage_advanced(exename); }
1391 if (!strcmp(argument, "--version")) { XSUM_log(FULL_WELCOME_MESSAGE(exename)); XSUM_sanityCheck(); return 0; }
1392 if (!strcmp(argument, "--tag")) { convention = display_bsd; continue; }
1393
1394 if (!strcmp(argument, "--")) {
1395 if (filenamesStart==0 && i!=argc-1) filenamesStart=i+1; /* only supports a continuous list of filenames */
1396 break; /* treat rest of arguments as strictly file names */
1397 }
1398 if (*argument != '-') {
1399 if (filenamesStart==0) filenamesStart=i; /* only supports a continuous list of filenames */
1400 break; /* treat rest of arguments as strictly file names */
1401 }
1402
1403 /* command selection */
1404 argument++; /* note: *argument=='-' */
1405 if (*argument == 0) explicitStdin = 1;
1406
1407 while (*argument != 0) {
1408 switch(*argument)
1409 {
1410 /* Display version */
1411 case 'V':
1412 XSUM_log(FULL_WELCOME_MESSAGE(exename)); return 0;
1413
1414 /* Display help on XSUM_usage */
1415 case 'h':
1416 return XSUM_usage_advanced(exename);
1417
1418 /* select hash algorithm */
1419 case 'H': argument++;
1420 switch(XSUM_readU32FromChar(&argument)) {
1421 case 0 :
1422 case 32: algo = algo_xxh32; break;
1423 case 1 :
1424 case 64: algo = algo_xxh64; break;
1425 case 2 :
1426 case 128: algo = algo_xxh128; break;
1427 default:
1428 return XSUM_badusage(exename);
1429 }
1430 break;
1431
1432 /* File check mode */
1433 case 'c':
1434 fileCheckMode=1;
1435 argument++;
1436 break;
1437
1438 /* Warning mode (file check mode only, alias of "--warning") */
1439 case 'w':
1440 warn=1;
1441 argument++;
1442 break;
1443
1444 /* Trigger benchmark mode */
1445 case 'b':
1446 argument++;
1447 benchmarkMode = 1;
1448 do {
1449 if (*argument == ',') argument++;
1450 selectBenchIDs = XSUM_readU32FromChar(&argument); /* select one specific test */
1451 if (selectBenchIDs < NB_TESTFUNC) {
1452 g_testIDs[selectBenchIDs] = 1;
1453 } else
1454 selectBenchIDs = kBenchAll;
1455 } while (*argument == ',');
1456 break;
1457
1458 /* Modify Nb Iterations (benchmark only) */
1459 case 'i':
1460 argument++;
1461 g_nbIterations = XSUM_readU32FromChar(&argument);
1462 break;
1463
1464 /* Modify Block size (benchmark only) */
1465 case 'B':
1466 argument++;
1467 keySize = XSUM_readU32FromChar(&argument);
1468 break;
1469
1470 /* Modify verbosity of benchmark output (hidden option) */
1471 case 'q':
1472 argument++;
1473 XSUM_logLevel--;
1474 break;
1475
1476 default:
1477 return XSUM_badusage(exename);
1478 }
1479 }
1480 } /* for(i=1; i<argc; i++) */
1481
1482 /* Check benchmark mode */
1483 if (benchmarkMode) {
1484 XSUM_logVerbose(2, FULL_WELCOME_MESSAGE(exename) );
1485 XSUM_sanityCheck();
1486 if (selectBenchIDs == 0) memcpy(g_testIDs, k_testIDs_default, sizeof(g_testIDs));
1487 if (selectBenchIDs == kBenchAll) memset(g_testIDs, 1, sizeof(g_testIDs));
1488 if (filenamesStart==0) return XSUM_benchInternal(keySize);
1489 return XSUM_benchFiles(argv+filenamesStart, argc-filenamesStart);
1490 }
1491
1492 /* Check if input is defined as console; trigger an error in this case */
1493 if ( (filenamesStart==0) && XSUM_isConsole(stdin) && !explicitStdin)
1494 return XSUM_badusage(exename);
1495
1496 if (filenamesStart==0) filenamesStart = argc;
1497 if (fileCheckMode) {
1498 return XSUM_checkFiles(argv+filenamesStart, argc-filenamesStart,
1499 displayEndianess, strictMode, statusOnly, warn, (XSUM_logLevel < 2) /*quiet*/);
1500 } else {
1501 return XSUM_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess, convention);
1502 }
1503 }
1504