• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * xxhsum - Command line interface for xxhash algorithms
3  * Copyright (C) 2013-2020 Yann Collet
4  *
5  * GPL v2 License
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * You can contact the author at:
22  *   - xxHash homepage: https://www.xxhash.com
23  *   - xxHash source repository: https://github.com/Cyan4973/xxHash
24  */
25 
26 #include "xsum_config.h"
27 #include "xsum_os_specific.h"
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <sys/types.h>  /* struct stat / __wstat64 */
32 #include <sys/stat.h>   /* stat() / _stat64() */
33 
34 /*
35  * This file contains all of the ugly boilerplate to make xxhsum work across
36  * platforms.
37  */
/*
 * XSUM_stat_t: the stat buffer type used by the XSUM_stat() wrappers below.
 * Plain `struct stat` is used unless building with MSVC or with the
 * wide-character Windows code path, which use `struct __stat64`.
 */
#if !defined(_MSC_VER) && !XSUM_WIN32_USE_WCHAR
typedef struct stat XSUM_stat_t;
#else
typedef struct __stat64 XSUM_stat_t;
#  if defined(_MSC_VER)
typedef int mode_t;   /* provided locally for MSVC builds */
#  endif
#endif
46 
/*
 * XSUM_IS_CONSOLE(stdStream): nonzero when stdStream is attached to a
 * terminal/console, 0 otherwise. On platforms with no known way to query
 * this, it is always 0.
 */
#if (defined(__linux__) && (XSUM_PLATFORM_POSIX_VERSION >= 1)) \
 || (XSUM_PLATFORM_POSIX_VERSION >= 200112L) \
 || defined(__DJGPP__) \
 || defined(__MSYS__)
#  include <unistd.h>   /* isatty */
#  define XSUM_IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#elif defined(MSDOS) || defined(OS2)
#  include <io.h>       /* _isatty */
#  define XSUM_IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#elif defined(WIN32) || defined(_WIN32)
#  include <io.h>      /* _isatty, _fileno, _get_osfhandle */
#  include <windows.h> /* GetConsoleMode, HANDLE, DWORD */
#  include <stdio.h>   /* FILE */
/*
 * Windows: the stream must both look like a character device to the CRT
 * and be accepted by GetConsoleMode().
 */
static __inline int XSUM_IS_CONSOLE(FILE* stdStream)
{
    DWORD consoleMode;   /* required by GetConsoleMode(); value is not used */
    if (!_isatty(_fileno(stdStream))) {
        return 0;
    }
    return GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &consoleMode) != 0;
}
#else
#  define XSUM_IS_CONSOLE(stdStream) 0
#endif
68 
/*
 * XSUM_SET_BINARY_MODE(file): switch `file` to binary (untranslated) mode on
 * DOS/OS2/Windows targets. Elsewhere it only evaluates its argument.
 */
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
#  include <fcntl.h>   /* _O_BINARY */
#  include <io.h>      /* _setmode, _fileno, _get_osfhandle */
#  if defined(__DJGPP__)
#    define XSUM_SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
#  else
#    include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
#    include <winioctl.h> /* FSCTL_SET_SPARSE */
     /* The _setmode() result is stored and then discarded. */
#    define XSUM_SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
#  endif
#else
#  define XSUM_SET_BINARY_MODE(file) ((void)(file))
#endif
82 
XSUM_isConsole(FILE * stream)83 XSUM_API int XSUM_isConsole(FILE* stream)
84 {
85     return XSUM_IS_CONSOLE(stream);
86 }
87 
XSUM_setBinaryMode(FILE * stream)88 XSUM_API void XSUM_setBinaryMode(FILE* stream)
89 {
90     XSUM_SET_BINARY_MODE(stream);
91 }
92 
93 #if !XSUM_WIN32_USE_WCHAR
94 
XSUM_fopen(const char * filename,const char * mode)95 XSUM_API FILE* XSUM_fopen(const char* filename, const char* mode)
96 {
97     return fopen(filename, mode);
98 }
99 XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
XSUM_vfprintf(FILE * stream,const char * format,va_list ap)100 XSUM_API int XSUM_vfprintf(FILE* stream, const char* format, va_list ap)
101 {
102     return vfprintf(stream, format, ap);
103 }
104 
/*
 * stat() wrapper: fills `statbuf` for `infilename`.
 * Uses _stat64() under MSVC and stat() everywhere else; returns whatever
 * the underlying call returns.
 */
static int XSUM_stat(const char* infilename, XSUM_stat_t* statbuf)
{
    int status;
#if defined(_MSC_VER)
    status = _stat64(infilename, statbuf);
#else
    status = stat(infilename, statbuf);
#endif
    return status;
}
113 
#ifndef XSUM_NO_MAIN
/*
 * Program entry point for builds without wide-character support:
 * hands argc/argv to XSUM_main() unchanged and returns its result.
 */
int main(int argc, char* argv[])
{
    int const exitCode = XSUM_main(argc, argv);
    return exitCode;
}
#endif
120 
121 /* Unicode helpers for Windows to make UTF-8 act as it should. */
122 #else
123 #  include <windows.h>
124 #  include <wchar.h>
125 
126 /*****************************************************************************
127  *                       Unicode conversion tools
128  *****************************************************************************/
129 
130 /*
131  * Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards.
132  * This version allows keeping the output length.
133  */
XSUM_widenString(const char * str,int * lenOut)134 static wchar_t* XSUM_widenString(const char* str, int* lenOut)
135 {
136     int const len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
137     if (lenOut != NULL) *lenOut = len;
138     if (len == 0) return NULL;
139     {   wchar_t* buf = (wchar_t*)malloc((size_t)len * sizeof(wchar_t));
140         if (buf != NULL) {
141             if (MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len) == 0) {
142                 free(buf);
143                 return NULL;
144        }    }
145        return buf;
146     }
147 }
148 
149 /*
150  * Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards.
151  * This version allows keeping the output length.
152  */
XSUM_narrowString(const wchar_t * str,int * lenOut)153 static char* XSUM_narrowString(const wchar_t *str, int *lenOut)
154 {
155     int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
156     if (lenOut != NULL) *lenOut = len;
157     if (len == 0) return NULL;
158     {   char* const buf = (char*)malloc((size_t)len * sizeof(char));
159         if (buf != NULL) {
160             if (WideCharToMultiByte(CP_UTF8, 0, str, -1, buf, len, NULL, NULL) == 0) {
161                 free(buf);
162                 return NULL;
163         }    }
164         return buf;
165     }
166 }
167 
168 
169 
170 /*****************************************************************************
171  *                             File helpers
172  *****************************************************************************/
173 /*
174  * fopen wrapper that supports UTF-8
175  *
176  * fopen will only accept ANSI filenames, which means that we can't open Unicode filenames.
177  *
178  * In order to open a Unicode filename, we need to convert filenames to UTF-16 and use _wfopen.
179  */
XSUM_fopen(const char * filename,const char * mode)180 XSUM_API FILE* XSUM_fopen(const char* filename, const char* mode)
181 {
182     FILE* f = NULL;
183     wchar_t* const wide_filename = XSUM_widenString(filename, NULL);
184     if (wide_filename != NULL) {
185         wchar_t* const wide_mode = XSUM_widenString(mode, NULL);
186         if (wide_mode != NULL) {
187             f = _wfopen(wide_filename, wide_mode);
188             free(wide_mode);
189         }
190         free(wide_filename);
191     }
192     return f;
193 }
194 
195 /*
196  * stat() wrapper which supports UTF-8 filenames.
197  */
XSUM_stat(const char * infilename,XSUM_stat_t * statbuf)198 static int XSUM_stat(const char* infilename, XSUM_stat_t* statbuf)
199 {
200     int r = -1;
201     wchar_t* const wide_filename = XSUM_widenString(infilename, NULL);
202     if (wide_filename != NULL) {
203         r = _wstat64(wide_filename, statbuf);
204         free(wide_filename);
205     }
206     return r;
207 }
208 
/*
 * XSUM_va_copy(destination, source): va_copy where it is available.
 * For MSVC (not clang) without a va_copy macro, fall back to plain
 * assignment, which is what MSVC 2019 defines in stdarg.h.
 */
#if !defined(_MSC_VER) || defined(__clang__) || defined(va_copy)
#  define XSUM_va_copy(destination, source) va_copy(destination, source)
#else
#  define XSUM_va_copy(destination, source) ((destination) = (source))
#endif
217 
218 /*
219  * vasprintf for Windows.
220  */
221 XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
XSUM_vasprintf(char ** strp,const char * format,va_list ap)222 static int XSUM_vasprintf(char** strp, const char* format, va_list ap)
223 {
224     int ret;
225     int size;
226     va_list copy;
227     /*
228      * To be safe, make a va_copy.
229      *
230      * Note that Microsoft doesn't use va_copy in its sample code:
231      *   https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/vsprintf-vsprintf-l-vswprintf-vswprintf-l-vswprintf-l?view=vs-2019
232      */
233     XSUM_va_copy(copy, ap);
234     /* Calculate how many characters we need */
235     size = _vscprintf(format, ap);
236     va_end(copy);
237 
238     if (size < 0) {
239         *strp = NULL;
240         return size;
241     } else {
242         *strp = (char*) malloc((size_t)size + 1);
243         if (*strp == NULL) {
244             return -1;
245         }
246         /* vsprintf into the new buffer */
247         ret = vsprintf(*strp, format, ap);
248         if (ret < 0) {
249             free(*strp);
250             *strp = NULL;
251         }
252         return ret;
253     }
254 }
255 
256 /*
257  * fprintf wrapper that supports UTF-8.
258  *
259  * fprintf doesn't properly handle Unicode on Windows.
260  *
261  * Additionally, it is codepage sensitive on console and may crash the program.
262  *
263  * Instead, we use vsnprintf, and either print with fwrite or convert to UTF-16
264  * for console output and use the codepage-independent WriteConsoleW.
265  *
266  * Credit to t-mat: https://github.com/t-mat/xxHash/commit/5691423
267  */
268 XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
XSUM_vfprintf(FILE * stream,const char * format,va_list ap)269 XSUM_API int XSUM_vfprintf(FILE *stream, const char *format, va_list ap)
270 {
271     int result;
272     char* u8_str = NULL;
273 
274     /*
275      * Generate the UTF-8 output string with vasprintf.
276      */
277     result = XSUM_vasprintf(&u8_str, format, ap);
278 
279     if (result >= 0) {
280         const size_t nchar = (size_t)result + 1;
281 
282         /*
283          * Check if we are outputting to a console. Don't use XSUM_isConsole
284          * directly -- we don't need to call _get_osfhandle twice.
285          */
286         int fileNb = _fileno(stream);
287         intptr_t handle_raw = _get_osfhandle(fileNb);
288         HANDLE handle = (HANDLE)handle_raw;
289         DWORD dwTemp;
290 
291         if (handle_raw < 0) {
292              result = -1;
293         } else if (_isatty(fileNb) && GetConsoleMode(handle, &dwTemp)) {
294             /*
295              * Convert to UTF-16 and output with WriteConsoleW.
296              *
297              * This is codepage independent and works on Windows XP's default
298              * msvcrt.dll.
299              */
300             int len;
301             wchar_t* const u16_buf = XSUM_widenString(u8_str, &len);
302             if (u16_buf == NULL) {
303                 result = -1;
304             } else {
305                 if (WriteConsoleW(handle, u16_buf, (DWORD)len - 1, &dwTemp, NULL)) {
306                     result = (int)dwTemp;
307                 } else {
308                     result = -1;
309                 }
310                 free(u16_buf);
311             }
312         } else {
313             /* fwrite the UTF-8 string if we are printing to a file */
314             result = (int)fwrite(u8_str, 1, nchar - 1, stream);
315             if (result == 0) {
316                 result = -1;
317             }
318         }
319         free(u8_str);
320     }
321     return result;
322 }
323 
324 #ifndef XSUM_NO_MAIN
325 /*****************************************************************************
326  *                    Command Line argument parsing
327  *****************************************************************************/
328 
/*
 * Converts a UTF-16 argv to UTF-8.
 *
 * Returns a newly allocated array of argc UTF-8 strings followed by a NULL
 * entry, or NULL if the array cannot be allocated or any argument fails to
 * convert. On failure nothing is left allocated.
 */
static char** XSUM_convertArgv(int argc, wchar_t* utf16_argv[])
{
    int idx;
    char** const narrowed = (char**)malloc((size_t)(argc + 1) * sizeof(char*));

    if (narrowed == NULL) {
        return NULL;
    }

    for (idx = 0; idx < argc; idx++) {
        narrowed[idx] = XSUM_narrowString(utf16_argv[idx], NULL);
        if (narrowed[idx] != NULL) {
            continue;
        }
        /* Conversion failed: release the entries converted so far. */
        while (idx-- > 0) {
            free(narrowed[idx]);
        }
        free(narrowed);
        return NULL;
    }

    narrowed[argc] = NULL;
    return narrowed;
}
/*
 * Frees arguments returned by XSUM_convertArgv: the first argc strings,
 * then the array itself. A NULL argv is accepted and ignored.
 */
static void XSUM_freeArgv(int argc, char** argv)
{
    if (argv != NULL) {
        int idx;
        for (idx = 0; idx < argc; idx++) {
            free(argv[idx]);
        }
        free(argv);
    }
}
362 
/*
 * Common body of the wide-character entry points: converts the UTF-16
 * arguments to UTF-8, runs XSUM_main() on them and frees the converted
 * copies. Aborts the program if the arguments cannot be converted.
 */
static int XSUM_wmain(int argc, wchar_t* utf16_argv[])
{
    int exitCode;
    /* Convert the UTF-16 arguments to UTF-8. */
    char** const utf8_argv = XSUM_convertArgv(argc, utf16_argv);

    if (utf8_argv == NULL) {
        /* An unfortunate but incredibly unlikely error. */
        fprintf(stderr, "xxhsum: error converting command line arguments!\n");
        abort();
    }

    /*
     * MinGW's terminal uses full block buffering for stderr.
     *
     * This is nonstandard behavior and causes text to not display until
     * the buffer fills.
     *
     * Switching stderr to unbuffered with `setvbuf()` makes text display
     * instantly.
     */
    setvbuf(stderr, NULL, _IONBF, 0);

    /* Call our real main function */
    exitCode = XSUM_main(argc, utf8_argv);

    /* Cleanup */
    XSUM_freeArgv(argc, utf8_argv);
    return exitCode;
}
393 
394 #if XSUM_WIN32_USE_WMAIN
395 
396 /*
397  * The preferred method of obtaining the real UTF-16 arguments. Always works
398  * on MSVC, sometimes works on MinGW-w64 depending on the compiler flags.
399  */
400 #ifdef __cplusplus
401 extern "C"
402 #endif
wmain(int argc,wchar_t * utf16_argv[])403 int __cdecl wmain(int argc, wchar_t* utf16_argv[])
404 {
405     return XSUM_wmain(argc, utf16_argv);
406 }
407 #else /* !XSUM_WIN32_USE_WMAIN */
408 
409 /*
410  * Wrap `XSUM_wmain()` using `main()` and `__wgetmainargs()` on MinGW without
411  * Unicode support.
412  *
413  * `__wgetmainargs()` is used in the CRT startup to retrieve the arguments for
414  * `wmain()`, so we use it on MinGW to emulate `wmain()`.
415  *
416  * It is an internal function and not declared in any public headers, so we
417  * have to declare it manually.
418  *
419  * An alternative that doesn't mess with internal APIs is `GetCommandLineW()`
420  * with `CommandLineToArgvW()`, but the former doesn't expand wildcards and the
421  * latter requires linking to Shell32.dll and its numerous dependencies.
422  *
423  * This method keeps our dependencies to kernel32.dll and the CRT.
424  *
425  * https://docs.microsoft.com/en-us/cpp/c-runtime-library/getmainargs-wgetmainargs?view=vs-2019
426  */
427 typedef struct {
428     int newmode;
429 } _startupinfo;
430 
431 #ifdef __cplusplus
432 extern "C"
433 #endif
434 int __cdecl __wgetmainargs(
435     int*          Argc,
436     wchar_t***    Argv,
437     wchar_t***    Env,
438     int           DoWildCard,
439     _startupinfo* StartInfo
440 );
441 
main(int ansi_argc,char * ansi_argv[])442 int main(int ansi_argc, char* ansi_argv[])
443 {
444     int       utf16_argc;
445     wchar_t** utf16_argv;
446     wchar_t** utf16_envp;         /* Unused but required */
447     _startupinfo startinfo = {0}; /* 0 == don't change new mode */
448 
449     /* Get wmain's UTF-16 arguments. Make sure we expand wildcards. */
450     if (__wgetmainargs(&utf16_argc, &utf16_argv, &utf16_envp, 1, &startinfo) < 0)
451         /* In the very unlikely case of an error, use the ANSI arguments. */
452         return XSUM_main(ansi_argc, ansi_argv);
453 
454     /* Call XSUM_wmain with our UTF-16 arguments */
455     return XSUM_wmain(utf16_argc, utf16_argv);
456 }
457 
458 #endif /* !XSUM_WIN32_USE_WMAIN */
459 #endif /* !XSUM_NO_MAIN */
460 #endif /* XSUM_WIN32_USE_WCHAR */
461 
462 
463 /*
464  * Determines whether the file at filename is a directory.
465  */
XSUM_isDirectory(const char * filename)466 XSUM_API int XSUM_isDirectory(const char* filename)
467 {
468     XSUM_stat_t statbuf;
469     int r = XSUM_stat(filename, &statbuf);
470 #ifdef _MSC_VER
471     if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
472 #else
473     if (!r && S_ISDIR(statbuf.st_mode)) return 1;
474 #endif
475     return 0;
476 }
477 
478 /*
479  * Returns the filesize of the file at filename.
480  */
XSUM_getFileSize(const char * filename)481 XSUM_API XSUM_U64 XSUM_getFileSize(const char* filename)
482 {
483     XSUM_stat_t statbuf;
484     int r = XSUM_stat(filename, &statbuf);
485     if (r || !S_ISREG(statbuf.st_mode)) return 0;   /* No good... */
486     return (XSUM_U64)statbuf.st_size;
487 }
488