1 /*
2 * xxhsum - Command line interface for xxhash algorithms
3 * Copyright (C) 2013-2020 Yann Collet
4 *
5 * GPL v2 License
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * You can contact the author at:
22 * - xxHash homepage: https://www.xxhash.com
23 * - xxHash source repository: https://github.com/Cyan4973/xxHash
24 */
25
26 #include "xsum_config.h"
27 #include "xsum_os_specific.h"
28 #include <stdio.h>
29 #include <stdarg.h>
30 #include <stdlib.h>
31 #include <sys/types.h> /* struct stat / __wstat64 */
32 #include <sys/stat.h> /* stat() / _stat64() */
33
34 /*
35 * This file contains all of the ugly boilerplate to make xxhsum work across
36 * platforms.
37 */
38 #if defined(_MSC_VER) || XSUM_WIN32_USE_WCHAR
39 typedef struct __stat64 XSUM_stat_t;
40 # if defined(_MSC_VER)
41 typedef int mode_t;
42 # endif
43 #else
44 typedef struct stat XSUM_stat_t;
45 #endif
46
47 #if (defined(__linux__) && (XSUM_PLATFORM_POSIX_VERSION >= 1)) \
48 || (XSUM_PLATFORM_POSIX_VERSION >= 200112L) \
49 || defined(__DJGPP__) \
50 || defined(__MSYS__)
51 # include <unistd.h> /* isatty */
52 # define XSUM_IS_CONSOLE(stdStream) isatty(fileno(stdStream))
53 #elif defined(MSDOS) || defined(OS2)
54 # include <io.h> /* _isatty */
55 # define XSUM_IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
56 #elif defined(WIN32) || defined(_WIN32)
57 # include <io.h> /* _isatty */
58 # include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
59 # include <stdio.h> /* FILE */
XSUM_IS_CONSOLE(FILE * stdStream)60 static __inline int XSUM_IS_CONSOLE(FILE* stdStream)
61 {
62 DWORD dummy;
63 return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
64 }
65 #else
66 # define XSUM_IS_CONSOLE(stdStream) 0
67 #endif
68
69 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
70 # include <fcntl.h> /* _O_BINARY */
71 # include <io.h> /* _setmode, _fileno, _get_osfhandle */
72 # if !defined(__DJGPP__)
73 # include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
74 # include <winioctl.h> /* FSCTL_SET_SPARSE */
75 # define XSUM_SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
76 # else
77 # define XSUM_SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
78 # endif
79 #else
80 # define XSUM_SET_BINARY_MODE(file) ((void)file)
81 #endif
82
XSUM_isConsole(FILE * stream)83 XSUM_API int XSUM_isConsole(FILE* stream)
84 {
85 return XSUM_IS_CONSOLE(stream);
86 }
87
XSUM_setBinaryMode(FILE * stream)88 XSUM_API void XSUM_setBinaryMode(FILE* stream)
89 {
90 XSUM_SET_BINARY_MODE(stream);
91 }
92
93 #if !XSUM_WIN32_USE_WCHAR
94
XSUM_fopen(const char * filename,const char * mode)95 XSUM_API FILE* XSUM_fopen(const char* filename, const char* mode)
96 {
97 return fopen(filename, mode);
98 }
99 XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
XSUM_vfprintf(FILE * stream,const char * format,va_list ap)100 XSUM_API int XSUM_vfprintf(FILE* stream, const char* format, va_list ap)
101 {
102 return vfprintf(stream, format, ap);
103 }
104
/*
 * stat() wrapper for builds without wide-character support.
 *
 * Uses _stat64() under MSVC and stat() everywhere else, and returns the
 * underlying call's status unchanged.
 */
static int XSUM_stat(const char* infilename, XSUM_stat_t* statbuf)
{
    int status;
#if defined(_MSC_VER)
    status = _stat64(infilename, statbuf);
#else
    status = stat(infilename, statbuf);
#endif
    return status;
}
113
114 #ifndef XSUM_NO_MAIN
/* Program entry point for non-wide builds: hands argc/argv straight to XSUM_main. */
int main(int argc, char* argv[])
{
    int const exitCode = XSUM_main(argc, argv);
    return exitCode;
}
119 #endif
120
121 /* Unicode helpers for Windows to make UTF-8 act as it should. */
122 #else
123 # include <windows.h>
124 # include <wchar.h>
125
126 /*****************************************************************************
127 * Unicode conversion tools
128 *****************************************************************************/
129
130 /*
131 * Converts a UTF-8 string to UTF-16. Acts like strdup. The string must be freed afterwards.
132 * This version allows keeping the output length.
133 */
XSUM_widenString(const char * str,int * lenOut)134 static wchar_t* XSUM_widenString(const char* str, int* lenOut)
135 {
136 int const len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
137 if (lenOut != NULL) *lenOut = len;
138 if (len == 0) return NULL;
139 { wchar_t* buf = (wchar_t*)malloc((size_t)len * sizeof(wchar_t));
140 if (buf != NULL) {
141 if (MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len) == 0) {
142 free(buf);
143 return NULL;
144 } }
145 return buf;
146 }
147 }
148
149 /*
150 * Converts a UTF-16 string to UTF-8. Acts like strdup. The string must be freed afterwards.
151 * This version allows keeping the output length.
152 */
XSUM_narrowString(const wchar_t * str,int * lenOut)153 static char* XSUM_narrowString(const wchar_t *str, int *lenOut)
154 {
155 int len = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
156 if (lenOut != NULL) *lenOut = len;
157 if (len == 0) return NULL;
158 { char* const buf = (char*)malloc((size_t)len * sizeof(char));
159 if (buf != NULL) {
160 if (WideCharToMultiByte(CP_UTF8, 0, str, -1, buf, len, NULL, NULL) == 0) {
161 free(buf);
162 return NULL;
163 } }
164 return buf;
165 }
166 }
167
168
169
170 /*****************************************************************************
171 * File helpers
172 *****************************************************************************/
173 /*
174 * fopen wrapper that supports UTF-8
175 *
176 * fopen will only accept ANSI filenames, which means that we can't open Unicode filenames.
177 *
178 * In order to open a Unicode filename, we need to convert filenames to UTF-16 and use _wfopen.
179 */
XSUM_fopen(const char * filename,const char * mode)180 XSUM_API FILE* XSUM_fopen(const char* filename, const char* mode)
181 {
182 FILE* f = NULL;
183 wchar_t* const wide_filename = XSUM_widenString(filename, NULL);
184 if (wide_filename != NULL) {
185 wchar_t* const wide_mode = XSUM_widenString(mode, NULL);
186 if (wide_mode != NULL) {
187 f = _wfopen(wide_filename, wide_mode);
188 free(wide_mode);
189 }
190 free(wide_filename);
191 }
192 return f;
193 }
194
195 /*
196 * stat() wrapper which supports UTF-8 filenames.
197 */
XSUM_stat(const char * infilename,XSUM_stat_t * statbuf)198 static int XSUM_stat(const char* infilename, XSUM_stat_t* statbuf)
199 {
200 int r = -1;
201 wchar_t* const wide_filename = XSUM_widenString(infilename, NULL);
202 if (wide_filename != NULL) {
203 r = _wstat64(wide_filename, statbuf);
204 free(wide_filename);
205 }
206 return r;
207 }
208
209 /*
210 * In case it isn't available, this is what MSVC 2019 defines in stdarg.h.
211 */
212 #if defined(_MSC_VER) && !defined(__clang__) && !defined(va_copy)
213 # define XSUM_va_copy(destination, source) ((destination) = (source))
214 #else
215 # define XSUM_va_copy(destination, source) va_copy(destination, source)
216 #endif
217
218 /*
219 * vasprintf for Windows.
220 */
221 XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
XSUM_vasprintf(char ** strp,const char * format,va_list ap)222 static int XSUM_vasprintf(char** strp, const char* format, va_list ap)
223 {
224 int ret;
225 int size;
226 va_list copy;
227 /*
228 * To be safe, make a va_copy.
229 *
230 * Note that Microsoft doesn't use va_copy in its sample code:
231 * https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/vsprintf-vsprintf-l-vswprintf-vswprintf-l-vswprintf-l?view=vs-2019
232 */
233 XSUM_va_copy(copy, ap);
234 /* Calculate how many characters we need */
235 size = _vscprintf(format, ap);
236 va_end(copy);
237
238 if (size < 0) {
239 *strp = NULL;
240 return size;
241 } else {
242 *strp = (char*) malloc((size_t)size + 1);
243 if (*strp == NULL) {
244 return -1;
245 }
246 /* vsprintf into the new buffer */
247 ret = vsprintf(*strp, format, ap);
248 if (ret < 0) {
249 free(*strp);
250 *strp = NULL;
251 }
252 return ret;
253 }
254 }
255
256 /*
257 * fprintf wrapper that supports UTF-8.
258 *
259 * fprintf doesn't properly handle Unicode on Windows.
260 *
261 * Additionally, it is codepage sensitive on console and may crash the program.
262 *
263 * Instead, we use vsnprintf, and either print with fwrite or convert to UTF-16
264 * for console output and use the codepage-independent WriteConsoleW.
265 *
266 * Credit to t-mat: https://github.com/t-mat/xxHash/commit/5691423
267 */
268 XSUM_ATTRIBUTE((__format__(__printf__, 2, 0)))
XSUM_vfprintf(FILE * stream,const char * format,va_list ap)269 XSUM_API int XSUM_vfprintf(FILE *stream, const char *format, va_list ap)
270 {
271 int result;
272 char* u8_str = NULL;
273
274 /*
275 * Generate the UTF-8 output string with vasprintf.
276 */
277 result = XSUM_vasprintf(&u8_str, format, ap);
278
279 if (result >= 0) {
280 const size_t nchar = (size_t)result + 1;
281
282 /*
283 * Check if we are outputting to a console. Don't use XSUM_isConsole
284 * directly -- we don't need to call _get_osfhandle twice.
285 */
286 int fileNb = _fileno(stream);
287 intptr_t handle_raw = _get_osfhandle(fileNb);
288 HANDLE handle = (HANDLE)handle_raw;
289 DWORD dwTemp;
290
291 if (handle_raw < 0) {
292 result = -1;
293 } else if (_isatty(fileNb) && GetConsoleMode(handle, &dwTemp)) {
294 /*
295 * Convert to UTF-16 and output with WriteConsoleW.
296 *
297 * This is codepage independent and works on Windows XP's default
298 * msvcrt.dll.
299 */
300 int len;
301 wchar_t* const u16_buf = XSUM_widenString(u8_str, &len);
302 if (u16_buf == NULL) {
303 result = -1;
304 } else {
305 if (WriteConsoleW(handle, u16_buf, (DWORD)len - 1, &dwTemp, NULL)) {
306 result = (int)dwTemp;
307 } else {
308 result = -1;
309 }
310 free(u16_buf);
311 }
312 } else {
313 /* fwrite the UTF-8 string if we are printing to a file */
314 result = (int)fwrite(u8_str, 1, nchar - 1, stream);
315 if (result == 0) {
316 result = -1;
317 }
318 }
319 free(u8_str);
320 }
321 return result;
322 }
323
324 #ifndef XSUM_NO_MAIN
325 /*****************************************************************************
326 * Command Line argument parsing
327 *****************************************************************************/
328
/*
 * Converts a UTF-16 argv to UTF-8.
 *
 * Returns a malloc'ed array of argc malloc'ed strings followed by a NULL
 * entry, or NULL if the array or any individual string could not be produced.
 * On failure, everything allocated so far is released before returning.
 */
static char** XSUM_convertArgv(int argc, wchar_t* utf16_argv[])
{
    int n;
    char** const converted = (char**)malloc((size_t)(argc + 1) * sizeof(char*));

    if (converted == NULL) {
        return NULL;
    }

    for (n = 0; n < argc; n++) {
        converted[n] = XSUM_narrowString(utf16_argv[n], NULL);
        if (converted[n] != NULL) {
            continue;
        }
        /* This entry failed: free the ones already converted, then the array. */
        for (n = n - 1; n >= 0; n--) {
            free(converted[n]);
        }
        free(converted);
        return NULL;
    }

    converted[argc] = NULL;
    return converted;
}
/*
 * Frees arguments returned by XSUM_convertArgv: the first argc strings, then
 * the array itself. A NULL argv is accepted and ignored.
 */
static void XSUM_freeArgv(int argc, char** argv)
{
    if (argv != NULL) {
        int idx = 0;
        while (idx < argc) {
            free(argv[idx]);
            idx++;
        }
        free(argv);
    }
}
362
/*
 * Shared wide-character entry point: converts the UTF-16 arguments to UTF-8,
 * runs XSUM_main on them, releases the converted arguments and returns
 * XSUM_main's result.
 *
 * If the conversion fails, an error is printed to stderr and the program
 * aborts.
 */
static int XSUM_wmain(int argc, wchar_t* utf16_argv[])
{
    int exitCode;
    char** const utf8_argv = XSUM_convertArgv(argc, utf16_argv);

    if (utf8_argv == NULL) {
        /* An unfortunate but incredibly unlikely error. */
        fprintf(stderr, "xxhsum: error converting command line arguments!\n");
        abort();
    }

    /*
     * MinGW's terminal uses full block buffering for stderr, which is
     * nonstandard and delays text until the buffer fills. Switching stderr
     * to unbuffered mode makes text display instantly.
     */
    setvbuf(stderr, NULL, _IONBF, 0);

    /* Run the real main function, then clean up. */
    exitCode = XSUM_main(argc, utf8_argv);
    XSUM_freeArgv(argc, utf8_argv);
    return exitCode;
}
393
394 #if XSUM_WIN32_USE_WMAIN
395
396 /*
397 * The preferred method of obtaining the real UTF-16 arguments. Always works
398 * on MSVC, sometimes works on MinGW-w64 depending on the compiler flags.
399 */
400 #ifdef __cplusplus
401 extern "C"
402 #endif
wmain(int argc,wchar_t * utf16_argv[])403 int __cdecl wmain(int argc, wchar_t* utf16_argv[])
404 {
405 return XSUM_wmain(argc, utf16_argv);
406 }
407 #else /* !XSUM_WIN32_USE_WMAIN */
408
409 /*
410 * Wrap `XSUM_wmain()` using `main()` and `__wgetmainargs()` on MinGW without
411 * Unicode support.
412 *
413 * `__wgetmainargs()` is used in the CRT startup to retrieve the arguments for
414 * `wmain()`, so we use it on MinGW to emulate `wmain()`.
415 *
416 * It is an internal function and not declared in any public headers, so we
417 * have to declare it manually.
418 *
419 * An alternative that doesn't mess with internal APIs is `GetCommandLineW()`
420 * with `CommandLineToArgvW()`, but the former doesn't expand wildcards and the
421 * latter requires linking to Shell32.dll and its numerous dependencies.
422 *
423 * This method keeps our dependencies to kernel32.dll and the CRT.
424 *
425 * https://docs.microsoft.com/en-us/cpp/c-runtime-library/getmainargs-wgetmainargs?view=vs-2019
426 */
427 typedef struct {
428 int newmode;
429 } _startupinfo;
430
431 #ifdef __cplusplus
432 extern "C"
433 #endif
434 int __cdecl __wgetmainargs(
435 int* Argc,
436 wchar_t*** Argv,
437 wchar_t*** Env,
438 int DoWildCard,
439 _startupinfo* StartInfo
440 );
441
main(int ansi_argc,char * ansi_argv[])442 int main(int ansi_argc, char* ansi_argv[])
443 {
444 int utf16_argc;
445 wchar_t** utf16_argv;
446 wchar_t** utf16_envp; /* Unused but required */
447 _startupinfo startinfo = {0}; /* 0 == don't change new mode */
448
449 /* Get wmain's UTF-16 arguments. Make sure we expand wildcards. */
450 if (__wgetmainargs(&utf16_argc, &utf16_argv, &utf16_envp, 1, &startinfo) < 0)
451 /* In the very unlikely case of an error, use the ANSI arguments. */
452 return XSUM_main(ansi_argc, ansi_argv);
453
454 /* Call XSUM_wmain with our UTF-16 arguments */
455 return XSUM_wmain(utf16_argc, utf16_argv);
456 }
457
458 #endif /* !XSUM_WIN32_USE_WMAIN */
459 #endif /* !XSUM_NO_MAIN */
460 #endif /* XSUM_WIN32_USE_WCHAR */
461
462
463 /*
464 * Determines whether the file at filename is a directory.
465 */
XSUM_isDirectory(const char * filename)466 XSUM_API int XSUM_isDirectory(const char* filename)
467 {
468 XSUM_stat_t statbuf;
469 int r = XSUM_stat(filename, &statbuf);
470 #ifdef _MSC_VER
471 if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
472 #else
473 if (!r && S_ISDIR(statbuf.st_mode)) return 1;
474 #endif
475 return 0;
476 }
477
478 /*
479 * Returns the filesize of the file at filename.
480 */
XSUM_getFileSize(const char * filename)481 XSUM_API XSUM_U64 XSUM_getFileSize(const char* filename)
482 {
483 XSUM_stat_t statbuf;
484 int r = XSUM_stat(filename, &statbuf);
485 if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
486 return (XSUM_U64)statbuf.st_size;
487 }
488