1 /*
2 LZ4io.c - LZ4 File/Stream Interface
3 Copyright (C) Yann Collet 2011-2020
4
5 GPL v2 License
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License along
18 with this program; if not, write to the Free Software Foundation, Inc.,
19 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21 You can contact the author at :
22 - LZ4 source repository : https://github.com/lz4/lz4
23 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
24 */
25 /*
  Note : this is a stand-alone program.
  It is not part of the LZ4 compression library; it is user code built on top of the LZ4 library.
28 - The license of LZ4 library is BSD.
29 - The license of xxHash library is BSD.
30 - The license of this source file is GPLv2.
31 */
32
33
34 /*-************************************
35 * Compiler options
36 **************************************/
37 #ifdef _MSC_VER /* Visual Studio */
38 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
39 #endif
40 #if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
41 # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
42 #endif
43
44
45 /*****************************
46 * Includes
47 *****************************/
48 #include "platform.h" /* Large File Support, SET_BINARY_MODE, SET_SPARSE_FILE_MODE, PLATFORM_POSIX_VERSION, __64BIT__ */
49 #include "util.h" /* UTIL_getFileStat, UTIL_setFileStat */
50 #include <stdio.h> /* fprintf, fopen, fread, stdin, stdout, fflush, getchar */
51 #include <stdlib.h> /* malloc, free */
52 #include <string.h> /* strerror, strcmp, strlen */
53 #include <time.h> /* clock */
54 #include <sys/types.h> /* stat64 */
55 #include <sys/stat.h> /* stat64 */
56 #include "lz4.h" /* still required for legacy format */
57 #include "lz4hc.h" /* still required for legacy format */
58 #define LZ4F_STATIC_LINKING_ONLY
59 #include "lz4frame.h"
60 #include "lz4io.h"
61
62
63 /*****************************
64 * Constants
65 *****************************/
66 #define KB *(1 <<10)
67 #define MB *(1 <<20)
68 #define GB *(1U<<30)
69
70 #define _1BIT 0x01
71 #define _2BITS 0x03
72 #define _3BITS 0x07
73 #define _4BITS 0x0F
74 #define _8BITS 0xFF
75
76 #define MAGICNUMBER_SIZE 4
77 #define LZ4IO_MAGICNUMBER 0x184D2204
78 #define LZ4IO_SKIPPABLE0 0x184D2A50
79 #define LZ4IO_SKIPPABLEMASK 0xFFFFFFF0
80 #define LEGACY_MAGICNUMBER 0x184C2102
81
82 #define CACHELINE 64
83 #define LEGACY_BLOCKSIZE (8 MB)
84 #define MIN_STREAM_BUFSIZE (192 KB)
85 #define LZ4IO_BLOCKSIZEID_DEFAULT 7
86 #define LZ4_MAX_DICT_SIZE (64 KB)
87
88
89 /**************************************
90 * Macros
91 **************************************/
92 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
93 #define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__)
94 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
95 static int g_displayLevel = 0; /* 0 : no display ; 1: errors ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */
96
97 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
98 if ( ((clock() - g_time) > refreshRate) \
99 || (g_displayLevel>=4) ) { \
100 g_time = clock(); \
101 DISPLAY(__VA_ARGS__); \
102 if (g_displayLevel>=4) fflush(stderr); \
103 } }
104 static const clock_t refreshRate = CLOCKS_PER_SEC / 6;
105 static clock_t g_time = 0;
106
107 #define LZ4IO_STATIC_ASSERT(c) { enum { LZ4IO_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
108
109
110 /**************************************
111 * Exceptions
112 ***************************************/
113 #ifndef DEBUG
114 # define DEBUG 0
115 #endif
116 #define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
117 #define END_PROCESS(error, ...) \
118 { \
119 DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
120 DISPLAYLEVEL(1, "Error %i : ", error); \
121 DISPLAYLEVEL(1, __VA_ARGS__); \
122 DISPLAYLEVEL(1, " \n"); \
123 exit(error); \
124 }
125
126
127 /* ************************************************** */
128 /* ****************** Parameters ******************** */
129 /* ************************************************** */
130
/* LZ4IO_prefs_s :
 * Set of user-selectable options driving file compression / decompression.
 * Instances are created by LZ4IO_defaultPreferences()
 * and modified through the LZ4IO_set*() functions below. */
struct LZ4IO_prefs_s {
    int passThrough;                 /* 0/1, see LZ4IO_setPassThrough() */
    int overwrite;                   /* 0/1 : when 0, an existing destination triggers a confirmation prompt */
    int testMode;                    /* 0/1, see LZ4IO_setTestMode() */
    int blockSizeId;                 /* block size ID stored into the frame header (4-7) */
    size_t blockSize;                /* amount of input read and compressed at a time, in bytes */
    int blockChecksum;               /* 0/1 : per-block checksum */
    int streamChecksum;              /* 0/1 : whole-content checksum */
    int blockIndependence;           /* 1 : independent blocks */
    int sparseFileSupport;           /* 0 : disabled ; 1 : auto (not on stdout) ; 2 : forced */
    int contentSizeFlag;             /* 0/1 : record the input size into the frame header */
    int useDictionary;               /* 0/1 : set whenever a dictionary file name is provided */
    unsigned favorDecSpeed;          /* 0/1, forwarded to LZ4F_preferences_t.favorDecSpeed */
    const char* dictionaryFilename;  /* not owned : pointer provided by the caller */
    int removeSrcFile;               /* 0/1 : delete source file after successful compression */
};
147
LZ4IO_defaultPreferences(void)148 LZ4IO_prefs_t* LZ4IO_defaultPreferences(void)
149 {
150 LZ4IO_prefs_t* const ret = (LZ4IO_prefs_t*)malloc(sizeof(*ret));
151 if (!ret) END_PROCESS(21, "Allocation error : not enough memory");
152 ret->passThrough = 0;
153 ret->overwrite = 1;
154 ret->testMode = 0;
155 ret->blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT;
156 ret->blockSize = 0;
157 ret->blockChecksum = 0;
158 ret->streamChecksum = 1;
159 ret->blockIndependence = 1;
160 ret->sparseFileSupport = 1;
161 ret->contentSizeFlag = 0;
162 ret->useDictionary = 0;
163 ret->favorDecSpeed = 0;
164 ret->dictionaryFilename = NULL;
165 ret->removeSrcFile = 0;
166 return ret;
167 }
168
/* LZ4IO_freePreferences() :
 * Release a preferences object; the pointer is handed straight to free(). */
void LZ4IO_freePreferences(LZ4IO_prefs_t* prefs)
{
    void* const toRelease = prefs;
    free(toRelease);
}
173
174
/* LZ4IO_setDictionaryFilename() :
 * Record the dictionary file name (the pointer is stored, not copied).
 * Dictionary usage is enabled when the name is non-NULL, disabled otherwise.
 * @return : resulting useDictionary flag (0/1) */
int LZ4IO_setDictionaryFilename(LZ4IO_prefs_t* const prefs, const char* dictionaryFilename)
{
    int const hasDictionary = (dictionaryFilename != NULL);
    prefs->dictionaryFilename = dictionaryFilename;
    prefs->useDictionary = hasDictionary;
    return hasDictionary;
}
181
182 /* Default setting : passThrough = 0; return : passThrough mode (0/1) */
LZ4IO_setPassThrough(LZ4IO_prefs_t * const prefs,int yes)183 int LZ4IO_setPassThrough(LZ4IO_prefs_t* const prefs, int yes)
184 {
185 prefs->passThrough = (yes!=0);
186 return prefs->passThrough;
187 }
188
189
190 /* Default setting : overwrite = 1; return : overwrite mode (0/1) */
LZ4IO_setOverwrite(LZ4IO_prefs_t * const prefs,int yes)191 int LZ4IO_setOverwrite(LZ4IO_prefs_t* const prefs, int yes)
192 {
193 prefs->overwrite = (yes!=0);
194 return prefs->overwrite;
195 }
196
197 /* Default setting : testMode = 0; return : testMode (0/1) */
LZ4IO_setTestMode(LZ4IO_prefs_t * const prefs,int yes)198 int LZ4IO_setTestMode(LZ4IO_prefs_t* const prefs, int yes)
199 {
200 prefs->testMode = (yes!=0);
201 return prefs->testMode;
202 }
203
204 /* blockSizeID : valid values : 4-5-6-7 */
LZ4IO_setBlockSizeID(LZ4IO_prefs_t * const prefs,unsigned bsid)205 size_t LZ4IO_setBlockSizeID(LZ4IO_prefs_t* const prefs, unsigned bsid)
206 {
207 static const size_t blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB };
208 static const unsigned minBlockSizeID = 4;
209 static const unsigned maxBlockSizeID = 7;
210 if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return 0;
211 prefs->blockSizeId = (int)bsid;
212 prefs->blockSize = blockSizeTable[(unsigned)prefs->blockSizeId-minBlockSizeID];
213 return prefs->blockSize;
214 }
215
/* LZ4IO_setBlockSize() :
 * Select an arbitrary block size, clamped into [32 bytes, 4 MB].
 * The block size ID is set to the smallest of { 64 KB, 256 KB, 1 MB, 4 MB }
 * able to contain a block of that size.
 * @return : block size actually selected (after clamping), in bytes */
size_t LZ4IO_setBlockSize(LZ4IO_prefs_t* const prefs, size_t blockSize)
{
    static const size_t lowerBound = 32;
    static const size_t upperBound = 4 MB;
    size_t remaining;
    unsigned nbShifts = 0;

    if (blockSize < lowerBound) blockSize = lowerBound;
    if (blockSize > upperBound) blockSize = upperBound;
    prefs->blockSize = blockSize;

    /* count how many times (blockSize-1) can be divided by 4 before reaching 0 */
    for (remaining = (blockSize - 1) >> 2; remaining != 0; remaining >>= 2) {
        nbShifts++;
    }

    /* 7 shifts <=> up to 64 KB, which is the smallest block size ID (4) */
    if (nbShifts < 7) nbShifts = 7;
    prefs->blockSizeId = (int)(nbShifts - 3);

    return prefs->blockSize;
}
232
233 /* Default setting : 1 == independent blocks */
LZ4IO_setBlockMode(LZ4IO_prefs_t * const prefs,LZ4IO_blockMode_t blockMode)234 int LZ4IO_setBlockMode(LZ4IO_prefs_t* const prefs, LZ4IO_blockMode_t blockMode)
235 {
236 prefs->blockIndependence = (blockMode == LZ4IO_blockIndependent);
237 return prefs->blockIndependence;
238 }
239
240 /* Default setting : 0 == no block checksum */
LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t * const prefs,int enable)241 int LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
242 {
243 prefs->blockChecksum = (enable != 0);
244 return prefs->blockChecksum;
245 }
246
247 /* Default setting : 1 == checksum enabled */
LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t * const prefs,int enable)248 int LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t* const prefs, int enable)
249 {
250 prefs->streamChecksum = (enable != 0);
251 return prefs->streamChecksum;
252 }
253
254 /* Default setting : 0 (no notification) */
LZ4IO_setNotificationLevel(int level)255 int LZ4IO_setNotificationLevel(int level)
256 {
257 g_displayLevel = level;
258 return g_displayLevel;
259 }
260
261 /* Default setting : 1 (auto: enabled on file, disabled on stdout) */
LZ4IO_setSparseFile(LZ4IO_prefs_t * const prefs,int enable)262 int LZ4IO_setSparseFile(LZ4IO_prefs_t* const prefs, int enable)
263 {
264 prefs->sparseFileSupport = 2*(enable!=0); /* 2==force enable */
265 return prefs->sparseFileSupport;
266 }
267
268 /* Default setting : 0 (disabled) */
LZ4IO_setContentSize(LZ4IO_prefs_t * const prefs,int enable)269 int LZ4IO_setContentSize(LZ4IO_prefs_t* const prefs, int enable)
270 {
271 prefs->contentSizeFlag = (enable!=0);
272 return prefs->contentSizeFlag;
273 }
274
275 /* Default setting : 0 (disabled) */
LZ4IO_favorDecSpeed(LZ4IO_prefs_t * const prefs,int favor)276 void LZ4IO_favorDecSpeed(LZ4IO_prefs_t* const prefs, int favor)
277 {
278 prefs->favorDecSpeed = (favor!=0);
279 }
280
/* LZ4IO_setRemoveSrcFile() :
 * Any non-zero `flag` requests removal of the source file
 * once it has been successfully compressed. */
void LZ4IO_setRemoveSrcFile(LZ4IO_prefs_t* const prefs, unsigned flag)
{
    prefs->removeSrcFile = (flag != 0) ? 1 : 0;
}
285
286
287
288 /* ************************************************************************ **
289 ** ********************** String functions ********************* **
290 ** ************************************************************************ */
291
LZ4IO_isDevNull(const char * s)292 static int LZ4IO_isDevNull(const char* s)
293 {
294 return UTIL_sameString(s, nulmark);
295 }
296
LZ4IO_isStdin(const char * s)297 static int LZ4IO_isStdin(const char* s)
298 {
299 return UTIL_sameString(s, stdinmark);
300 }
301
LZ4IO_isStdout(const char * s)302 static int LZ4IO_isStdout(const char* s)
303 {
304 return UTIL_sameString(s, stdoutmark);
305 }
306
307
308 /* ************************************************************************ **
309 ** ********************** LZ4 File / Pipe compression ********************* **
310 ** ************************************************************************ */
311
LZ4IO_isSkippableMagicNumber(unsigned int magic)312 static int LZ4IO_isSkippableMagicNumber(unsigned int magic) {
313 return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0;
314 }
315
316
/** LZ4IO_openSrcFile() :
 *  condition : `srcFileName` must be non-NULL.
 *  When `srcFileName` is the stdin marker, stdin is switched to binary mode and returned.
 *  Otherwise the file is opened for binary reading;
 *  on failure, the system error is reported at display level 1.
 * @result : FILE* to `srcFileName`, or NULL if it fails */
static FILE* LZ4IO_openSrcFile(const char* srcFileName)
{
    if (LZ4IO_isStdin(srcFileName)) {
        DISPLAYLEVEL(4,"Using stdin for input \n");
        SET_BINARY_MODE(stdin);
        return stdin;
    }

    {   FILE* const srcHandle = fopen(srcFileName, "rb");
        if (srcHandle == NULL) {
            DISPLAYLEVEL(1, "%s: %s \n", srcFileName, strerror(errno));
        }
        return srcHandle;
    }
}
335
336 /** FIO_openDstFile() :
337 * prefs is writable, because sparseFileSupport might be updated.
338 * condition : `dstFileName` must be non-NULL.
339 * @result : FILE* to `dstFileName`, or NULL if it fails */
LZ4IO_openDstFile(const char * dstFileName,const LZ4IO_prefs_t * const prefs)340 static FILE* LZ4IO_openDstFile(const char* dstFileName, const LZ4IO_prefs_t* const prefs)
341 {
342 FILE* f;
343 assert(dstFileName != NULL);
344
345 if (LZ4IO_isStdout(dstFileName)) {
346 DISPLAYLEVEL(4, "Using stdout for output \n");
347 f = stdout;
348 SET_BINARY_MODE(stdout);
349 if (prefs->sparseFileSupport==1) {
350 DISPLAYLEVEL(4, "Sparse File Support automatically disabled on stdout ;"
351 " to force-enable it, add --sparse command \n");
352 }
353 } else {
354 if (!prefs->overwrite && !LZ4IO_isDevNull(dstFileName)) {
355 /* Check if destination file already exists */
356 FILE* const testf = fopen( dstFileName, "rb" );
357 if (testf != NULL) { /* dest exists, prompt for overwrite authorization */
358 fclose(testf);
359 if (g_displayLevel <= 1) { /* No interaction possible */
360 DISPLAY("%s already exists; not overwritten \n", dstFileName);
361 return NULL;
362 }
363 DISPLAY("%s already exists; do you want to overwrite (y/N) ? ", dstFileName);
364 { int ch = getchar();
365 if ((ch!='Y') && (ch!='y')) {
366 DISPLAY(" not overwritten \n");
367 return NULL;
368 }
369 while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */
370 } } }
371 f = fopen( dstFileName, "wb" );
372 if (f==NULL) DISPLAYLEVEL(1, "%s: %s\n", dstFileName, strerror(errno));
373 }
374
375 /* sparse file */
376 { int const sparseMode = (prefs->sparseFileSupport - (f==stdout)) > 0;
377 if (f && sparseMode) { SET_SPARSE_FILE_MODE(f); }
378 }
379
380 return f;
381 }
382
383
384
385 /***************************************
386 * Legacy Compression
387 ***************************************/
388
389 /* Size in bytes of a legacy block header in little-endian format */
390 #define LZ4IO_LEGACY_BLOCK_HEADER_SIZE 4
391 #define LZ4IO_LEGACY_BLOCK_SIZE_MAX (8 MB)
392
/* LZ4IO_writeLE32() :
 * Store the 32 low bits of `value32` into `p`, least significant byte first.
 * Byte-per-byte version : independent from host endianness and from alignment of `p`.
 * `p` must point to at least 4 writable bytes. */
static void LZ4IO_writeLE32 (void* p, unsigned value32)
{
    unsigned char* const out = (unsigned char*)p;
    int byteIdx;
    for (byteIdx = 0; byteIdx < 4; byteIdx++) {
        out[byteIdx] = (unsigned char)(value32 >> (8 * byteIdx));
    }
}
402
/* LZ4IO_LZ4_compress() :
 * Adapter giving the fast compressor the same prototype as LZ4_compress_HC.
 * `cLevel` is ignored; the acceleration factor is always 1. */
static int LZ4IO_LZ4_compress(const char* src, char* dst, int srcSize, int dstSize, int cLevel)
{
    int const acceleration = 1;
    (void)cLevel;
    return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
}
408
409 /* LZ4IO_compressFilename_Legacy :
410 * This function is intentionally "hidden" (not published in .h)
411 * It generates compressed streams using the old 'legacy' format */
LZ4IO_compressFilename_Legacy(const char * input_filename,const char * output_filename,int compressionlevel,const LZ4IO_prefs_t * prefs)412 int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename,
413 int compressionlevel, const LZ4IO_prefs_t* prefs)
414 {
415 typedef int (*compress_f)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
416 compress_f const compressionFunction = (compressionlevel < 3) ? LZ4IO_LZ4_compress : LZ4_compress_HC;
417 unsigned long long filesize = 0;
418 unsigned long long compressedfilesize = MAGICNUMBER_SIZE;
419 char* in_buff;
420 char* out_buff;
421 const int outBuffSize = LZ4_compressBound(LEGACY_BLOCKSIZE);
422 FILE* const finput = LZ4IO_openSrcFile(input_filename);
423 FILE* foutput;
424 clock_t clockEnd;
425
426 /* Init */
427 clock_t const clockStart = clock();
428 if (finput == NULL)
429 END_PROCESS(20, "%s : open file error ", input_filename);
430
431 foutput = LZ4IO_openDstFile(output_filename, prefs);
432 if (foutput == NULL) {
433 fclose(finput);
434 END_PROCESS(20, "%s : open file error ", input_filename);
435 }
436
437 /* Allocate Memory */
438 in_buff = (char*)malloc(LEGACY_BLOCKSIZE);
439 out_buff = (char*)malloc((size_t)outBuffSize + 4);
440 if (!in_buff || !out_buff)
441 END_PROCESS(21, "Allocation error : not enough memory");
442
443 /* Write Archive Header */
444 LZ4IO_writeLE32(out_buff, LEGACY_MAGICNUMBER);
445 if (fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE)
446 END_PROCESS(22, "Write error : cannot write header");
447
448 /* Main Loop */
449 while (1) {
450 int outSize;
451 /* Read Block */
452 size_t const inSize = fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput);
453 if (inSize == 0) break;
454 assert(inSize <= LEGACY_BLOCKSIZE);
455 filesize += inSize;
456
457 /* Compress Block */
458 outSize = compressionFunction(in_buff, out_buff+4, (int)inSize, outBuffSize, compressionlevel);
459 assert(outSize >= 0);
460 compressedfilesize += (unsigned long long)outSize+4;
461 DISPLAYUPDATE(2, "\rRead : %i MiB ==> %.2f%% ",
462 (int)(filesize>>20), (double)compressedfilesize/filesize*100);
463
464 /* Write Block */
465 assert(outSize > 0);
466 assert(outSize < outBuffSize);
467 LZ4IO_writeLE32(out_buff, (unsigned)outSize);
468 if (fwrite(out_buff, 1, (size_t)outSize+4, foutput) != (size_t)(outSize+4)) {
469 END_PROCESS(24, "Write error : cannot write compressed block");
470 } }
471 if (ferror(finput)) END_PROCESS(24, "Error while reading %s ", input_filename);
472
473 /* Status */
474 clockEnd = clock();
475 clockEnd += (clockEnd==clockStart); /* avoid division by zero (speed) */
476 filesize += !filesize; /* avoid division by zero (ratio) */
477 DISPLAYLEVEL(2, "\r%79s\r", ""); /* blank line */
478 DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
479 filesize, compressedfilesize, (double)compressedfilesize / filesize * 100);
480 { double const seconds = (double)(clockEnd - clockStart) / CLOCKS_PER_SEC;
481 DISPLAYLEVEL(4,"Done in %.2f s ==> %.2f MiB/s\n", seconds,
482 (double)filesize / seconds / 1024 / 1024);
483 }
484
485 /* Close & Free */
486 free(in_buff);
487 free(out_buff);
488 fclose(finput);
489 if (!LZ4IO_isStdout(output_filename)) fclose(foutput); /* do not close stdout */
490
491 return 0;
492 }
493
494 #define FNSPACE 30
495 /* LZ4IO_compressMultipleFilenames_Legacy :
496 * This function is intentionally "hidden" (not published in .h)
497 * It generates multiple compressed streams using the old 'legacy' format */
LZ4IO_compressMultipleFilenames_Legacy(const char ** inFileNamesTable,int ifntSize,const char * suffix,int compressionLevel,const LZ4IO_prefs_t * prefs)498 int LZ4IO_compressMultipleFilenames_Legacy(
499 const char** inFileNamesTable, int ifntSize,
500 const char* suffix,
501 int compressionLevel, const LZ4IO_prefs_t* prefs)
502 {
503 int i;
504 int missed_files = 0;
505 char* dstFileName = (char*)malloc(FNSPACE);
506 size_t ofnSize = FNSPACE;
507 const size_t suffixSize = strlen(suffix);
508
509 if (dstFileName == NULL) return ifntSize; /* not enough memory */
510
511 /* loop on each file */
512 for (i=0; i<ifntSize; i++) {
513 size_t const ifnSize = strlen(inFileNamesTable[i]);
514 if (LZ4IO_isStdout(suffix)) {
515 missed_files += LZ4IO_compressFilename_Legacy(
516 inFileNamesTable[i], stdoutmark,
517 compressionLevel, prefs);
518 continue;
519 }
520
521 if (ofnSize <= ifnSize+suffixSize+1) {
522 free(dstFileName);
523 ofnSize = ifnSize + 20;
524 dstFileName = (char*)malloc(ofnSize);
525 if (dstFileName==NULL) {
526 return ifntSize;
527 } }
528 strcpy(dstFileName, inFileNamesTable[i]);
529 strcat(dstFileName, suffix);
530
531 missed_files += LZ4IO_compressFilename_Legacy(
532 inFileNamesTable[i], dstFileName,
533 compressionLevel, prefs);
534 }
535
536 /* Close & Free */
537 free(dstFileName);
538
539 return missed_files;
540 }
541
542
543 /*********************************************
544 * Compression using Frame format
545 *********************************************/
546 typedef struct {
547 void* srcBuffer;
548 size_t srcBufferSize;
549 void* dstBuffer;
550 size_t dstBufferSize;
551 LZ4F_compressionContext_t ctx;
552 LZ4F_CDict* cdict;
553 } cRess_t;
554
LZ4IO_createDict(size_t * dictSize,const char * const dictFilename)555 static void* LZ4IO_createDict(size_t* dictSize, const char* const dictFilename)
556 {
557 size_t readSize;
558 size_t dictEnd = 0;
559 size_t dictLen = 0;
560 size_t dictStart;
561 size_t circularBufSize = LZ4_MAX_DICT_SIZE;
562 char* circularBuf = (char*)malloc(circularBufSize);
563 char* dictBuf;
564 FILE* dictFile;
565
566 if (!circularBuf) END_PROCESS(25, "Allocation error : not enough memory for circular buffer");
567 if (!dictFilename) END_PROCESS(26, "Dictionary error : no filename provided");
568
569 dictFile = LZ4IO_openSrcFile(dictFilename);
570 if (!dictFile) END_PROCESS(27, "Dictionary error : could not open dictionary file");
571
572 /* opportunistically seek to the part of the file we care about.
573 * If this fails it's not a problem since we'll just read everything anyways. */
574 if (!LZ4IO_isStdin(dictFilename)) {
575 (void)UTIL_fseek(dictFile, -LZ4_MAX_DICT_SIZE, SEEK_END);
576 }
577
578 do {
579 readSize = fread(circularBuf + dictEnd, 1, circularBufSize - dictEnd, dictFile);
580 dictEnd = (dictEnd + readSize) % circularBufSize;
581 dictLen += readSize;
582 } while (readSize>0);
583
584 if (dictLen > LZ4_MAX_DICT_SIZE) {
585 dictLen = LZ4_MAX_DICT_SIZE;
586 }
587
588 *dictSize = dictLen;
589
590 dictStart = (circularBufSize + dictEnd - dictLen) % circularBufSize;
591
592 if (dictStart == 0) {
593 /* We're in the simple case where the dict starts at the beginning of our circular buffer. */
594 dictBuf = circularBuf;
595 circularBuf = NULL;
596 } else {
597 /* Otherwise, we will alloc a new buffer and copy our dict into that. */
598 dictBuf = (char *)malloc(dictLen ? dictLen : 1);
599 if (!dictBuf) END_PROCESS(28, "Allocation error : not enough memory");
600
601 memcpy(dictBuf, circularBuf + dictStart, circularBufSize - dictStart);
602 memcpy(dictBuf + circularBufSize - dictStart, circularBuf, dictLen - (circularBufSize - dictStart));
603 }
604
605 fclose(dictFile);
606 free(circularBuf);
607
608 return dictBuf;
609 }
610
LZ4IO_createCDict(const LZ4IO_prefs_t * const prefs)611 static LZ4F_CDict* LZ4IO_createCDict(const LZ4IO_prefs_t* const prefs)
612 {
613 size_t dictionarySize;
614 void* dictionaryBuffer;
615 LZ4F_CDict* cdict;
616 if (!prefs->useDictionary) return NULL;
617 dictionaryBuffer = LZ4IO_createDict(&dictionarySize, prefs->dictionaryFilename);
618 if (!dictionaryBuffer) END_PROCESS(29, "Dictionary error : could not create dictionary");
619 cdict = LZ4F_createCDict(dictionaryBuffer, dictionarySize);
620 free(dictionaryBuffer);
621 return cdict;
622 }
623
LZ4IO_createCResources(const LZ4IO_prefs_t * const prefs)624 static cRess_t LZ4IO_createCResources(const LZ4IO_prefs_t* const prefs)
625 {
626 const size_t blockSize = prefs->blockSize;
627 cRess_t ress;
628
629 LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION);
630 if (LZ4F_isError(errorCode)) END_PROCESS(30, "Allocation error : can't create LZ4F context : %s", LZ4F_getErrorName(errorCode));
631
632 /* Allocate Memory */
633 ress.srcBuffer = malloc(blockSize);
634 ress.srcBufferSize = blockSize;
635 ress.dstBufferSize = LZ4F_compressFrameBound(blockSize, NULL); /* cover worst case */
636 ress.dstBuffer = malloc(ress.dstBufferSize);
637 if (!ress.srcBuffer || !ress.dstBuffer) END_PROCESS(31, "Allocation error : not enough memory");
638
639 ress.cdict = LZ4IO_createCDict(prefs);
640
641 return ress;
642 }
643
/* LZ4IO_freeCResources() :
 * Release buffers, dictionary and LZ4F context held by `ress`.
 * `ress` is received by value : the caller's copy is left unchanged and must not be reused.
 * The process is terminated if the LZ4F context cannot be freed. */
static void LZ4IO_freeCResources(cRess_t ress)
{
    LZ4F_errorCode_t freeStatus;

    free(ress.srcBuffer);
    free(ress.dstBuffer);
    LZ4F_freeCDict(ress.cdict);

    freeStatus = LZ4F_freeCompressionContext(ress.ctx);
    if (LZ4F_isError(freeStatus))
        END_PROCESS(35, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(freeStatus));
}
655
656 /*
657 * LZ4IO_compressFilename_extRess()
658 * result : 0 : compression completed correctly
659 * 1 : missing or pb opening srcFileName
660 */
661 static int
LZ4IO_compressFilename_extRess(cRess_t ress,const char * srcFileName,const char * dstFileName,int compressionLevel,const LZ4IO_prefs_t * const io_prefs)662 LZ4IO_compressFilename_extRess(cRess_t ress,
663 const char* srcFileName, const char* dstFileName,
664 int compressionLevel, const LZ4IO_prefs_t* const io_prefs)
665 {
666 unsigned long long filesize = 0;
667 unsigned long long compressedfilesize = 0;
668 FILE* dstFile;
669 void* const srcBuffer = ress.srcBuffer;
670 void* const dstBuffer = ress.dstBuffer;
671 const size_t dstBufferSize = ress.dstBufferSize;
672 const size_t blockSize = io_prefs->blockSize;
673 size_t readSize;
674 LZ4F_compressionContext_t ctx = ress.ctx; /* just a pointer */
675 LZ4F_preferences_t prefs;
676
677 /* Init */
678 FILE* const srcFile = LZ4IO_openSrcFile(srcFileName);
679 if (srcFile == NULL) return 1;
680 dstFile = LZ4IO_openDstFile(dstFileName, io_prefs);
681 if (dstFile == NULL) { fclose(srcFile); return 1; }
682 memset(&prefs, 0, sizeof(prefs));
683
684 /* Set compression parameters */
685 prefs.autoFlush = 1;
686 prefs.compressionLevel = compressionLevel;
687 prefs.frameInfo.blockMode = (LZ4F_blockMode_t)io_prefs->blockIndependence;
688 prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)io_prefs->blockSizeId;
689 prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)io_prefs->blockChecksum;
690 prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)io_prefs->streamChecksum;
691 prefs.favorDecSpeed = io_prefs->favorDecSpeed;
692 if (io_prefs->contentSizeFlag) {
693 U64 const fileSize = UTIL_getOpenFileSize(srcFile);
694 prefs.frameInfo.contentSize = fileSize; /* == 0 if input == stdin */
695 if (fileSize==0)
696 DISPLAYLEVEL(3, "Warning : cannot determine input content size \n");
697 }
698
699 /* read first block */
700 readSize = fread(srcBuffer, (size_t)1, blockSize, srcFile);
701 if (ferror(srcFile)) END_PROCESS(40, "Error reading %s ", srcFileName);
702 filesize += readSize;
703
704 /* single-block file */
705 if (readSize < blockSize) {
706 /* Compress in single pass */
707 size_t const cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs);
708 if (LZ4F_isError(cSize))
709 END_PROCESS(41, "Compression failed : %s", LZ4F_getErrorName(cSize));
710 compressedfilesize = cSize;
711 DISPLAYUPDATE(2, "\rRead : %u MiB ==> %.2f%% ",
712 (unsigned)(filesize>>20), (double)compressedfilesize/(filesize+!filesize)*100); /* avoid division by zero */
713
714 /* Write Block */
715 if (fwrite(dstBuffer, 1, cSize, dstFile) != cSize) {
716 END_PROCESS(42, "Write error : failed writing single-block compressed frame");
717 } }
718
719 else
720
721 /* multiple-blocks file */
722 {
723 /* Write Frame Header */
724 size_t const headerSize = LZ4F_compressBegin_usingCDict(ctx, dstBuffer, dstBufferSize, ress.cdict, &prefs);
725 if (LZ4F_isError(headerSize)) END_PROCESS(43, "File header generation failed : %s", LZ4F_getErrorName(headerSize));
726 if (fwrite(dstBuffer, 1, headerSize, dstFile) != headerSize)
727 END_PROCESS(44, "Write error : cannot write header");
728 compressedfilesize += headerSize;
729
730 /* Main Loop - one block at a time */
731 while (readSize>0) {
732 size_t const outSize = LZ4F_compressUpdate(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, NULL);
733 if (LZ4F_isError(outSize))
734 END_PROCESS(45, "Compression failed : %s", LZ4F_getErrorName(outSize));
735 compressedfilesize += outSize;
736 DISPLAYUPDATE(2, "\rRead : %u MiB ==> %.2f%% ",
737 (unsigned)(filesize>>20), (double)compressedfilesize/filesize*100);
738
739 /* Write Block */
740 if (fwrite(dstBuffer, 1, outSize, dstFile) != outSize)
741 END_PROCESS(46, "Write error : cannot write compressed block");
742
743 /* Read next block */
744 readSize = fread(srcBuffer, (size_t)1, (size_t)blockSize, srcFile);
745 filesize += readSize;
746 }
747 if (ferror(srcFile)) END_PROCESS(47, "Error reading %s ", srcFileName);
748
749 /* End of Frame mark */
750 { size_t const endSize = LZ4F_compressEnd(ctx, dstBuffer, dstBufferSize, NULL);
751 if (LZ4F_isError(endSize))
752 END_PROCESS(48, "End of frame error : %s", LZ4F_getErrorName(endSize));
753 if (fwrite(dstBuffer, 1, endSize, dstFile) != endSize)
754 END_PROCESS(49, "Write error : cannot write end of frame");
755 compressedfilesize += endSize;
756 } }
757
758 /* Release file handlers */
759 fclose (srcFile);
760 if (!LZ4IO_isStdout(dstFileName)) fclose(dstFile); /* do not close stdout */
761
762 /* Copy owner, file permissions and modification time */
763 { stat_t statbuf;
764 if (!LZ4IO_isStdin(srcFileName)
765 && !LZ4IO_isStdout(dstFileName)
766 && !LZ4IO_isDevNull(dstFileName)
767 && UTIL_getFileStat(srcFileName, &statbuf)) {
768 UTIL_setFileStat(dstFileName, &statbuf);
769 } }
770
771 if (io_prefs->removeSrcFile) { /* remove source file : --rm */
772 if (remove(srcFileName))
773 END_PROCESS(50, "Remove error : %s: %s", srcFileName, strerror(errno));
774 }
775
776 /* Final Status */
777 DISPLAYLEVEL(2, "\r%79s\r", "");
778 DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
779 filesize, compressedfilesize,
780 (double)compressedfilesize / (filesize + !filesize /* avoid division by zero */ ) * 100);
781
782 return 0;
783 }
784
785
LZ4IO_compressFilename(const char * srcFileName,const char * dstFileName,int compressionLevel,const LZ4IO_prefs_t * prefs)786 int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel, const LZ4IO_prefs_t* prefs)
787 {
788 UTIL_time_t const timeStart = UTIL_getTime();
789 clock_t const cpuStart = clock();
790 cRess_t const ress = LZ4IO_createCResources(prefs);
791
792 int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel, prefs);
793
794 /* Free resources */
795 LZ4IO_freeCResources(ress);
796
797 /* Final Status */
798 { clock_t const cpuEnd = clock();
799 double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
800 U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
801 double const timeLength_s = (double)timeLength_ns / 1000000000;
802 DISPLAYLEVEL(4, "Completed in %.2f sec (cpu load : %.0f%%)\n",
803 timeLength_s, (cpuLoad_s / timeLength_s) * 100);
804 }
805
806 return result;
807 }
808
809
LZ4IO_compressMultipleFilenames(const char ** inFileNamesTable,int ifntSize,const char * suffix,int compressionLevel,const LZ4IO_prefs_t * prefs)810 int LZ4IO_compressMultipleFilenames(
811 const char** inFileNamesTable, int ifntSize,
812 const char* suffix,
813 int compressionLevel,
814 const LZ4IO_prefs_t* prefs)
815 {
816 int i;
817 int missed_files = 0;
818 char* dstFileName = (char*)malloc(FNSPACE);
819 size_t ofnSize = FNSPACE;
820 const size_t suffixSize = strlen(suffix);
821 cRess_t ress;
822
823 if (dstFileName == NULL) return ifntSize; /* not enough memory */
824 ress = LZ4IO_createCResources(prefs);
825
826 /* loop on each file */
827 for (i=0; i<ifntSize; i++) {
828 size_t const ifnSize = strlen(inFileNamesTable[i]);
829 if (LZ4IO_isStdout(suffix)) {
830 missed_files += LZ4IO_compressFilename_extRess(ress,
831 inFileNamesTable[i], stdoutmark,
832 compressionLevel, prefs);
833 continue;
834 }
835 /* suffix != stdout => compress into a file => generate its name */
836 if (ofnSize <= ifnSize+suffixSize+1) {
837 free(dstFileName);
838 ofnSize = ifnSize + 20;
839 dstFileName = (char*)malloc(ofnSize);
840 if (dstFileName==NULL) {
841 LZ4IO_freeCResources(ress);
842 return ifntSize;
843 } }
844 strcpy(dstFileName, inFileNamesTable[i]);
845 strcat(dstFileName, suffix);
846
847 missed_files += LZ4IO_compressFilename_extRess(ress,
848 inFileNamesTable[i], dstFileName,
849 compressionLevel, prefs);
850 }
851
852 /* Close & Free */
853 LZ4IO_freeCResources(ress);
854 free(dstFileName);
855
856 return missed_files;
857 }
858
859
860 /* ********************************************************************* */
861 /* ********************** LZ4 file-stream Decompression **************** */
862 /* ********************************************************************* */
863
/* LZ4IO_readLE32() :
 * Assemble an unsigned value from 4 bytes stored least-significant byte first.
 * `s` must point to a memory space of size >= 4. */
static unsigned LZ4IO_readLE32 (const void* s)
{
    const unsigned char* const bytes = (const unsigned char*)s;
    unsigned result = 0;
    int n;
    /* walk from the most significant byte down to the least significant one */
    for (n = 3; n >= 0; n--) {
        result = (result << 8) | (unsigned)bytes[n];
    }
    return result;
}
874
875
876 static unsigned
LZ4IO_fwriteSparse(FILE * file,const void * buffer,size_t bufferSize,int sparseFileSupport,unsigned storedSkips)877 LZ4IO_fwriteSparse(FILE* file,
878 const void* buffer, size_t bufferSize,
879 int sparseFileSupport,
880 unsigned storedSkips)
881 {
882 const size_t sizeT = sizeof(size_t);
883 const size_t maskT = sizeT -1 ;
884 const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */
885 const size_t* ptrT = bufferT;
886 size_t bufferSizeT = bufferSize / sizeT;
887 const size_t* const bufferTEnd = bufferT + bufferSizeT;
888 const size_t segmentSizeT = (32 KB) / sizeT;
889 int const sparseMode = (sparseFileSupport - (file==stdout)) > 0;
890
891 if (!sparseMode) { /* normal write */
892 size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
893 if (sizeCheck != bufferSize) END_PROCESS(70, "Write error : cannot write decoded block");
894 return 0;
895 }
896
897 /* avoid int overflow */
898 if (storedSkips > 1 GB) {
899 int const seekResult = UTIL_fseek(file, 1 GB, SEEK_CUR);
900 if (seekResult != 0) END_PROCESS(71, "1 GB skip error (sparse file support)");
901 storedSkips -= 1 GB;
902 }
903
904 while (ptrT < bufferTEnd) {
905 size_t seg0SizeT = segmentSizeT;
906 size_t nb0T;
907
908 /* count leading zeros */
909 if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
910 bufferSizeT -= seg0SizeT;
911 for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
912 storedSkips += (unsigned)(nb0T * sizeT);
913
914 if (nb0T != seg0SizeT) { /* not all 0s */
915 errno = 0;
916 { int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
917 if (seekResult) END_PROCESS(72, "Sparse skip error(%d): %s ; try --no-sparse", (int)errno, strerror(errno));
918 }
919 storedSkips = 0;
920 seg0SizeT -= nb0T;
921 ptrT += nb0T;
922 { size_t const sizeCheck = fwrite(ptrT, sizeT, seg0SizeT, file);
923 if (sizeCheck != seg0SizeT) END_PROCESS(73, "Write error : cannot write decoded block");
924 } }
925 ptrT += seg0SizeT;
926 }
927
928 if (bufferSize & maskT) { /* size not multiple of sizeT : implies end of block */
929 const char* const restStart = (const char*)bufferTEnd;
930 const char* restPtr = restStart;
931 size_t const restSize = bufferSize & maskT;
932 const char* const restEnd = restStart + restSize;
933 for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
934 storedSkips += (unsigned) (restPtr - restStart);
935 if (restPtr != restEnd) {
936 int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR);
937 if (seekResult) END_PROCESS(74, "Sparse skip error ; try --no-sparse");
938 storedSkips = 0;
939 { size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file);
940 if (sizeCheck != (size_t)(restEnd - restPtr)) END_PROCESS(75, "Write error : cannot write decoded end of block");
941 } }
942 }
943
944 return storedSkips;
945 }
946
/* LZ4IO_fwriteSparseEnd() :
 * Flush the zero bytes still pending after the last LZ4IO_fwriteSparse() call :
 * seek forward over all of them but one, then write that last zero explicitly.
 * Does nothing when storedSkips == 0.
 * Seek or write failure terminates the process through END_PROCESS(). */
static void LZ4IO_fwriteSparseEnd(FILE* file, unsigned storedSkips)
{
    char const zeroByte = 0;

    if (storedSkips == 0) return;   /* storedSkips>0 implies sparseFileSupport>0 */

    if (UTIL_fseek(file, storedSkips-1, SEEK_CUR) != 0)
        END_PROCESS(68, "Final skip error (sparse file)\n");
    if (fwrite(&zeroByte, 1, 1, file) != 1)
        END_PROCESS(69, "Write error : cannot write last zero\n");
}
957
958
959 static unsigned g_magicRead = 0; /* out-parameter of LZ4IO_decodeLegacyStream() */
960
961 static unsigned long long
LZ4IO_decodeLegacyStream(FILE * finput,FILE * foutput,const LZ4IO_prefs_t * prefs)962 LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput, const LZ4IO_prefs_t* prefs)
963 {
964 unsigned long long streamSize = 0;
965 unsigned storedSkips = 0;
966
967 /* Allocate Memory */
968 char* const in_buff = (char*)malloc((size_t)LZ4_compressBound(LEGACY_BLOCKSIZE));
969 char* const out_buff = (char*)malloc(LEGACY_BLOCKSIZE);
970 if (!in_buff || !out_buff) END_PROCESS(51, "Allocation error : not enough memory");
971
972 /* Main Loop */
973 while (1) {
974 unsigned int blockSize;
975
976 /* Block Size */
977 { size_t const sizeCheck = fread(in_buff, 1, LZ4IO_LEGACY_BLOCK_HEADER_SIZE, finput);
978 if (sizeCheck == 0) break; /* Nothing to read : file read is completed */
979 if (sizeCheck != LZ4IO_LEGACY_BLOCK_HEADER_SIZE) END_PROCESS(52, "Read error : cannot access block size ");
980 }
981 blockSize = LZ4IO_readLE32(in_buff); /* Convert to Little Endian */
982 if (blockSize > LZ4_COMPRESSBOUND(LEGACY_BLOCKSIZE)) {
983 /* Cannot read next block : maybe new stream ? */
984 g_magicRead = blockSize;
985 break;
986 }
987
988 /* Read Block */
989 { size_t const sizeCheck = fread(in_buff, 1, blockSize, finput);
990 if (sizeCheck != blockSize) END_PROCESS(53, "Read error : cannot access compressed block !"); }
991
992 /* Decode Block */
993 { int const decodeSize = LZ4_decompress_safe(in_buff, out_buff, (int)blockSize, LEGACY_BLOCKSIZE);
994 if (decodeSize < 0) END_PROCESS(54, "Decoding Failed ! Corrupted input detected !");
995 streamSize += (unsigned long long)decodeSize;
996 /* Write Block */
997 storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, (size_t)decodeSize, prefs->sparseFileSupport, storedSkips); /* success or die */
998 } }
999 if (ferror(finput)) END_PROCESS(55, "Read error : ferror");
1000
1001 LZ4IO_fwriteSparseEnd(foutput, storedSkips);
1002
1003 /* Free */
1004 free(in_buff);
1005 free(out_buff);
1006
1007 return streamSize;
1008 }
1009
1010
1011
1012 typedef struct {
1013 void* srcBuffer;
1014 size_t srcBufferSize;
1015 void* dstBuffer;
1016 size_t dstBufferSize;
1017 FILE* dstFile;
1018 LZ4F_decompressionContext_t dCtx;
1019 void* dictBuffer;
1020 size_t dictBufferSize;
1021 } dRess_t;
1022
/* LZ4IO_loadDDict() :
 * Fill ress->dictBuffer and ress->dictBufferSize.
 * Without prefs->useDictionary, both are cleared (NULL / 0).
 * Otherwise the dictionary is created from prefs->dictionaryFilename;
 * failure to create it terminates the process through END_PROCESS(). */
static void LZ4IO_loadDDict(dRess_t* ress, const LZ4IO_prefs_t* const prefs)
{
    if (prefs->useDictionary) {
        ress->dictBuffer = LZ4IO_createDict(&ress->dictBufferSize, prefs->dictionaryFilename);
        if (ress->dictBuffer == NULL)
            END_PROCESS(25, "Dictionary error : could not create dictionary");
        return;
    }

    /* no dictionary requested */
    ress->dictBuffer = NULL;
    ress->dictBufferSize = 0;
}
1034
1035 static const size_t LZ4IO_dBufferSize = 64 KB;
LZ4IO_createDResources(const LZ4IO_prefs_t * const prefs)1036 static dRess_t LZ4IO_createDResources(const LZ4IO_prefs_t* const prefs)
1037 {
1038 dRess_t ress;
1039
1040 /* init */
1041 LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&ress.dCtx, LZ4F_VERSION);
1042 if (LZ4F_isError(errorCode)) END_PROCESS(60, "Can't create LZ4F context : %s", LZ4F_getErrorName(errorCode));
1043
1044 /* Allocate Memory */
1045 ress.srcBufferSize = LZ4IO_dBufferSize;
1046 ress.srcBuffer = malloc(ress.srcBufferSize);
1047 ress.dstBufferSize = LZ4IO_dBufferSize;
1048 ress.dstBuffer = malloc(ress.dstBufferSize);
1049 if (!ress.srcBuffer || !ress.dstBuffer) END_PROCESS(61, "Allocation error : not enough memory");
1050
1051 LZ4IO_loadDDict(&ress, prefs);
1052
1053 ress.dstFile = NULL;
1054 return ress;
1055 }
1056
/* LZ4IO_freeDResources() :
 * Release the decompression context and the 3 buffers owned by `ress`.
 * ress.dstFile is not closed here.
 * Failure to free the context terminates the process through END_PROCESS(). */
static void LZ4IO_freeDResources(dRess_t ress)
{
    LZ4F_errorCode_t const freeError = LZ4F_freeDecompressionContext(ress.dCtx);
    if (LZ4F_isError(freeError))
        END_PROCESS(69, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(freeError));

    free(ress.dictBuffer);
    free(ress.dstBuffer);
    free(ress.srcBuffer);
}
1065
1066
1067 static unsigned long long
LZ4IO_decompressLZ4F(dRess_t ress,FILE * const srcFile,FILE * const dstFile,const LZ4IO_prefs_t * const prefs)1068 LZ4IO_decompressLZ4F(dRess_t ress,
1069 FILE* const srcFile, FILE* const dstFile,
1070 const LZ4IO_prefs_t* const prefs)
1071 {
1072 unsigned long long filesize = 0;
1073 LZ4F_errorCode_t nextToLoad;
1074 unsigned storedSkips = 0;
1075 LZ4F_decompressOptions_t const dOpt_skipCrc = { 0, 1, 0, 0 };
1076 const LZ4F_decompressOptions_t* const dOptPtr =
1077 ((prefs->blockChecksum==0) && (prefs->streamChecksum==0)) ?
1078 &dOpt_skipCrc : NULL;
1079
1080 /* Init feed with magic number (already consumed from FILE* sFile) */
1081 { size_t inSize = MAGICNUMBER_SIZE;
1082 size_t outSize= 0;
1083 LZ4IO_writeLE32(ress.srcBuffer, LZ4IO_MAGICNUMBER);
1084 nextToLoad = LZ4F_decompress_usingDict(ress.dCtx,
1085 ress.dstBuffer, &outSize,
1086 ress.srcBuffer, &inSize,
1087 ress.dictBuffer, ress.dictBufferSize,
1088 dOptPtr); /* set it once, it's enough */
1089 if (LZ4F_isError(nextToLoad))
1090 END_PROCESS(62, "Header error : %s", LZ4F_getErrorName(nextToLoad));
1091 }
1092
1093 /* Main Loop */
1094 for (;nextToLoad;) {
1095 size_t readSize;
1096 size_t pos = 0;
1097 size_t decodedBytes = ress.dstBufferSize;
1098
1099 /* Read input */
1100 if (nextToLoad > ress.srcBufferSize) nextToLoad = ress.srcBufferSize;
1101 readSize = fread(ress.srcBuffer, 1, nextToLoad, srcFile);
1102 if (!readSize) break; /* reached end of file or stream */
1103
1104 while ((pos < readSize) || (decodedBytes == ress.dstBufferSize)) { /* still to read, or still to flush */
1105 /* Decode Input (at least partially) */
1106 size_t remaining = readSize - pos;
1107 decodedBytes = ress.dstBufferSize;
1108 nextToLoad = LZ4F_decompress_usingDict(ress.dCtx,
1109 ress.dstBuffer, &decodedBytes,
1110 (char*)(ress.srcBuffer)+pos, &remaining,
1111 ress.dictBuffer, ress.dictBufferSize,
1112 NULL);
1113 if (LZ4F_isError(nextToLoad))
1114 END_PROCESS(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad));
1115 pos += remaining;
1116
1117 /* Write Block */
1118 if (decodedBytes) {
1119 if (!prefs->testMode)
1120 storedSkips = LZ4IO_fwriteSparse(dstFile, ress.dstBuffer, decodedBytes, prefs->sparseFileSupport, storedSkips);
1121 filesize += decodedBytes;
1122 DISPLAYUPDATE(2, "\rDecompressed : %u MiB ", (unsigned)(filesize>>20));
1123 }
1124
1125 if (!nextToLoad) break;
1126 }
1127 }
1128 /* can be out because readSize == 0, which could be an fread() error */
1129 if (ferror(srcFile)) END_PROCESS(67, "Read error");
1130
1131 if (!prefs->testMode) LZ4IO_fwriteSparseEnd(dstFile, storedSkips);
1132 if (nextToLoad!=0) END_PROCESS(68, "Unfinished stream");
1133
1134 return filesize;
1135 }
1136
1137
1138 /* LZ4IO_passThrough:
1139 * just output the same content as input, no decoding.
1140 * This is a capability of zcat, and by extension lz4cat
1141 * MNstore : contain the first MAGICNUMBER_SIZE bytes already read from finput
1142 */
1143 #define PTSIZE (64 KB)
1144 #define PTSIZET (PTSIZE / sizeof(size_t))
1145 static unsigned long long
LZ4IO_passThrough(FILE * finput,FILE * foutput,unsigned char MNstore[MAGICNUMBER_SIZE],int sparseFileSupport)1146 LZ4IO_passThrough(FILE* finput, FILE* foutput,
1147 unsigned char MNstore[MAGICNUMBER_SIZE],
1148 int sparseFileSupport)
1149 {
1150 size_t buffer[PTSIZET];
1151 size_t readBytes = 1;
1152 unsigned long long total = MAGICNUMBER_SIZE;
1153 unsigned storedSkips = 0;
1154
1155 if (fwrite(MNstore, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE) {
1156 END_PROCESS(50, "Pass-through write error");
1157 }
1158 while (readBytes) {
1159 readBytes = fread(buffer, 1, sizeof(buffer), finput);
1160 total += readBytes;
1161 storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, sparseFileSupport, storedSkips);
1162 }
1163 if (ferror(finput)) END_PROCESS(51, "Read Error");
1164
1165 LZ4IO_fwriteSparseEnd(foutput, storedSkips);
1166 return total;
1167 }
1168
1169 /* when fseek() doesn't work (pipe scenario),
1170 * read and forget from input.
1171 **/
1172 #define SKIP_BUFF_SIZE (16 KB)
1173 #define MIN(a,b) ( ((a)<(b)) ? (a) : (b) )
skipStream(FILE * f,unsigned offset)1174 static int skipStream(FILE* f, unsigned offset)
1175 {
1176 char buf[SKIP_BUFF_SIZE];
1177 while (offset > 0) {
1178 size_t const tr = MIN(offset, sizeof(buf));
1179 size_t const r = fread(buf, 1, tr, f);
1180 if (r != tr) return 1; /* error reading f */
1181 offset -= (unsigned)tr;
1182 }
1183 assert(offset == 0);
1184 return 0;
1185 }
1186
/** fseek_u32() :
 *  Move `fp` forward by `offset` bytes, in steps of at most 1 GB so that each
 *  step fits a `long`, safely handling cases when (unsigned)offset > LONG_MAX.
 *  If a seek fails, the remaining distance is consumed with skipStream().
 *  Only `where == SEEK_CUR` is supported.
 *  @return : 0 on success, -1 for an unsupported `where`,
 *            otherwise the result of skipStream(). */
static int fseek_u32(FILE *fp, unsigned offset, int where)
{
    unsigned const maxStep = 1U << 30;

    if (where != SEEK_CUR) return -1;   /* Only allows SEEK_CUR */

    while (offset != 0) {
        unsigned const step = (offset > maxStep) ? maxStep : offset;
        if (UTIL_fseek(fp, (long)step, SEEK_CUR) != 0) {
            /* seeking is not possible : read and discard what is left */
            return skipStream(fp, offset);
        }
        offset -= step;
    }
    return 0;
}
1204
1205
1206 #define ENDOFSTREAM ((unsigned long long)-1)
1207 #define DECODING_ERROR ((unsigned long long)-2)
1208 static unsigned long long
selectDecoder(dRess_t ress,FILE * finput,FILE * foutput,const LZ4IO_prefs_t * const prefs)1209 selectDecoder(dRess_t ress,
1210 FILE* finput, FILE* foutput,
1211 const LZ4IO_prefs_t* const prefs)
1212 {
1213 unsigned char MNstore[MAGICNUMBER_SIZE];
1214 unsigned magicNumber;
1215 static unsigned nbFrames = 0;
1216
1217 /* init */
1218 nbFrames++;
1219
1220 /* Check Archive Header */
1221 if (g_magicRead) { /* magic number already read from finput (see legacy frame)*/
1222 magicNumber = g_magicRead;
1223 g_magicRead = 0;
1224 } else {
1225 size_t const nbReadBytes = fread(MNstore, 1, MAGICNUMBER_SIZE, finput);
1226 if (nbReadBytes==0) { nbFrames = 0; return ENDOFSTREAM; } /* EOF */
1227 if (nbReadBytes != MAGICNUMBER_SIZE)
1228 END_PROCESS(40, "Unrecognized header : Magic Number unreadable");
1229 magicNumber = LZ4IO_readLE32(MNstore); /* Little Endian format */
1230 }
1231 if (LZ4IO_isSkippableMagicNumber(magicNumber))
1232 magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */
1233
1234 switch(magicNumber)
1235 {
1236 case LZ4IO_MAGICNUMBER:
1237 return LZ4IO_decompressLZ4F(ress, finput, foutput, prefs);
1238 case LEGACY_MAGICNUMBER:
1239 DISPLAYLEVEL(4, "Detected : Legacy format \n");
1240 return LZ4IO_decodeLegacyStream(finput, foutput, prefs);
1241 case LZ4IO_SKIPPABLE0:
1242 DISPLAYLEVEL(4, "Skipping detected skippable area \n");
1243 { size_t const nbReadBytes = fread(MNstore, 1, 4, finput);
1244 if (nbReadBytes != 4)
1245 END_PROCESS(42, "Stream error : skippable size unreadable");
1246 }
1247 { unsigned const size = LZ4IO_readLE32(MNstore);
1248 int const errorNb = fseek_u32(finput, size, SEEK_CUR);
1249 if (errorNb != 0)
1250 END_PROCESS(43, "Stream error : cannot skip skippable area");
1251 }
1252 return 0;
1253 default:
1254 if (nbFrames == 1) { /* just started */
1255 /* Wrong magic number at the beginning of 1st stream */
1256 if (!prefs->testMode && prefs->overwrite && prefs->passThrough) {
1257 nbFrames = 0;
1258 return LZ4IO_passThrough(finput, foutput, MNstore, prefs->sparseFileSupport);
1259 }
1260 END_PROCESS(44,"Unrecognized header : file cannot be decoded");
1261 }
1262 { long int const position = ftell(finput); /* only works for files < 2 GB */
1263 DISPLAYLEVEL(2, "Stream followed by undecodable data ");
1264 if (position != -1L)
1265 DISPLAYLEVEL(2, "at position %i ", (int)position);
1266 DISPLAYLEVEL(2, "\n");
1267 }
1268 return DECODING_ERROR;
1269 }
1270 }
1271
1272
1273 static int
LZ4IO_decompressSrcFile(dRess_t ress,const char * input_filename,const char * output_filename,const LZ4IO_prefs_t * const prefs)1274 LZ4IO_decompressSrcFile(dRess_t ress,
1275 const char* input_filename, const char* output_filename,
1276 const LZ4IO_prefs_t* const prefs)
1277 {
1278 FILE* const foutput = ress.dstFile;
1279 unsigned long long filesize = 0;
1280 int result = 0;
1281
1282 /* Init */
1283 FILE* const finput = LZ4IO_openSrcFile(input_filename);
1284 if (finput==NULL) return 1;
1285 assert(foutput != NULL);
1286
1287 /* Loop over multiple streams */
1288 for ( ; ; ) { /* endless loop, see break condition */
1289 unsigned long long const decodedSize =
1290 selectDecoder(ress, finput, foutput, prefs);
1291 if (decodedSize == ENDOFSTREAM) break;
1292 if (decodedSize == DECODING_ERROR) { result=1; break; }
1293 filesize += decodedSize;
1294 }
1295
1296 /* Close input */
1297 fclose(finput);
1298 if (prefs->removeSrcFile) { /* --rm */
1299 if (remove(input_filename))
1300 END_PROCESS(45, "Remove error : %s: %s", input_filename, strerror(errno));
1301 }
1302
1303 /* Final Status */
1304 DISPLAYLEVEL(2, "\r%79s\r", "");
1305 DISPLAYLEVEL(2, "%-20.20s : decoded %llu bytes \n", input_filename, filesize);
1306 (void)output_filename;
1307
1308 return result;
1309 }
1310
1311
1312 static int
LZ4IO_decompressDstFile(dRess_t ress,const char * input_filename,const char * output_filename,const LZ4IO_prefs_t * const prefs)1313 LZ4IO_decompressDstFile(dRess_t ress,
1314 const char* input_filename, const char* output_filename,
1315 const LZ4IO_prefs_t* const prefs)
1316 {
1317 int result;
1318 stat_t statbuf;
1319 int stat_result = 0;
1320 FILE* const foutput = LZ4IO_openDstFile(output_filename, prefs);
1321 if (foutput==NULL) return 1; /* failure */
1322
1323 if ( !LZ4IO_isStdin(input_filename)
1324 && UTIL_getFileStat(input_filename, &statbuf))
1325 stat_result = 1;
1326
1327 ress.dstFile = foutput;
1328 result = LZ4IO_decompressSrcFile(ress, input_filename, output_filename, prefs);
1329
1330 fclose(foutput);
1331
1332 /* Copy owner, file permissions and modification time */
1333 if ( stat_result != 0
1334 && !LZ4IO_isStdout(output_filename)
1335 && !LZ4IO_isDevNull(output_filename)) {
1336 UTIL_setFileStat(output_filename, &statbuf);
1337 /* should return value be read ? or is silent fail good enough ? */
1338 }
1339
1340 return result;
1341 }
1342
1343
1344 /* Note : LZ4IO_decompressFilename()
1345 * can provide total decompression time for the specified fileName.
1346 * This information is not available with LZ4IO_decompressMultipleFilenames().
1347 */
LZ4IO_decompressFilename(const char * input_filename,const char * output_filename,const LZ4IO_prefs_t * prefs)1348 int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename, const LZ4IO_prefs_t* prefs)
1349 {
1350 dRess_t const ress = LZ4IO_createDResources(prefs);
1351 clock_t const start = clock();
1352
1353 int const status = LZ4IO_decompressDstFile(ress, input_filename, output_filename, prefs);
1354
1355 clock_t const end = clock();
1356 double const seconds = (double)(end - start) / CLOCKS_PER_SEC;
1357 DISPLAYLEVEL(4, "Done in %.2f sec \n", seconds);
1358
1359 LZ4IO_freeDResources(ress);
1360 return status;
1361 }
1362
1363
LZ4IO_decompressMultipleFilenames(const char ** inFileNamesTable,int ifntSize,const char * suffix,const LZ4IO_prefs_t * prefs)1364 int LZ4IO_decompressMultipleFilenames(
1365 const char** inFileNamesTable, int ifntSize,
1366 const char* suffix,
1367 const LZ4IO_prefs_t* prefs)
1368 {
1369 int i;
1370 int skippedFiles = 0;
1371 int missingFiles = 0;
1372 char* outFileName = (char*)malloc(FNSPACE);
1373 size_t ofnSize = FNSPACE;
1374 size_t const suffixSize = strlen(suffix);
1375 dRess_t ress = LZ4IO_createDResources(prefs);
1376
1377 if (outFileName==NULL) END_PROCESS(70, "Memory allocation error");
1378 if (prefs->blockChecksum==0 && prefs->streamChecksum==0) {
1379 DISPLAYLEVEL(4, "disabling checksum validation during decoding \n");
1380 }
1381 ress.dstFile = LZ4IO_openDstFile(stdoutmark, prefs);
1382
1383 for (i=0; i<ifntSize; i++) {
1384 size_t const ifnSize = strlen(inFileNamesTable[i]);
1385 const char* const suffixPtr = inFileNamesTable[i] + ifnSize - suffixSize;
1386 if (LZ4IO_isStdout(suffix) || LZ4IO_isDevNull(suffix)) {
1387 missingFiles += LZ4IO_decompressSrcFile(ress, inFileNamesTable[i], suffix, prefs);
1388 continue;
1389 }
1390 if (ofnSize <= ifnSize-suffixSize+1) {
1391 free(outFileName);
1392 ofnSize = ifnSize + 20;
1393 outFileName = (char*)malloc(ofnSize);
1394 if (outFileName==NULL) END_PROCESS(71, "Memory allocation error");
1395 }
1396 if (ifnSize <= suffixSize || !UTIL_sameString(suffixPtr, suffix) ) {
1397 DISPLAYLEVEL(1, "File extension doesn't match expected LZ4_EXTENSION (%4s); will not process file: %s\n", suffix, inFileNamesTable[i]);
1398 skippedFiles++;
1399 continue;
1400 }
1401 memcpy(outFileName, inFileNamesTable[i], ifnSize - suffixSize);
1402 outFileName[ifnSize-suffixSize] = '\0';
1403 missingFiles += LZ4IO_decompressDstFile(ress, inFileNamesTable[i], outFileName, prefs);
1404 }
1405
1406 LZ4IO_freeDResources(ress);
1407 free(outFileName);
1408 return missingFiles + skippedFiles;
1409 }
1410
1411
1412 /* ********************************************************************* */
1413 /* ********************** LZ4 --list command *********************** */
1414 /* ********************************************************************* */
1415
/* LZ4IO_frameType_t :
 * kind of frame met while scanning a compressed file for --list.
 * Values are also used as indexes into LZ4IO_frameTypeNames[]. */
typedef enum {
    lz4Frame = 0,
    legacyFrame,
    skippableFrame
} LZ4IO_frameType_t;
1422
1423 typedef struct {
1424 LZ4F_frameInfo_t lz4FrameInfo;
1425 LZ4IO_frameType_t frameType;
1426 } LZ4IO_frameInfo_t;
1427
1428 #define LZ4IO_INIT_FRAMEINFO { LZ4F_INIT_FRAMEINFO, lz4Frame }
1429
1430 typedef struct {
1431 const char* fileName;
1432 unsigned long long fileSize;
1433 unsigned long long frameCount;
1434 LZ4IO_frameInfo_t frameSummary;
1435 unsigned short eqFrameTypes;
1436 unsigned short eqBlockTypes;
1437 unsigned short allContentSize;
1438 } LZ4IO_cFileInfo_t;
1439
1440 #define LZ4IO_INIT_CFILEINFO { NULL, 0ULL, 0, LZ4IO_INIT_FRAMEINFO, 1, 1, 1 }
1441
/* LZ4IO_infoResult :
 * outcome of scanning one file for --list */
typedef enum {
    LZ4IO_LZ4F_OK,
    LZ4IO_format_not_known,
    LZ4IO_not_a_file
} LZ4IO_infoResult;

/* display names, indexed by LZ4IO_frameType_t */
static const char * LZ4IO_frameTypeNames[] = {
    "LZ4Frame",
    "LegacyFrame",
    "SkippableFrame"
};
1445
1446 /* Read block headers and skip block data
1447 Return total blocks size for this frame including block headers,
1448 block checksums and content checksums.
1449 returns 0 in case it can't successfully skip block data.
1450 Assumes SEEK_CUR after frame header.
1451 */
1452 static unsigned long long
LZ4IO_skipBlocksData(FILE * finput,const LZ4F_blockChecksum_t blockChecksumFlag,const LZ4F_contentChecksum_t contentChecksumFlag)1453 LZ4IO_skipBlocksData(FILE* finput,
1454 const LZ4F_blockChecksum_t blockChecksumFlag,
1455 const LZ4F_contentChecksum_t contentChecksumFlag)
1456 {
1457 unsigned char blockInfo[LZ4F_BLOCK_HEADER_SIZE];
1458 unsigned long long totalBlocksSize = 0;
1459 for (;;) {
1460 if (!fread(blockInfo, 1, LZ4F_BLOCK_HEADER_SIZE, finput)) {
1461 if (feof(finput)) return totalBlocksSize;
1462 return 0;
1463 }
1464 totalBlocksSize += LZ4F_BLOCK_HEADER_SIZE;
1465 { const unsigned long nextCBlockSize = LZ4IO_readLE32(&blockInfo) & 0x7FFFFFFFU;
1466 const unsigned long nextBlock = nextCBlockSize + (blockChecksumFlag * LZ4F_BLOCK_CHECKSUM_SIZE);
1467 if (nextCBlockSize == 0) {
1468 /* Reached EndMark */
1469 if (contentChecksumFlag) {
1470 /* Skip content checksum */
1471 if (UTIL_fseek(finput, LZ4F_CONTENT_CHECKSUM_SIZE, SEEK_CUR) != 0) {
1472 return 0;
1473 }
1474 totalBlocksSize += LZ4F_CONTENT_CHECKSUM_SIZE;
1475 }
1476 break;
1477 }
1478 totalBlocksSize += nextBlock;
1479 /* skip to the next block */
1480 assert(nextBlock < LONG_MAX);
1481 if (UTIL_fseek(finput, (long)nextBlock, SEEK_CUR) != 0) return 0;
1482 } }
1483 return totalBlocksSize;
1484 }
1485
1486 static const unsigned long long legacyFrameUndecodable = (0ULL-1);
1487 /* For legacy frames only.
1488 Read block headers and skip block data.
1489 Return total blocks size for this frame including block headers.
1490 or legacyFrameUndecodable in case it can't successfully skip block data.
1491 This works as long as legacy block header size = magic number size.
1492 Assumes SEEK_CUR after frame header.
1493 */
LZ4IO_skipLegacyBlocksData(FILE * finput)1494 static unsigned long long LZ4IO_skipLegacyBlocksData(FILE* finput)
1495 {
1496 unsigned char blockInfo[LZ4IO_LEGACY_BLOCK_HEADER_SIZE];
1497 unsigned long long totalBlocksSize = 0;
1498 LZ4IO_STATIC_ASSERT(LZ4IO_LEGACY_BLOCK_HEADER_SIZE == MAGICNUMBER_SIZE);
1499 for (;;) {
1500 size_t const bhs = fread(blockInfo, 1, LZ4IO_LEGACY_BLOCK_HEADER_SIZE, finput);
1501 if (bhs == 0) {
1502 if (feof(finput)) return totalBlocksSize;
1503 return legacyFrameUndecodable;
1504 }
1505 if (bhs != 4) {
1506 return legacyFrameUndecodable;
1507 }
1508 { const unsigned int nextCBlockSize = LZ4IO_readLE32(&blockInfo);
1509 if ( nextCBlockSize == LEGACY_MAGICNUMBER
1510 || nextCBlockSize == LZ4IO_MAGICNUMBER
1511 || LZ4IO_isSkippableMagicNumber(nextCBlockSize) ) {
1512 /* Rewind back. we want cursor at the beginning of next frame */
1513 if (UTIL_fseek(finput, -LZ4IO_LEGACY_BLOCK_HEADER_SIZE, SEEK_CUR) != 0) {
1514 END_PROCESS(37, "impossible to skip backward");
1515 }
1516 break;
1517 }
1518 if (nextCBlockSize > LZ4IO_LEGACY_BLOCK_SIZE_MAX) {
1519 DISPLAYLEVEL(4, "Error : block in legacy frame is too large \n");
1520 return legacyFrameUndecodable;
1521 }
1522 totalBlocksSize += LZ4IO_LEGACY_BLOCK_HEADER_SIZE + nextCBlockSize;
1523 /* skip to the next block
1524 * note : this won't fail if nextCBlockSize is too large, skipping past the end of finput */
1525 if (UTIL_fseek(finput, nextCBlockSize, SEEK_CUR) != 0) {
1526 return legacyFrameUndecodable;
1527 } } }
1528 return totalBlocksSize;
1529 }
1530
1531 /* LZ4IO_blockTypeID:
1532 * return human-readable block type, following command line convention
1533 * buffer : must be a valid memory area of at least 4 bytes */
LZ4IO_blockTypeID(LZ4F_blockSizeID_t sizeID,LZ4F_blockMode_t blockMode,char buffer[4])1534 const char* LZ4IO_blockTypeID(LZ4F_blockSizeID_t sizeID, LZ4F_blockMode_t blockMode, char buffer[4])
1535 {
1536 buffer[0] = 'B';
1537 assert(sizeID >= 4); assert(sizeID <= 7);
1538 buffer[1] = (char)(sizeID + '0');
1539 buffer[2] = (blockMode == LZ4F_blockIndependent) ? 'I' : 'D';
1540 buffer[3] = 0;
1541 return buffer;
1542 }
1543
/* LZ4IO_toHuman() :
 * format `size` with 2 decimals, after dividing it by 1024 as many times as
 * needed to get below 1024, followed by the matching unit letter
 * (none, K, M, G, T, P, E, Z, Y).
 * buf : must be valid memory area of at least 10 bytes
 * @return : buf */
static const char* LZ4IO_toHuman(long double size, char *buf)
{
    const char unitLetters[] = {"\0KMGTPEZY"};
    size_t unitIdx = 0;
    while (size >= 1024) {
        size /= 1024;
        unitIdx++;
    }
    /* below 1024 the unit letter is '\0', which simply ends the string early */
    sprintf(buf, "%.2Lf%c", size, unitLetters[unitIdx]);
    return buf;
}
1553
/* LZ4IO_baseName() :
 * Get filename without path prefix : the part following the last '/',
 * or, when there is no '/', the part following the last '\\'.
 * @return : pointer inside input_filename (input_filename itself when it
 *           contains no separator) */
static const char* LZ4IO_baseName(const char* input_filename)
{
    const char* lastSep = strrchr(input_filename, '/');
    if (lastSep == NULL) lastSep = strrchr(input_filename, '\\');
    return (lastSep != NULL) ? lastSep + 1 : input_filename;
}
1562
1563 /* Report frame/s information (--list) in verbose mode (-v).
1564 * Will populate file info with fileName and frameSummary where applicable.
1565 * - TODO :
1566 * + report nb of blocks, hence max. possible decompressed size (when not reported in header)
1567 */
1568 static LZ4IO_infoResult
LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t * cfinfo,const char * input_filename)1569 LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename)
1570 {
1571 LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */
1572 unsigned char buffer[LZ4F_HEADER_SIZE_MAX];
1573 FILE* const finput = LZ4IO_openSrcFile(input_filename);
1574
1575 if (finput == NULL) return LZ4IO_not_a_file;
1576 cfinfo->fileSize = UTIL_getOpenFileSize(finput);
1577
1578 while (!feof(finput)) {
1579 LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO;
1580 unsigned magicNumber;
1581 /* Get MagicNumber */
1582 { size_t const nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput);
1583 if (nbReadBytes == 0) { break; } /* EOF */
1584 result = LZ4IO_format_not_known; /* default result (error) */
1585 if (nbReadBytes != MAGICNUMBER_SIZE) {
1586 END_PROCESS(40, "Unrecognized header : Magic Number unreadable");
1587 } }
1588 magicNumber = LZ4IO_readLE32(buffer); /* Little Endian format */
1589 if (LZ4IO_isSkippableMagicNumber(magicNumber))
1590 magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */
1591
1592 switch (magicNumber) {
1593 case LZ4IO_MAGICNUMBER:
1594 if (cfinfo->frameSummary.frameType != lz4Frame) cfinfo->eqFrameTypes = 0;
1595 /* Get frame info */
1596 { const size_t readBytes = fread(buffer + MAGICNUMBER_SIZE, 1, LZ4F_HEADER_SIZE_MIN - MAGICNUMBER_SIZE, finput);
1597 if (!readBytes || ferror(finput)) END_PROCESS(71, "Error reading %s", input_filename);
1598 }
1599 { size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN);
1600 if (LZ4F_isError(hSize)) break;
1601 if (hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)) {
1602 /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/
1603 const size_t readBytes = fread(buffer + LZ4F_HEADER_SIZE_MIN, 1, hSize - LZ4F_HEADER_SIZE_MIN, finput);
1604 if (!readBytes || ferror(finput)) END_PROCESS(72, "Error reading %s", input_filename);
1605 }
1606 /* Create decompression context */
1607 { LZ4F_dctx* dctx;
1608 if ( LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION)) ) break;
1609 { unsigned const frameInfoError = LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize));
1610 LZ4F_freeDecompressionContext(dctx);
1611 if (frameInfoError) break;
1612 if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID ||
1613 cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode)
1614 && cfinfo->frameCount != 0)
1615 cfinfo->eqBlockTypes = 0;
1616 { const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput,
1617 frameInfo.lz4FrameInfo.blockChecksumFlag,
1618 frameInfo.lz4FrameInfo.contentChecksumFlag);
1619 if (totalBlocksSize) {
1620 char bTypeBuffer[5];
1621 LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer);
1622 DISPLAYLEVEL(3, " %6llu %14s %5s %8s",
1623 cfinfo->frameCount + 1,
1624 LZ4IO_frameTypeNames[frameInfo.frameType],
1625 bTypeBuffer,
1626 frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-");
1627 if (frameInfo.lz4FrameInfo.contentSize) {
1628 { double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100;
1629 DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n",
1630 totalBlocksSize + hSize,
1631 frameInfo.lz4FrameInfo.contentSize,
1632 ratio);
1633 }
1634 /* Now we've consumed frameInfo we can use it to store the total contentSize */
1635 frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize;
1636 }
1637 else {
1638 DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-");
1639 cfinfo->allContentSize = 0;
1640 }
1641 result = LZ4IO_LZ4F_OK;
1642 } } } } }
1643 break;
1644 case LEGACY_MAGICNUMBER:
1645 frameInfo.frameType = legacyFrame;
1646 if (cfinfo->frameSummary.frameType != legacyFrame && cfinfo->frameCount != 0) cfinfo->eqFrameTypes = 0;
1647 cfinfo->eqBlockTypes = 0;
1648 cfinfo->allContentSize = 0;
1649 { const unsigned long long totalBlocksSize = LZ4IO_skipLegacyBlocksData(finput);
1650 if (totalBlocksSize == legacyFrameUndecodable) {
1651 DISPLAYLEVEL(1, "Corrupted legacy frame \n");
1652 result = LZ4IO_format_not_known;
1653 break;
1654 }
1655 if (totalBlocksSize) {
1656 DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20llu %20s %9s\n",
1657 cfinfo->frameCount + 1,
1658 LZ4IO_frameTypeNames[frameInfo.frameType],
1659 "-", "-",
1660 totalBlocksSize + 4,
1661 "-", "-");
1662 result = LZ4IO_LZ4F_OK;
1663 } }
1664 break;
1665 case LZ4IO_SKIPPABLE0:
1666 frameInfo.frameType = skippableFrame;
1667 if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount != 0) cfinfo->eqFrameTypes = 0;
1668 cfinfo->eqBlockTypes = 0;
1669 cfinfo->allContentSize = 0;
1670 { size_t const nbReadBytes = fread(buffer, 1, 4, finput);
1671 if (nbReadBytes != 4)
1672 END_PROCESS(42, "Stream error : skippable size unreadable");
1673 }
1674 { unsigned const size = LZ4IO_readLE32(buffer);
1675 int const errorNb = fseek_u32(finput, size, SEEK_CUR);
1676 if (errorNb != 0)
1677 END_PROCESS(43, "Stream error : cannot skip skippable area");
1678 DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20u %20s %9s\n",
1679 cfinfo->frameCount + 1,
1680 "SkippableFrame",
1681 "-", "-", size + 8, "-", "-");
1682
1683 result = LZ4IO_LZ4F_OK;
1684 }
1685 break;
1686 default:
1687 { long int const position = ftell(finput); /* only works for files < 2 GB */
1688 DISPLAYLEVEL(3, "Stream followed by undecodable data ");
1689 if (position != -1L)
1690 DISPLAYLEVEL(3, "at position %i ", (int)position);
1691 result = LZ4IO_format_not_known;
1692 DISPLAYLEVEL(3, "\n");
1693 }
1694 break;
1695 }
1696 if (result != LZ4IO_LZ4F_OK) break;
1697 cfinfo->frameSummary = frameInfo;
1698 cfinfo->frameCount++;
1699 } /* while (!feof(finput)) */
1700 fclose(finput);
1701 return result;
1702 }
1703
1704
LZ4IO_displayCompressedFilesInfo(const char ** inFileNames,size_t ifnIdx)1705 int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx)
1706 {
1707 int result = 0;
1708 size_t idx = 0;
1709 if (g_displayLevel < 3) {
1710 DISPLAYOUT("%10s %14s %5s %11s %13s %9s %s\n",
1711 "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename");
1712 }
1713 for (; idx < ifnIdx; idx++) {
1714 /* Get file info */
1715 LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO;
1716 cfinfo.fileName = LZ4IO_baseName(inFileNames[idx]);
1717 if (LZ4IO_isStdin(inFileNames[idx]) ? !UTIL_isRegFD(0) : !UTIL_isRegFile(inFileNames[idx])) {
1718 DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]);
1719 return 1;
1720 }
1721 DISPLAYLEVEL(3, "%s(%llu/%llu)\n", cfinfo.fileName, (unsigned long long)idx + 1, (unsigned long long)ifnIdx);
1722 DISPLAYLEVEL(3, " %6s %14s %5s %8s %20s %20s %9s\n",
1723 "Frame", "Type", "Block", "Checksum", "Compressed", "Uncompressed", "Ratio")
1724 { LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]);
1725 if (op_result != LZ4IO_LZ4F_OK) {
1726 assert(op_result == LZ4IO_format_not_known);
1727 DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]);
1728 return 1;
1729 } }
1730 DISPLAYLEVEL(3, "\n");
1731 if (g_displayLevel < 3) {
1732 /* Display Summary */
1733 { char buffers[3][10];
1734 DISPLAYOUT("%10llu %14s %5s %11s %13s ",
1735 cfinfo.frameCount,
1736 cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" ,
1737 cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID,
1738 cfinfo.frameSummary.lz4FrameInfo.blockMode, buffers[0]) : "-",
1739 LZ4IO_toHuman((long double)cfinfo.fileSize, buffers[1]),
1740 cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-");
1741 if (cfinfo.allContentSize) {
1742 double const ratio = (double)cfinfo.fileSize / cfinfo.frameSummary.lz4FrameInfo.contentSize * 100;
1743 DISPLAYOUT("%9.2f%% %s \n", ratio, cfinfo.fileName);
1744 } else {
1745 DISPLAYOUT("%9s %s\n",
1746 "-",
1747 cfinfo.fileName);
1748 } } } /* if (g_displayLevel < 3) */
1749 } /* for (; idx < ifnIdx; idx++) */
1750
1751 return result;
1752 }
1753