1 /*-----------------------------------------------------------*/
2 /*--- Block recoverer program for bzip2 ---*/
3 /*--- bzip2recover.c ---*/
4 /*-----------------------------------------------------------*/
5
6 /* ------------------------------------------------------------------
7 This file is part of bzip2/libbzip2, a program and library for
8 lossless, block-sorting data compression.
9
10 bzip2/libbzip2 version 1.0.5 of 10 December 2007
11 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
12
13 Please read the WARNING, DISCLAIMER and PATENTS sections in the
14 README file.
15
16 This program is released under the terms of the license contained
17 in the file LICENSE.
18 ------------------------------------------------------------------ */
19
20 /* This program is a complete hack and should be rewritten properly.
21 It isn't very complicated. */
22
23 #include <stdio.h>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
27
28
29 /* This program records bit locations in the file to be recovered.
30 That means that if 64-bit ints are not supported, we will not
31 be able to recover .bz2 files over 512MB (2^32 bits) long.
32 On GNU supported platforms, we take advantage of the 64-bit
33 int support to circumvent this problem. Ditto MSVC.
34
35 This change occurred in version 1.0.2; all prior versions have
36 the 512MB limitation.
37 */
/* MaybeUInt64 is the type used to record bit positions in the input
   file, and MaybeUInt64_FMT is the matching printf conversion.  It is
   64 bits wide when the compiler is known to provide such a type
   (GCC-compatible compilers, MSVC) and falls back to unsigned int
   otherwise. */
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "ll" is the standard length modifier for (unsigned) long long.
      The "L" modifier is only defined for floating conversions, so
      "%Lu" relied on a C library extension. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
50
/* Short aliases for the basic scalar types used throughout this file. */
typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;

/* Boolean type (an unsigned char) and its two values. */
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)
58
59
60 #define BZ_MAX_FILENAME 2000
61
62 Char inFileName[BZ_MAX_FILENAME];
63 Char outFileName[BZ_MAX_FILENAME];
64 Char progName[BZ_MAX_FILENAME];
65
66 MaybeUInt64 bytesOut = 0;
67 MaybeUInt64 bytesIn = 0;
68
69
70 /*---------------------------------------------------*/
71 /*--- Header bytes ---*/
72 /*---------------------------------------------------*/
73
/* The bytes written at the start of every recovered file: 'B' 'Z' 'h'
   followed by BZ_HDR_0 plus a digit. */
#define BZ_HDR_B   0x42   /* 'B' */
#define BZ_HDR_Z   0x5a   /* 'Z' */
#define BZ_HDR_h   0x68   /* 'h' */
#define BZ_HDR_0   0x30   /* '0' */
78
79
80 /*---------------------------------------------------*/
81 /*--- I/O errors ---*/
82 /*---------------------------------------------------*/
83
84 /*---------------------------------------------*/
readError(void)85 static void readError ( void )
86 {
87 fprintf ( stderr,
88 "%s: I/O error reading `%s', possible reason follows.\n",
89 progName, inFileName );
90 perror ( progName );
91 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
92 progName );
93 exit ( 1 );
94 }
95
96
97 /*---------------------------------------------*/
writeError(void)98 static void writeError ( void )
99 {
100 fprintf ( stderr,
101 "%s: I/O error reading `%s', possible reason follows.\n",
102 progName, inFileName );
103 perror ( progName );
104 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105 progName );
106 exit ( 1 );
107 }
108
109
110 /*---------------------------------------------*/
/* Report that an allocation request for n bytes could not be satisfied
   and terminate the program with exit status 1.  Does not return. */
static void mallocFail ( Int32 n )
{
   /* Both diagnostic lines are emitted by a single call. */
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n"
             "%s: warning: output file(s) may be incomplete.\n",
             progName, n,
             progName );
   exit ( 1 );
}
120
121
122 /*---------------------------------------------*/
/* Report that the input file holds more blocks than this program was
   compiled to handle (max_handled_blocks) and terminate with exit
   status 1.  Does not return. */
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   /* All three diagnostic lines are emitted by a single call. */
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n"
             "%s: and cannot be handled.  To fix, increase\n"
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName, inFileName, max_handled_blocks,
             progName,
             progName );
   exit ( 1 );
}
136
137
138
139 /*---------------------------------------------------*/
140 /*--- Bit stream I/O ---*/
141 /*---------------------------------------------------*/
142
143 typedef
144 struct {
145 FILE* handle;
146 Int32 buffer;
147 Int32 buffLive;
148 Char mode;
149 }
150 BitStream;
151
152
153 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)154 static BitStream* bsOpenReadStream ( FILE* stream )
155 {
156 BitStream *bs = malloc ( sizeof(BitStream) );
157 if (bs == NULL) mallocFail ( sizeof(BitStream) );
158 bs->handle = stream;
159 bs->buffer = 0;
160 bs->buffLive = 0;
161 bs->mode = 'r';
162 return bs;
163 }
164
165
166 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)167 static BitStream* bsOpenWriteStream ( FILE* stream )
168 {
169 BitStream *bs = malloc ( sizeof(BitStream) );
170 if (bs == NULL) mallocFail ( sizeof(BitStream) );
171 bs->handle = stream;
172 bs->buffer = 0;
173 bs->buffLive = 0;
174 bs->mode = 'w';
175 return bs;
176 }
177
178
179 /*---------------------------------------------*/
/* Append the least significant bit of `bit' to the write stream bs.
   Bits are collected most-significant first; a byte is only written
   out when a ninth bit arrives, so the last byte stays buffered until
   bsClose().  Calls writeError() (which exits) if the write fails. */
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   Int32 lsb = bit & 0x1;

   /* Room left in the current byte: just shift the new bit in. */
   if (bs->buffLive != 8) {
      bs->buffer = ( (bs->buffer << 1) | lsb );
      bs->buffLive++;
      return;
   }

   /* The byte is full: emit it and start the next one with this bit. */
   if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
   bytesOut++;
   bs->buffLive = 1;
   bs->buffer   = lsb;
}
193
194
195 /*---------------------------------------------*/
196 /*--
197 Returns 0 or 1, or 2 to indicate EOF.
198 --*/
bsGetBit(BitStream * bs)199 static Int32 bsGetBit ( BitStream* bs )
200 {
201 if (bs->buffLive > 0) {
202 bs->buffLive --;
203 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204 } else {
205 Int32 retVal = getc ( bs->handle );
206 if ( retVal == EOF ) {
207 if (errno != 0) readError();
208 return 2;
209 }
210 bs->buffLive = 7;
211 bs->buffer = retVal;
212 return ( ((bs->buffer) >> 7) & 0x1 );
213 }
214 }
215
216
217 /*---------------------------------------------*/
/* Finish with the bit stream bs: for a write stream, pad the buffered
   bits with zeroes up to a whole byte, write that byte and flush; then
   close the underlying stdio stream and free bs.  Any failure is
   reported through writeError() or readError() according to the
   stream's mode; both exit. */
static void bsClose ( BitStream* bs )
{
   Bool writing = (bs->mode == 'w') ? True : False;

   if (writing) {
      /* Left-justify the pending bits within the final byte. */
      for ( ; bs->buffLive < 8; bs->buffLive++ ) {
         bs->buffer <<= 1;
      }
      if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF) writeError();
      bytesOut++;
      if (fflush ( bs->handle ) == EOF) writeError();
   }

   if (fclose ( bs->handle ) == EOF) {
      if (writing) {
         writeError();
      } else {
         readError();
      }
   }

   free ( bs );
}
239
240
241 /*---------------------------------------------*/
/* Write the 8 bits of c to bs, most significant bit first. */
static void bsPutUChar ( BitStream* bs, UChar c )
{
   Int32 shift = 8;

   /* shift takes the values 7, 6, ..., 0 inside the loop body. */
   while (shift-- > 0) {
      bsPutBit ( bs, (Int32) ((((UInt32) c) >> shift) & 0x1) );
   }
}
248
249
250 /*---------------------------------------------*/
/* Write the low 32 bits of c to bs, most significant bit first. */
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   UInt32 mask;

   /* Walk a single-bit mask from bit 31 down to bit 0. */
   for (mask = 0x80000000U; mask != 0; mask >>= 1) {
      bsPutBit ( bs, (c & mask) != 0 ? 1 : 0 );
   }
}
258
259
260 /*---------------------------------------------*/
endsInBz2(Char * name)261 static Bool endsInBz2 ( Char* name )
262 {
263 Int32 n = strlen ( name );
264 if (n <= 4) return False;
265 return
266 (name[n-4] == '.' &&
267 name[n-3] == 'b' &&
268 name[n-2] == 'z' &&
269 name[n-1] == '2');
270 }
271
272
273 /*---------------------------------------------------*/
274 /*--- ---*/
275 /*---------------------------------------------------*/
276
277 /* This logic isn't really right when it comes to Cygwin. */
/* BZ_SPLIT_SYM is the character that separates directory components in
   a path; it is used to locate the base name of the input file. */
#if defined(_WIN32)
#  define BZ_SPLIT_SYM '\\'   /* path splitter on Windows platform */
#else
#  define BZ_SPLIT_SYM '/'    /* path splitter on Unix platform */
#endif
283
/* The two 48-bit patterns searched for in the input, each split into
   its upper 16 bits (HI) and lower 32 bits (LO).  The header pattern
   is also the six bytes written ahead of each recovered block. */
#define BLOCK_HEADER_HI    0x00003141UL
#define BLOCK_HEADER_LO    0x59265359UL

/* The end pattern is also the six bytes written after each recovered
   block, ahead of the CRC. */
#define BLOCK_ENDMARK_HI   0x00001772UL
#define BLOCK_ENDMARK_LO   0x45385090UL
289
290 /* Increase if necessary. However, a .bz2 file with > 50000 blocks
291 would have an uncompressed size of at least 40GB, so the chances
292 are low you'll need to up this.
293 */
294 #define BZ_MAX_HANDLED_BLOCKS 50000
295
296 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297 MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
298 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299 MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
300
/* Program entry point.

   Usage: <prog> damaged_file_name

   Pass 1 scans the input bit by bit for the 48-bit block header and
   end-of-stream patterns and records where each block starts and ends.
   Pass 2 re-reads the input and copies every recorded block into its
   own file, named by inserting "recNNNNN" in front of the input's base
   name (and appending ".bz2" if not already present).  Each output
   file gets a fresh stream header, the block header, the block bits,
   an end-of-stream marker and the block's CRC as the stream CRC.

   Returns 0 on success.  Usage errors, unopenable files, I/O errors
   and the absence of any block boundary all terminate via exit(1). */
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;

   /* argv[0] is supplied by whoever launched the program and may be
      arbitrarily long (or absent), so bound the copy to the size of
      progName and always terminate it. */
   strncpy ( progName,
             (argc > 0 && argv[0] != NULL) ? argv[0] : "bzip2recover",
             BZ_MAX_FILENAME-1 );
   progName[BZ_MAX_FILENAME-1] = '\0';
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* Keep 20 chars of headroom: the output name adds "recNNNNN" and
      possibly ".bz2" to the input name. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   /*-- Pass 1: locate the block boundaries. --*/

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /* End of input.  Whatever follows the last marker counts as a
            (truncated) block only if it is at least 40 bits long. */
         if (bitsRead >= bStart[currBlock] &&
             (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /* buffHi:buffLo is a 64-bit shift register holding the most
         recently read bits; its low 48 bits are compared against the
         two marker patterns. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* The current block ends just before the 48-bit marker. */
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         /* Whatever precedes the first marker (currBlock == 0) is not
            a block, and spans shorter than 130 bits are discarded. */
         if (currBlock > 0 &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* currBlock is about to be incremented and used as an index
            into bStart[], whose last valid index is
            BZ_MAX_HANDLED_BLOCKS-1; give up before it would overrun. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS-1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks are held in rbStart/rbEnd[0 .. rbCtr-1]. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   };

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   /*-- Pass 2: copy each identified block to its own file. --*/

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = NULL;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      /* Once 48 bits of the block have been read, pick the block's
         first 32 bits (its CRC) out of the shift register. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         /* Last bit of this block has been copied: terminate the
            output stream and close the file. */
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
         }
         /* Blocks are indexed 0 .. rbCtr-1, so stop once the last one
            is done instead of scanning on with wrBlock == rbCtr, which
            would consult a slot that was never recorded. */
         if (wrBlock+1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Create the output file name, correctly handling leading paths.
            (31.10.2001 by Sergey E. Kusikov) */
         Char* split;
         Int32 ofs, k;
         for (k = 0; k < BZ_MAX_FILENAME; k++)
            outFileName[k] = 0;
         strcpy (outFileName, inFileName);
         split = strrchr (outFileName, BZ_SPLIT_SYM);
         if (split == NULL) {
            split = outFileName;
         } else {
            ++split;
         }
         /* Now split points to the start of the basename. */
         ofs  = split - outFileName;
         sprintf (split, "rec%5d", wrBlock+1);
         /* "%5d" pads with spaces; turn the padding into zeroes. */
         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
         strcat (outFileName, inFileName + ofs);

         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, "   writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         /* Stream header "BZh9" followed by the block header marker. */
         bsWr = bsOpenWriteStream ( outFile );
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
509
510
511
512 /*-----------------------------------------------------------*/
513 /*--- end bzip2recover.c ---*/
514 /*-----------------------------------------------------------*/
515