1
2 /*-------------------------------------------------------------*/
3 /*--- Decompression machinery ---*/
4 /*--- decompress.c ---*/
5 /*-------------------------------------------------------------*/
6
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
10
11 bzip2/libbzip2 version 1.0.5 of 10 December 2007
12 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
15 README file.
16
17 This program is released under the terms of the license contained
18 in the file LICENSE.
19 ------------------------------------------------------------------ */
20
21
22 #include "bzlib_private.h"
23
24
25 /*---------------------------------------------------*/
26 static
makeMaps_d(DState * s)27 void makeMaps_d ( DState* s )
28 {
29 Int32 i;
30 s->nInUse = 0;
31 for (i = 0; i < 256; i++)
32 if (s->inUse[i]) {
33 s->seqToUnseq[s->nInUse] = i;
34 s->nInUse++;
35 }
36 }
37
38
39 /*---------------------------------------------------*/
40 #define RETURN(rrr) \
41 { retVal = rrr; goto save_state_and_return; };
42
43 #define GET_BITS(lll,vvv,nnn) \
44 case lll: s->state = lll; \
45 while (True) { \
46 if (s->bsLive >= nnn) { \
47 UInt32 v; \
48 v = (s->bsBuff >> \
49 (s->bsLive-nnn)) & ((1 << nnn)-1); \
50 s->bsLive -= nnn; \
51 vvv = v; \
52 break; \
53 } \
54 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
55 s->bsBuff \
56 = (s->bsBuff << 8) | \
57 ((UInt32) \
58 (*((UChar*)(s->strm->next_in)))); \
59 s->bsLive += 8; \
60 s->strm->next_in++; \
61 s->strm->avail_in--; \
62 s->strm->total_in_lo32++; \
63 if (s->strm->total_in_lo32 == 0) \
64 s->strm->total_in_hi32++; \
65 }
66
67 #define GET_UCHAR(lll,uuu) \
68 GET_BITS(lll,uuu,8)
69
70 #define GET_BIT(lll,uuu) \
71 GET_BITS(lll,uuu,1)
72
73 /*---------------------------------------------------*/
74 #define GET_MTF_VAL(label1,label2,lval) \
75 { \
76 if (groupPos == 0) { \
77 groupNo++; \
78 if (groupNo >= nSelectors) \
79 RETURN(BZ_DATA_ERROR); \
80 groupPos = BZ_G_SIZE; \
81 gSel = s->selector[groupNo]; \
82 gMinlen = s->minLens[gSel]; \
83 gLimit = &(s->limit[gSel][0]); \
84 gPerm = &(s->perm[gSel][0]); \
85 gBase = &(s->base[gSel][0]); \
86 } \
87 groupPos--; \
88 zn = gMinlen; \
89 GET_BITS(label1, zvec, zn); \
90 while (1) { \
91 if (zn > 20 /* the longest code */) \
92 RETURN(BZ_DATA_ERROR); \
93 if (zvec <= gLimit[zn]) break; \
94 zn++; \
95 GET_BIT(label2, zj); \
96 zvec = (zvec << 1) | zj; \
97 }; \
98 if (zvec - gBase[zn] < 0 \
99 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
100 RETURN(BZ_DATA_ERROR); \
101 lval = gPerm[zvec - gBase[zn]]; \
102 }
103
104
105 /*---------------------------------------------------*/
BZ2_decompress(DState * s)106 Int32 BZ2_decompress ( DState* s )
107 {
108 UChar uc;
109 Int32 retVal;
110 Int32 minLen, maxLen;
111 bz_stream* strm = s->strm;
112
113 /* stuff that needs to be saved/restored */
114 Int32 i;
115 Int32 j;
116 Int32 t;
117 Int32 alphaSize;
118 Int32 nGroups;
119 Int32 nSelectors;
120 Int32 EOB;
121 Int32 groupNo;
122 Int32 groupPos;
123 Int32 nextSym;
124 Int32 nblockMAX;
125 Int32 nblock;
126 Int32 es;
127 Int32 N;
128 Int32 curr;
129 Int32 zt;
130 Int32 zn;
131 Int32 zvec;
132 Int32 zj;
133 Int32 gSel;
134 Int32 gMinlen;
135 Int32* gLimit;
136 Int32* gBase;
137 Int32* gPerm;
138
139 if (s->state == BZ_X_MAGIC_1) {
140 /*initialise the save area*/
141 s->save_i = 0;
142 s->save_j = 0;
143 s->save_t = 0;
144 s->save_alphaSize = 0;
145 s->save_nGroups = 0;
146 s->save_nSelectors = 0;
147 s->save_EOB = 0;
148 s->save_groupNo = 0;
149 s->save_groupPos = 0;
150 s->save_nextSym = 0;
151 s->save_nblockMAX = 0;
152 s->save_nblock = 0;
153 s->save_es = 0;
154 s->save_N = 0;
155 s->save_curr = 0;
156 s->save_zt = 0;
157 s->save_zn = 0;
158 s->save_zvec = 0;
159 s->save_zj = 0;
160 s->save_gSel = 0;
161 s->save_gMinlen = 0;
162 s->save_gLimit = NULL;
163 s->save_gBase = NULL;
164 s->save_gPerm = NULL;
165 }
166
167 /*restore from the save area*/
168 i = s->save_i;
169 j = s->save_j;
170 t = s->save_t;
171 alphaSize = s->save_alphaSize;
172 nGroups = s->save_nGroups;
173 nSelectors = s->save_nSelectors;
174 EOB = s->save_EOB;
175 groupNo = s->save_groupNo;
176 groupPos = s->save_groupPos;
177 nextSym = s->save_nextSym;
178 nblockMAX = s->save_nblockMAX;
179 nblock = s->save_nblock;
180 es = s->save_es;
181 N = s->save_N;
182 curr = s->save_curr;
183 zt = s->save_zt;
184 zn = s->save_zn;
185 zvec = s->save_zvec;
186 zj = s->save_zj;
187 gSel = s->save_gSel;
188 gMinlen = s->save_gMinlen;
189 gLimit = s->save_gLimit;
190 gBase = s->save_gBase;
191 gPerm = s->save_gPerm;
192
193 retVal = BZ_OK;
194
195 switch (s->state) {
196
197 GET_UCHAR(BZ_X_MAGIC_1, uc);
198 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
199
200 GET_UCHAR(BZ_X_MAGIC_2, uc);
201 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
202
203 GET_UCHAR(BZ_X_MAGIC_3, uc)
204 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
205
206 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
207 if (s->blockSize100k < (BZ_HDR_0 + 1) ||
208 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
209 s->blockSize100k -= BZ_HDR_0;
210
211 if (s->smallDecompress) {
212 s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
213 s->ll4 = BZALLOC(
214 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
215 );
216 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
217 } else {
218 s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
219 if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
220 }
221
222 GET_UCHAR(BZ_X_BLKHDR_1, uc);
223
224 if (uc == 0x17) goto endhdr_2;
225 if (uc != 0x31) RETURN(BZ_DATA_ERROR);
226 GET_UCHAR(BZ_X_BLKHDR_2, uc);
227 if (uc != 0x41) RETURN(BZ_DATA_ERROR);
228 GET_UCHAR(BZ_X_BLKHDR_3, uc);
229 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
230 GET_UCHAR(BZ_X_BLKHDR_4, uc);
231 if (uc != 0x26) RETURN(BZ_DATA_ERROR);
232 GET_UCHAR(BZ_X_BLKHDR_5, uc);
233 if (uc != 0x53) RETURN(BZ_DATA_ERROR);
234 GET_UCHAR(BZ_X_BLKHDR_6, uc);
235 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
236
237 s->currBlockNo++;
238 if (s->verbosity >= 2)
239 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
240
241 s->storedBlockCRC = 0;
242 GET_UCHAR(BZ_X_BCRC_1, uc);
243 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
244 GET_UCHAR(BZ_X_BCRC_2, uc);
245 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
246 GET_UCHAR(BZ_X_BCRC_3, uc);
247 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
248 GET_UCHAR(BZ_X_BCRC_4, uc);
249 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
250
251 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
252
253 s->origPtr = 0;
254 GET_UCHAR(BZ_X_ORIGPTR_1, uc);
255 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
256 GET_UCHAR(BZ_X_ORIGPTR_2, uc);
257 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
258 GET_UCHAR(BZ_X_ORIGPTR_3, uc);
259 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
260
261 if (s->origPtr < 0)
262 RETURN(BZ_DATA_ERROR);
263 if (s->origPtr > 10 + 100000*s->blockSize100k)
264 RETURN(BZ_DATA_ERROR);
265
266 /*--- Receive the mapping table ---*/
267 for (i = 0; i < 16; i++) {
268 GET_BIT(BZ_X_MAPPING_1, uc);
269 if (uc == 1)
270 s->inUse16[i] = True; else
271 s->inUse16[i] = False;
272 }
273
274 for (i = 0; i < 256; i++) s->inUse[i] = False;
275
276 for (i = 0; i < 16; i++)
277 if (s->inUse16[i])
278 for (j = 0; j < 16; j++) {
279 GET_BIT(BZ_X_MAPPING_2, uc);
280 if (uc == 1) s->inUse[i * 16 + j] = True;
281 }
282 makeMaps_d ( s );
283 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
284 alphaSize = s->nInUse+2;
285
286 /*--- Now the selectors ---*/
287 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
288 if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
289 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
290 if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
291 for (i = 0; i < nSelectors; i++) {
292 j = 0;
293 while (True) {
294 GET_BIT(BZ_X_SELECTOR_3, uc);
295 if (uc == 0) break;
296 j++;
297 if (j >= nGroups) RETURN(BZ_DATA_ERROR);
298 }
299 s->selectorMtf[i] = j;
300 }
301
302 /*--- Undo the MTF values for the selectors. ---*/
303 {
304 UChar pos[BZ_N_GROUPS], tmp, v;
305 for (v = 0; v < nGroups; v++) pos[v] = v;
306
307 for (i = 0; i < nSelectors; i++) {
308 v = s->selectorMtf[i];
309 tmp = pos[v];
310 while (v > 0) { pos[v] = pos[v-1]; v--; }
311 pos[0] = tmp;
312 s->selector[i] = tmp;
313 }
314 }
315
316 /*--- Now the coding tables ---*/
317 for (t = 0; t < nGroups; t++) {
318 GET_BITS(BZ_X_CODING_1, curr, 5);
319 for (i = 0; i < alphaSize; i++) {
320 while (True) {
321 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
322 GET_BIT(BZ_X_CODING_2, uc);
323 if (uc == 0) break;
324 GET_BIT(BZ_X_CODING_3, uc);
325 if (uc == 0) curr++; else curr--;
326 }
327 s->len[t][i] = curr;
328 }
329 }
330
331 /*--- Create the Huffman decoding tables ---*/
332 for (t = 0; t < nGroups; t++) {
333 minLen = 32;
334 maxLen = 0;
335 for (i = 0; i < alphaSize; i++) {
336 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
337 if (s->len[t][i] < minLen) minLen = s->len[t][i];
338 }
339 BZ2_hbCreateDecodeTables (
340 &(s->limit[t][0]),
341 &(s->base[t][0]),
342 &(s->perm[t][0]),
343 &(s->len[t][0]),
344 minLen, maxLen, alphaSize
345 );
346 s->minLens[t] = minLen;
347 }
348
349 /*--- Now the MTF values ---*/
350
351 EOB = s->nInUse+1;
352 nblockMAX = 100000 * s->blockSize100k;
353 groupNo = -1;
354 groupPos = 0;
355
356 for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
357
358 /*-- MTF init --*/
359 {
360 Int32 ii, jj, kk;
361 kk = MTFA_SIZE-1;
362 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
363 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
364 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
365 kk--;
366 }
367 s->mtfbase[ii] = kk + 1;
368 }
369 }
370 /*-- end MTF init --*/
371
372 nblock = 0;
373 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
374
375 while (True) {
376
377 if (nextSym == EOB) break;
378
379 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
380
381 es = -1;
382 N = 1;
383 do {
384 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
385 if (nextSym == BZ_RUNB) es = es + (1+1) * N;
386 N = N * 2;
387 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
388 }
389 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
390
391 es++;
392 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
393 s->unzftab[uc] += es;
394
395 if (s->smallDecompress)
396 while (es > 0) {
397 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
398 s->ll16[nblock] = (UInt16)uc;
399 nblock++;
400 es--;
401 }
402 else
403 while (es > 0) {
404 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
405 s->tt[nblock] = (UInt32)uc;
406 nblock++;
407 es--;
408 };
409
410 continue;
411
412 } else {
413
414 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
415
416 /*-- uc = MTF ( nextSym-1 ) --*/
417 {
418 Int32 ii, jj, kk, pp, lno, off;
419 UInt32 nn;
420 nn = (UInt32)(nextSym - 1);
421
422 if (nn < MTFL_SIZE) {
423 /* avoid general-case expense */
424 pp = s->mtfbase[0];
425 uc = s->mtfa[pp+nn];
426 while (nn > 3) {
427 Int32 z = pp+nn;
428 s->mtfa[(z) ] = s->mtfa[(z)-1];
429 s->mtfa[(z)-1] = s->mtfa[(z)-2];
430 s->mtfa[(z)-2] = s->mtfa[(z)-3];
431 s->mtfa[(z)-3] = s->mtfa[(z)-4];
432 nn -= 4;
433 }
434 while (nn > 0) {
435 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
436 };
437 s->mtfa[pp] = uc;
438 } else {
439 /* general case */
440 lno = nn / MTFL_SIZE;
441 off = nn % MTFL_SIZE;
442 pp = s->mtfbase[lno] + off;
443 uc = s->mtfa[pp];
444 while (pp > s->mtfbase[lno]) {
445 s->mtfa[pp] = s->mtfa[pp-1]; pp--;
446 };
447 s->mtfbase[lno]++;
448 while (lno > 0) {
449 s->mtfbase[lno]--;
450 s->mtfa[s->mtfbase[lno]]
451 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
452 lno--;
453 }
454 s->mtfbase[0]--;
455 s->mtfa[s->mtfbase[0]] = uc;
456 if (s->mtfbase[0] == 0) {
457 kk = MTFA_SIZE-1;
458 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
459 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
460 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
461 kk--;
462 }
463 s->mtfbase[ii] = kk + 1;
464 }
465 }
466 }
467 }
468 /*-- end uc = MTF ( nextSym-1 ) --*/
469
470 s->unzftab[s->seqToUnseq[uc]]++;
471 if (s->smallDecompress)
472 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
473 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
474 nblock++;
475
476 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
477 continue;
478 }
479 }
480
481 /* Now we know what nblock is, we can do a better sanity
482 check on s->origPtr.
483 */
484 if (s->origPtr < 0 || s->origPtr >= nblock)
485 RETURN(BZ_DATA_ERROR);
486
487 /*-- Set up cftab to facilitate generation of T^(-1) --*/
488 s->cftab[0] = 0;
489 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
490 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
491 for (i = 0; i <= 256; i++) {
492 if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
493 /* s->cftab[i] can legitimately be == nblock */
494 RETURN(BZ_DATA_ERROR);
495 }
496 }
497
498 s->state_out_len = 0;
499 s->state_out_ch = 0;
500 BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
501 s->state = BZ_X_OUTPUT;
502 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
503
504 if (s->smallDecompress) {
505
506 /*-- Make a copy of cftab, used in generation of T --*/
507 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
508
509 /*-- compute the T vector --*/
510 for (i = 0; i < nblock; i++) {
511 uc = (UChar)(s->ll16[i]);
512 SET_LL(i, s->cftabCopy[uc]);
513 s->cftabCopy[uc]++;
514 }
515
516 /*-- Compute T^(-1) by pointer reversal on T --*/
517 i = s->origPtr;
518 j = GET_LL(i);
519 do {
520 Int32 tmp = GET_LL(j);
521 SET_LL(j, i);
522 i = j;
523 j = tmp;
524 }
525 while (i != s->origPtr);
526
527 s->tPos = s->origPtr;
528 s->nblock_used = 0;
529 if (s->blockRandomised) {
530 BZ_RAND_INIT_MASK;
531 BZ_GET_SMALL(s->k0); s->nblock_used++;
532 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
533 } else {
534 BZ_GET_SMALL(s->k0); s->nblock_used++;
535 }
536
537 } else {
538
539 /*-- compute the T^(-1) vector --*/
540 for (i = 0; i < nblock; i++) {
541 uc = (UChar)(s->tt[i] & 0xff);
542 s->tt[s->cftab[uc]] |= (i << 8);
543 s->cftab[uc]++;
544 }
545
546 s->tPos = s->tt[s->origPtr] >> 8;
547 s->nblock_used = 0;
548 if (s->blockRandomised) {
549 BZ_RAND_INIT_MASK;
550 BZ_GET_FAST(s->k0); s->nblock_used++;
551 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
552 } else {
553 BZ_GET_FAST(s->k0); s->nblock_used++;
554 }
555
556 }
557
558 RETURN(BZ_OK);
559
560
561
562 endhdr_2:
563
564 GET_UCHAR(BZ_X_ENDHDR_2, uc);
565 if (uc != 0x72) RETURN(BZ_DATA_ERROR);
566 GET_UCHAR(BZ_X_ENDHDR_3, uc);
567 if (uc != 0x45) RETURN(BZ_DATA_ERROR);
568 GET_UCHAR(BZ_X_ENDHDR_4, uc);
569 if (uc != 0x38) RETURN(BZ_DATA_ERROR);
570 GET_UCHAR(BZ_X_ENDHDR_5, uc);
571 if (uc != 0x50) RETURN(BZ_DATA_ERROR);
572 GET_UCHAR(BZ_X_ENDHDR_6, uc);
573 if (uc != 0x90) RETURN(BZ_DATA_ERROR);
574
575 s->storedCombinedCRC = 0;
576 GET_UCHAR(BZ_X_CCRC_1, uc);
577 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
578 GET_UCHAR(BZ_X_CCRC_2, uc);
579 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
580 GET_UCHAR(BZ_X_CCRC_3, uc);
581 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
582 GET_UCHAR(BZ_X_CCRC_4, uc);
583 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
584
585 s->state = BZ_X_IDLE;
586 RETURN(BZ_STREAM_END);
587
588 default: AssertH ( False, 4001 );
589 }
590
591 AssertH ( False, 4002 );
592
593 save_state_and_return:
594
595 s->save_i = i;
596 s->save_j = j;
597 s->save_t = t;
598 s->save_alphaSize = alphaSize;
599 s->save_nGroups = nGroups;
600 s->save_nSelectors = nSelectors;
601 s->save_EOB = EOB;
602 s->save_groupNo = groupNo;
603 s->save_groupPos = groupPos;
604 s->save_nextSym = nextSym;
605 s->save_nblockMAX = nblockMAX;
606 s->save_nblock = nblock;
607 s->save_es = es;
608 s->save_N = N;
609 s->save_curr = curr;
610 s->save_zt = zt;
611 s->save_zn = zn;
612 s->save_zvec = zvec;
613 s->save_zj = zj;
614 s->save_gSel = gSel;
615 s->save_gMinlen = gMinlen;
616 s->save_gLimit = gLimit;
617 s->save_gBase = gBase;
618 s->save_gPerm = gPerm;
619
620 return retVal;
621 }
622
623
624 /*-------------------------------------------------------------*/
625 /*--- end decompress.c ---*/
626 /*-------------------------------------------------------------*/
627