/*
 * Simple XZ decoder command line tool
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 * Modified for toybox by Isaac Dunham
USE_XZCAT(NEWTOY(xzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN))

config XZCAT
  bool "xzcat"
  default n
  help
    usage: xzcat [filename...]

    Decompress listed files to stdout. Use stdin if no files listed.

*/
#define FOR_xzcat
#include "toys.h"

// BEGIN xz.h

/**
 * enum xz_ret - Return codes
 * @XZ_OK:                 Everything is OK so far. More input or more
 *                         output space is required to continue.
 * @XZ_STREAM_END:         Operation finished successfully.
 * @XZ_UNSUPPORTED_CHECK:  Integrity check type is not supported. Decoding
 *                         is still possible in multi-call mode by simply
 *                         calling xz_dec_run() again.
 *                         Note that this return value is used only if
 *                         XZ_DEC_ANY_CHECK was defined at build time,
 *                         which is not used in the kernel. Unsupported
 *                         check types return XZ_OPTIONS_ERROR if
 *                         XZ_DEC_ANY_CHECK was not defined at build time.
 * @XZ_MEM_ERROR:          Allocating memory failed. The amount of memory
 *                         that the decoder tried to allocate was no more
 *                         than the dict_max argument given to xz_dec_init().
 * @XZ_MEMLIMIT_ERROR:     A bigger LZMA2 dictionary would be needed than
 *                         allowed by the dict_max argument given to
 *                         xz_dec_init().
 * @XZ_FORMAT_ERROR:       File format was not recognized (wrong magic
 *                         bytes).
 * @XZ_OPTIONS_ERROR:      This implementation doesn't support the requested
 *                         compression options. In the decoder this means
 *                         that the header CRC32 matches, but the header
 *                         itself specifies something that we don't support.
 * @XZ_DATA_ERROR:         Compressed data is corrupt.
 * @XZ_BUF_ERROR:          Cannot make any progress. Details are slightly
 *                         different between multi-call and single-call
 *                         mode; more information below.
 *
 * XZ_BUF_ERROR is returned when two consecutive calls to XZ code cannot
 * consume any input and cannot produce any new output. This happens when
 * there is no new input available, or the output buffer is full while at
 * least one output byte is still pending. Assuming your code is not buggy,
 * you can get this error only when decoding a compressed stream that is
 * truncated or otherwise corrupt.
 */
enum xz_ret {
  XZ_OK,
  XZ_STREAM_END,
  XZ_UNSUPPORTED_CHECK,
  XZ_MEM_ERROR,
  XZ_MEMLIMIT_ERROR,
  XZ_FORMAT_ERROR,
  XZ_OPTIONS_ERROR,
  XZ_DATA_ERROR,
  XZ_BUF_ERROR
};

/**
 * struct xz_buf - Passing input and output buffers to XZ code
 * @in:        Beginning of the input buffer. This may be NULL if and only
 *             if in_pos is equal to in_size.
 * @in_pos:    Current position in the input buffer. This must not exceed
 *             in_size.
 * @in_size:   Size of the input buffer
 * @out:       Beginning of the output buffer. This may be NULL if and only
 *             if out_pos is equal to out_size.
 * @out_pos:   Current position in the output buffer. This must not exceed
 *             out_size.
 * @out_size:  Size of the output buffer
 *
 * Only the contents of the output buffer from out[out_pos] onward, and
 * the variables in_pos and out_pos are modified by the XZ code.
 */
struct xz_buf {
  const uint8_t *in;
  size_t in_pos;
  size_t in_size;

  uint8_t *out;
  size_t out_pos;
  size_t out_size;
};

/**
 * struct xz_dec - Opaque type to hold the XZ decoder state
 */
struct xz_dec;

/**
 * xz_dec_init() - Allocate and initialize an XZ decoder state
 * @dict_max:  Maximum size of the LZMA2 dictionary (history buffer) for
 *             multi-call decoding. LZMA2 dictionary is always 2^n bytes
 *             or 2^n + 2^(n-1) bytes (the latter sizes are less common
 *             in practice), so other values for dict_max don't make sense.
 *             In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
 *             512 KiB, and 1 MiB are probably the only reasonable values,
 *             except for kernel and initramfs images where a bigger
 *             dictionary can be fine and useful.
 *
 * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
 * may allocate once it has parsed the dictionary size from the stream
 * headers. This way excessive allocations can be avoided while still
 * limiting the maximum memory usage to a sane value to prevent running the
 * system out of memory when decompressing streams from untrusted sources.
 *
 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
 * ready to be used with xz_dec_run(). If memory allocation fails,
 * xz_dec_init() returns NULL.
 */
struct xz_dec *xz_dec_init(uint32_t dict_max);

/**
 * xz_dec_run() - Run the XZ decoder
 * @s:  Decoder state allocated using xz_dec_init()
 * @b:  Input and output buffers
 *
 * The possible return values depend on build options and operation mode.
 * See enum xz_ret for details.
 *
 * Note that if an error occurs in single-call mode (return value is not
 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
 * contents of the output buffer from b->out[b->out_pos] onward are
 * undefined. This is true even after XZ_BUF_ERROR, because with some filter
 * chains, there may be a second pass over the output buffer, and this pass
 * cannot be properly done if the output buffer is truncated. Thus, you
 * cannot give the single-call decoder a too small buffer and then expect to
 * get that amount of valid data from the beginning of the stream. You must
 * use the multi-call decoder if you don't want to uncompress the whole
 * stream.
 */
enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);

/**
 * xz_dec_reset() - Reset an already allocated decoder state
 * @s:  Decoder state allocated using xz_dec_init()
 *
 * This function can be used to reset the multi-call decoder state without
 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
 *
 * In single-call mode, xz_dec_reset() is always called in the beginning of
 * xz_dec_run(). Thus, an explicit call to xz_dec_reset() is useful only in
 * multi-call mode.
 */
void xz_dec_reset(struct xz_dec *s);

/**
 * xz_dec_end() - Free the memory allocated for the decoder state
 * @s:  Decoder state allocated using xz_dec_init(). If s is NULL,
 *      this function does nothing.
 */
void xz_dec_end(struct xz_dec *s);
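
/*
 * Illustrative sketch (added commentary, not part of the original tool):
 * the minimal multi-call loop the API above is meant for. Buffer names and
 * sizes are arbitrary; do_xzcat() below is the real version with full
 * error handling.
 */
#if 0
static int decode_stream(int fd)
{
  static uint8_t ibuf[BUFSIZ], obuf[BUFSIZ];
  struct xz_buf b = { ibuf, 0, 0, obuf, 0, BUFSIZ };
  struct xz_dec *s = xz_dec_init(1 << 26); /* allow up to 64 MiB dictionary */
  enum xz_ret ret;

  if (!s) return -1;
  do {
    /* Refill the input buffer once it has been fully consumed. */
    if (b.in_pos == b.in_size) {
      b.in_size = read(fd, ibuf, sizeof(ibuf));
      b.in_pos = 0;
    }
    ret = xz_dec_run(s, &b);
    /* Drain whatever output the decoder produced so far. */
    fwrite(obuf, 1, b.out_pos, stdout);
    b.out_pos = 0;
  } while (ret == XZ_OK || ret == XZ_UNSUPPORTED_CHECK);
  xz_dec_end(s);

  return ret == XZ_STREAM_END ? 0 : -1;
}
#endif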

/*
 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
 * calculation, the third argument must be zero. To continue the calculation,
 * the previously returned value is passed as the third argument.
 */
static uint32_t xz_crc32_table[256];

uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
  crc = ~crc;

  while (size != 0) {
    crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
    --size;
  }

  return ~crc;
}
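
/*
 * Illustrative sketch (added commentary): xz_crc32() used incrementally,
 * per the contract above. Hashing a buffer in one call or in two chunks
 * gives the same result.
 */
#if 0
uint32_t whole = xz_crc32(data, len, 0);
uint32_t part = xz_crc32(data, len/2, 0);

part = xz_crc32(data + len/2, len - len/2, part); /* part == whole */
#endif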

static uint64_t xz_crc64_table[256];
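
/*
 * Sketch (an assumption mirroring xz_crc32() above, not code from this
 * excerpt): the CRC64 update routine that would pair with xz_crc64_table.
 * The table itself is filled in do_xzcat() below from the bit-reflected
 * polynomial 0xC96C5795D7870F42.
 */
#if 0
uint64_t xz_crc64(const uint8_t *buf, size_t size, uint64_t crc)
{
  crc = ~crc;

  while (size != 0) {
    crc = xz_crc64_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
    --size;
  }

  return ~crc;
}
#endif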

// END xz.h

static uint8_t in[BUFSIZ];
static uint8_t out[BUFSIZ];

void do_xzcat(int fd, char *name)
{
  struct xz_buf b;
  struct xz_dec *s;
  enum xz_ret ret;
  const char *msg;

  crc_init(xz_crc32_table, 1);
  const uint64_t poly = 0xC96C5795D7870F42ULL;
  uint32_t i;
  uint32_t j;
  uint64_t r;

  /* Initialize the CRC64 table. */
  for (i = 0; i < 256; ++i) {
    r = i;
    for (j = 0; j < 8; ++j)
      r = (r >> 1) ^ (poly & ~((r & 1) - 1));

    xz_crc64_table[i] = r;
  }

  /*
   * Support up to 64 MiB dictionary. The actually needed memory
   * is allocated once the headers have been parsed.
   */
  s = xz_dec_init(1 << 26);
  if (s == NULL) {
    msg = "Memory allocation failed\n";
    goto error;
  }

  b.in = in;
  b.in_pos = 0;
  b.in_size = 0;
  b.out = out;
  b.out_pos = 0;
  b.out_size = BUFSIZ;

  for (;;) {
    if (b.in_pos == b.in_size) {
      b.in_size = read(fd, in, sizeof(in));
      b.in_pos = 0;
    }

    ret = xz_dec_run(s, &b);

    if (b.out_pos == sizeof(out)) {
      if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
        msg = "Write error\n";
        goto error;
      }

      b.out_pos = 0;
    }

    if (ret == XZ_OK)
      continue;

    if (ret == XZ_UNSUPPORTED_CHECK)
      continue;

    if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
      msg = "Write error\n";
      goto error;
    }

    switch (ret) {
    case XZ_STREAM_END:
      xz_dec_end(s);
      return;

    case XZ_MEM_ERROR:
      msg = "Memory allocation failed\n";
      goto error;

    case XZ_MEMLIMIT_ERROR:
      msg = "Memory usage limit reached\n";
      goto error;

    case XZ_FORMAT_ERROR:
      msg = "Not a .xz file\n";
      goto error;

    case XZ_OPTIONS_ERROR:
      msg = "Unsupported options in the .xz headers\n";
      goto error;

    case XZ_DATA_ERROR:
    case XZ_BUF_ERROR:
      msg = "File is corrupt\n";
      goto error;

    default:
      msg = "Bug!\n";
      goto error;
    }
  }

error:
  xz_dec_end(s);
  error_exit("%s", msg);
}

void xzcat_main(void)
{
  loopfiles(toys.optargs, do_xzcat);
}

// BEGIN xz_private.h

/*
 * Uncomment as needed to enable BCJ filter decoders.
 * These cost about 2.5 KiB of code when all are enabled; SPARC and IA64
 * account for about 0.7 KiB of that.
 */

#define XZ_DEC_X86
#define XZ_DEC_POWERPC
#define XZ_DEC_IA64
#define XZ_DEC_ARM
#define XZ_DEC_ARMTHUMB
#define XZ_DEC_SPARC

#define memeq(a, b, size) (memcmp(a, b, size) == 0)

/* Inline functions to access unaligned unsigned 32-bit integers */
#ifndef get_unaligned_le32
static inline uint32_t get_unaligned_le32(const uint8_t *buf)
{
  return (uint32_t)buf[0]
      | ((uint32_t)buf[1] << 8)
      | ((uint32_t)buf[2] << 16)
      | ((uint32_t)buf[3] << 24);
}
#endif

#ifndef get_unaligned_be32
static inline uint32_t get_unaligned_be32(const uint8_t *buf)
{
  /* Cast before shifting so the top byte isn't shifted into the sign bit
   * of a promoted int. */
  return ((uint32_t)buf[0] << 24)
      | ((uint32_t)buf[1] << 16)
      | ((uint32_t)buf[2] << 8)
      | (uint32_t)buf[3];
}
#endif

#ifndef put_unaligned_le32
static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
{
  buf[0] = (uint8_t)val;
  buf[1] = (uint8_t)(val >> 8);
  buf[2] = (uint8_t)(val >> 16);
  buf[3] = (uint8_t)(val >> 24);
}
#endif

#ifndef put_unaligned_be32
static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
{
  buf[0] = (uint8_t)(val >> 24);
  buf[1] = (uint8_t)(val >> 16);
  buf[2] = (uint8_t)(val >> 8);
  buf[3] = (uint8_t)val;
}
#endif

/*
 * Use get_unaligned_le32() also for aligned access for simplicity. On
 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
 * could save a few bytes in code size.
 */
#ifndef get_le32
# define get_le32 get_unaligned_le32
#endif

/*
 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
 */
#ifndef XZ_DEC_BCJ
# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
    || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
    || defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
#  define XZ_DEC_BCJ
# endif
#endif

/*
 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
 * before calling xz_dec_lzma2_run().
 */
struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max);

/*
 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
 * decoder doesn't support.
 */
enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props);

/* Decode raw LZMA2 stream from b->in to b->out. */
enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b);

// END "xz_private.h"

/*
 * Branch/Call/Jump (BCJ) filter decoders
 * The rest of the code is inside this ifdef. It makes things a little more
 * convenient when building without support for any BCJ filters.
 */
#ifdef XZ_DEC_BCJ

struct xz_dec_bcj {
  /* Type of the BCJ filter being used */
  enum {
    BCJ_X86 = 4,        /* x86 or x86-64 */
    BCJ_POWERPC = 5,    /* Big endian only */
    BCJ_IA64 = 6,       /* Big or little endian */
    BCJ_ARM = 7,        /* Little endian only */
    BCJ_ARMTHUMB = 8,   /* Little endian only */
    BCJ_SPARC = 9       /* Big or little endian */
  } type;

  /*
   * Return value of the next filter in the chain. We need to preserve
   * this information across calls, because we must not call the next
   * filter anymore once it has returned XZ_STREAM_END.
   */
  enum xz_ret ret;

  /*
   * Absolute position relative to the beginning of the uncompressed
   * data (in a single .xz Block). We care only about the lowest 32
   * bits so this doesn't need to be uint64_t even with big files.
   */
  uint32_t pos;

  /* x86 filter state */
  uint32_t x86_prev_mask;

  /* Temporary space to hold the variables from struct xz_buf */
  uint8_t *out;
  size_t out_pos;
  size_t out_size;

  struct {
    /* Amount of already filtered data in the beginning of buf */
    size_t filtered;

    /* Total amount of data currently stored in buf */
    size_t size;

    /*
     * Buffer to hold a mix of filtered and unfiltered data. This
     * needs to be big enough to hold Alignment + 2 * Look-ahead:
     *
     * Type       Alignment  Look-ahead
     * x86            1          4
     * PowerPC        4          0
     * IA-64         16          0
     * ARM            4          0
     * ARM-Thumb      2          2
     * SPARC          4          0
     */
    uint8_t buf[16];
  } temp;
};

/*
 * Decode the Filter ID of a BCJ filter. This implementation doesn't
 * support custom start offsets, so no decoding of Filter Properties
 * is needed. Returns XZ_OK if the given Filter ID is supported.
 * Otherwise XZ_OPTIONS_ERROR is returned.
 */
enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);

/*
 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
 * must be called directly.
 */
enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2,
                           struct xz_buf *b);

#ifdef XZ_DEC_X86
/*
 * This is used to test the most significant byte of a memory address
 * in an x86 instruction.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
  return b == 0x00 || b == 0xFF;
}

static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
  static const int mask_to_allowed_status[8] = { 1, 1, 1, 0, 1, 0, 0, 0 };
  static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

  size_t i;
  size_t prev_pos = (size_t)-1;
  uint32_t prev_mask = s->x86_prev_mask;
  uint32_t src;
  uint32_t dest;
  uint32_t j;
  uint8_t b;

  if (size <= 4)
    return 0;

  size -= 4;
  for (i = 0; i < size; ++i) {
    if ((buf[i] & 0xFE) != 0xE8)
      continue;

    prev_pos = i - prev_pos;
    if (prev_pos > 3) {
      prev_mask = 0;
    } else {
      prev_mask = (prev_mask << (prev_pos - 1)) & 7;
      if (prev_mask != 0) {
        b = buf[i + 4 - mask_to_bit_num[prev_mask]];
        if (!mask_to_allowed_status[prev_mask]
            || bcj_x86_test_msbyte(b)) {
          prev_pos = i;
          prev_mask = (prev_mask << 1) | 1;
          continue;
        }
      }
    }

    prev_pos = i;

    if (bcj_x86_test_msbyte(buf[i + 4])) {
      src = get_unaligned_le32(buf + i + 1);
      for (;;) {
        dest = src - (s->pos + (uint32_t)i + 5);
        if (prev_mask == 0)
          break;

        j = mask_to_bit_num[prev_mask] * 8;
        b = (uint8_t)(dest >> (24 - j));
        if (!bcj_x86_test_msbyte(b))
          break;

        src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
      }

      dest &= 0x01FFFFFF;
      dest |= (uint32_t)0 - (dest & 0x01000000);
      put_unaligned_le32(dest, buf + i + 1);
      i += 4;
    } else {
      prev_mask = (prev_mask << 1) | 1;
    }
  }

  prev_pos = i - prev_pos;
  s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
  return i;
}
#endif
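
/*
 * Worked example (added commentary): bcj_x86() undoes the encoder's
 * absolute-address transform on E8 (CALL) operands. If a CALL sits at
 * uncompressed offset 0x100 and the filtered stream stores the absolute
 * target 0x1000, the decoder restores the relative displacement
 * 0x1000 - (0x100 + 5) = 0xEFB, measured from the end of the 5-byte
 * instruction.
 */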

#ifdef XZ_DEC_POWERPC
static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
  size_t i;
  uint32_t instr;

  for (i = 0; i + 4 <= size; i += 4) {
    instr = get_unaligned_be32(buf + i);
    if ((instr & 0xFC000003) == 0x48000001) {
      instr &= 0x03FFFFFC;
      instr -= s->pos + (uint32_t)i;
      instr &= 0x03FFFFFC;
      instr |= 0x48000001;
      put_unaligned_be32(instr, buf + i);
    }
  }

  return i;
}
#endif

#ifdef XZ_DEC_IA64
static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
  static const uint8_t branch_table[32] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    4, 4, 6, 6, 0, 0, 7, 7,
    4, 4, 0, 0, 4, 4, 0, 0
  };

  /*
   * The local variables take a little bit stack space, but it's less
   * than what LZMA2 decoder takes, so it doesn't make sense to reduce
   * stack usage here without doing that for the LZMA2 decoder too.
   */

  /* Loop counters */
  size_t i;
  size_t j;

  /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
  uint32_t slot;

  /* Bitwise offset of the instruction indicated by slot */
  uint32_t bit_pos;

  /* bit_pos split into byte and bit parts */
  uint32_t byte_pos;
  uint32_t bit_res;

  /* Address part of an instruction */
  uint32_t addr;

  /* Mask used to detect which instructions to convert */
  uint32_t mask;

  /* 41-bit instruction stored somewhere in the lowest 48 bits */
  uint64_t instr;

  /* Instruction normalized with bit_res for easier manipulation */
  uint64_t norm;

  for (i = 0; i + 16 <= size; i += 16) {
    mask = branch_table[buf[i] & 0x1F];
    for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
      if (((mask >> slot) & 1) == 0)
        continue;

      byte_pos = bit_pos >> 3;
      bit_res = bit_pos & 7;
      instr = 0;
      for (j = 0; j < 6; ++j)
        instr |= (uint64_t)(buf[i + j + byte_pos]) << (8 * j);

      norm = instr >> bit_res;

      if (((norm >> 37) & 0x0F) == 0x05 && ((norm >> 9) & 0x07) == 0) {
        addr = (norm >> 13) & 0x0FFFFF;
        addr |= ((uint32_t)(norm >> 36) & 1) << 20;
        addr <<= 4;
        addr -= s->pos + (uint32_t)i;
        addr >>= 4;

        norm &= ~((uint64_t)0x8FFFFF << 13);
        norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
        norm |= (uint64_t)(addr & 0x100000) << (36 - 20);

        instr &= (1 << bit_res) - 1;
        instr |= norm << bit_res;

        for (j = 0; j < 6; j++)
          buf[i + j + byte_pos] = (uint8_t)(instr >> (8 * j));
      }
    }
  }

  return i;
}
#endif

#ifdef XZ_DEC_ARM
static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
  size_t i;
  uint32_t addr;

  for (i = 0; i + 4 <= size; i += 4) {
    if (buf[i + 3] == 0xEB) {
      addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
          | ((uint32_t)buf[i + 2] << 16);
      addr <<= 2;
      addr -= s->pos + (uint32_t)i + 8;
      addr >>= 2;
      buf[i] = (uint8_t)addr;
      buf[i + 1] = (uint8_t)(addr >> 8);
      buf[i + 2] = (uint8_t)(addr >> 16);
    }
  }

  return i;
}
#endif

#ifdef XZ_DEC_ARMTHUMB
static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
  size_t i;
  uint32_t addr;

  for (i = 0; i + 4 <= size; i += 2) {
    if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) {
      addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
          | ((uint32_t)buf[i] << 11)
          | (((uint32_t)buf[i + 3] & 0x07) << 8)
          | (uint32_t)buf[i + 2];
      addr <<= 1;
      addr -= s->pos + (uint32_t)i + 4;
      addr >>= 1;
      buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
      buf[i] = (uint8_t)(addr >> 11);
      buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
      buf[i + 2] = (uint8_t)addr;
      i += 2;
    }
  }

  return i;
}
#endif

#ifdef XZ_DEC_SPARC
static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
  size_t i;
  uint32_t instr;

  for (i = 0; i + 4 <= size; i += 4) {
    instr = get_unaligned_be32(buf + i);
    if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
      instr <<= 2;
      instr -= s->pos + (uint32_t)i;
      instr >>= 2;
      instr = ((uint32_t)0x40000000 - (instr & 0x400000))
          | 0x40000000 | (instr & 0x3FFFFF);
      put_unaligned_be32(instr, buf + i);
    }
  }

  return i;
}
#endif

/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, which must
 * avoid pointers to static data (at least on x86).
 */
static void bcj_apply(struct xz_dec_bcj *s, uint8_t *buf, size_t *pos,
                      size_t size)
{
  size_t filtered;

  buf += *pos;
  size -= *pos;

  switch (s->type) {
#ifdef XZ_DEC_X86
  case BCJ_X86:
    filtered = bcj_x86(s, buf, size);
    break;
#endif
#ifdef XZ_DEC_POWERPC
  case BCJ_POWERPC:
    filtered = bcj_powerpc(s, buf, size);
    break;
#endif
#ifdef XZ_DEC_IA64
  case BCJ_IA64:
    filtered = bcj_ia64(s, buf, size);
    break;
#endif
#ifdef XZ_DEC_ARM
  case BCJ_ARM:
    filtered = bcj_arm(s, buf, size);
    break;
#endif
#ifdef XZ_DEC_ARMTHUMB
  case BCJ_ARMTHUMB:
    filtered = bcj_armthumb(s, buf, size);
    break;
#endif
#ifdef XZ_DEC_SPARC
  case BCJ_SPARC:
    filtered = bcj_sparc(s, buf, size);
    break;
#endif
  default:
    /* Never reached but silence compiler warnings. */
    filtered = 0;
    break;
  }

  *pos += filtered;
  s->pos += filtered;
}

/*
 * Flush pending filtered data from temp to the output buffer.
 * Move the remaining mixture of possibly filtered and unfiltered
 * data to the beginning of temp.
 */
static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
{
  size_t copy_size;

  copy_size = minof(s->temp.filtered, b->out_size - b->out_pos);
  memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
  b->out_pos += copy_size;

  s->temp.filtered -= copy_size;
  s->temp.size -= copy_size;
  memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
}

/*
 * The BCJ filter functions are primitive in the sense that they process
 * the data in chunks of 1-16 bytes. To hide this issue, this function
 * does some buffering.
 */
enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2,
                           struct xz_buf *b)
{
  size_t out_start;

  /*
   * Flush pending already filtered data to the output buffer. Return
   * immediately if we couldn't flush everything, or if the next
   * filter in the chain had already returned XZ_STREAM_END.
   */
  if (s->temp.filtered > 0) {
    bcj_flush(s, b);
    if (s->temp.filtered > 0)
      return XZ_OK;

    if (s->ret == XZ_STREAM_END)
      return XZ_STREAM_END;
  }

  /*
   * If we have more output space than what is currently pending in
   * temp, copy the unfiltered data from temp to the output buffer
   * and try to fill the output buffer by decoding more data from the
   * next filter in the chain. Apply the BCJ filter on the new data
   * in the output buffer. If everything cannot be filtered, copy it
   * to temp and rewind the output buffer position accordingly.
   *
   * This needs to be always run when temp.size == 0 to handle a special
   * case where the output buffer is full and the next filter has no
   * more output coming but hasn't returned XZ_STREAM_END yet.
   */
  if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
    out_start = b->out_pos;
    memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
    b->out_pos += s->temp.size;

    s->ret = xz_dec_lzma2_run(lzma2, b);
    if (s->ret != XZ_STREAM_END && s->ret != XZ_OK)
      return s->ret;

    bcj_apply(s, b->out, &out_start, b->out_pos);

    /*
     * As an exception, if the next filter returned XZ_STREAM_END,
     * we can do that too, since the last few bytes that remain
     * unfiltered are meant to remain unfiltered.
     */
    if (s->ret == XZ_STREAM_END)
      return XZ_STREAM_END;

    s->temp.size = b->out_pos - out_start;
    b->out_pos -= s->temp.size;
    memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);

    /*
     * If there wasn't enough input to the next filter to fill
     * the output buffer with unfiltered data, there's no point
     * to try decoding more data to temp.
     */
    if (b->out_pos + s->temp.size < b->out_size)
      return XZ_OK;
  }

  /*
   * We have unfiltered data in temp. If the output buffer isn't full
   * yet, try to fill the temp buffer by decoding more data from the
   * next filter. Apply the BCJ filter on temp. Then we hopefully can
   * fill the actual output buffer by copying filtered data from temp.
   * A mix of filtered and unfiltered data may be left in temp; it will
   * be taken care of on the next call to this function.
   */
  if (b->out_pos < b->out_size) {
    /* Make b->out{,_pos,_size} temporarily point to s->temp. */
    s->out = b->out;
    s->out_pos = b->out_pos;
    s->out_size = b->out_size;
    b->out = s->temp.buf;
    b->out_pos = s->temp.size;
    b->out_size = sizeof(s->temp.buf);

    s->ret = xz_dec_lzma2_run(lzma2, b);

    s->temp.size = b->out_pos;
    b->out = s->out;
    b->out_pos = s->out_pos;
    b->out_size = s->out_size;

    if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
      return s->ret;

    bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

    /*
     * If the next filter returned XZ_STREAM_END, we mark that
     * everything is filtered, since the last unfiltered bytes
     * of the stream are meant to be left as is.
     */
    if (s->ret == XZ_STREAM_END)
      s->temp.filtered = s->temp.size;

    bcj_flush(s, b);
    if (s->temp.filtered > 0)
      return XZ_OK;
  }

  return s->ret;
}

enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
{
  switch (id) {
#ifdef XZ_DEC_X86
  case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
  case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
  case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
  case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
  case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
  case BCJ_SPARC:
#endif
    break;

  default:
    /* Unsupported Filter ID */
    return XZ_OPTIONS_ERROR;
  }

  s->type = id;
  s->ret = XZ_OK;
  s->pos = 0;
  s->x86_prev_mask = 0;
  s->temp.filtered = 0;
  s->temp.size = 0;

  return XZ_OK;
}

#endif
/*
 * LZMA2 decoder
 */

// BEGIN xz_lzma2.h
/*
 * LZMA2 definitions
 */

/* Range coder constants */
#define RC_SHIFT_BITS 8
#define RC_TOP_BITS 24
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
#define RC_BIT_MODEL_TOTAL_BITS 11
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
#define RC_MOVE_BITS 5

/*
 * Maximum number of position states. A position state is the lowest pb
 * number of bits of the current uncompressed offset. In some places there
 * are different sets of probabilities for different position states.
 */
#define POS_STATES_MAX (1 << 4)

/*
 * This enum is used to track which LZMA symbols have occurred most recently
 * and in which order. This information is used to predict the next symbol.
 *
 * Symbols:
 *  - Literal: One 8-bit byte
 *  - Match: Repeat a chunk of data at some distance
 *  - Long repeat: Multi-byte match at a recently seen distance
 *  - Short repeat: One-byte repeat at a recently seen distance
 *
 * The symbol names are of the form STATE_oldest_older_previous. REP means
 * either short or long repeated match, and NONLIT means any non-literal.
 */
enum lzma_state {
  STATE_LIT_LIT,
  STATE_MATCH_LIT_LIT,
  STATE_REP_LIT_LIT,
  STATE_SHORTREP_LIT_LIT,
  STATE_MATCH_LIT,
  STATE_REP_LIT,
  STATE_SHORTREP_LIT,
  STATE_LIT_MATCH,
  STATE_LIT_LONGREP,
  STATE_LIT_SHORTREP,
  STATE_NONLIT_MATCH,
  STATE_NONLIT_REP
};

/* Total number of states */
#define STATES 12

/* The lowest 7 states indicate that the previous state was a literal. */
#define LIT_STATES 7

/* Indicate that the latest symbol was a literal. */
static inline void lzma_state_literal(enum lzma_state *state)
{
  if (*state <= STATE_SHORTREP_LIT_LIT)
    *state = STATE_LIT_LIT;
  else if (*state <= STATE_LIT_SHORTREP)
    *state -= 3;
  else
    *state -= 6;
}
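
/*
 * Worked example (added commentary): in STATE_LIT_MATCH (7) the two most
 * recent symbols are "literal, then match". Decoding a literal next
 * subtracts 3, giving STATE_MATCH_LIT (4): the match ages into the older
 * slot and the new literal becomes the most recent symbol.
 */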

/* Indicate that the latest symbol was a match. */
static inline void lzma_state_match(enum lzma_state *state)
{
  *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
}

/* Indicate that the latest state was a long repeated match. */
static inline void lzma_state_long_rep(enum lzma_state *state)
{
  *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
}

/* Indicate that the latest symbol was a short match. */
static inline void lzma_state_short_rep(enum lzma_state *state)
{
  *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
}

/* Test if the previous symbol was a literal. */
static inline int lzma_state_is_literal(enum lzma_state state)
{
  return state < LIT_STATES;
}

/*
 * Each literal coder is divided into three sections:
 *  - 0x001-0x0FF: Without match byte
 *  - 0x101-0x1FF: With match byte; match bit is 0
 *  - 0x201-0x2FF: With match byte; match bit is 1
 *
 * Match byte is used when the previous LZMA symbol was something other
 * than a literal (that is, it was some kind of match).
 */
#define LITERAL_CODER_SIZE 0x300

/* Maximum number of literal coders */
#define LITERAL_CODERS_MAX (1 << 4)

/* Minimum length of a match is two bytes. */
#define MATCH_LEN_MIN 2

/*
 * Match length is encoded with 4, 5, or 10 bits.
 *
 * Length  Bits
 *  2-9     4 = Choice=0 + 3 bits
 * 10-17    5 = Choice=1 + Choice2=0 + 3 bits
 * 18-273  10 = Choice=1 + Choice2=1 + 8 bits
 */
#define LEN_LOW_BITS 3
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
#define LEN_MID_BITS 3
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
#define LEN_HIGH_BITS 8
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
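
/*
 * Worked example (added commentary): a match length of 100 falls in the
 * 18-273 band, so it is coded as Choice=1, Choice2=1 plus the 8-bit value
 * 82, since 100 = MATCH_LEN_MIN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + 82
 * = 2 + 8 + 8 + 82.
 */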

/*
 * Maximum length of a match is 273 which is a result of the encoding
 * described above.
 */
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)

/*
 * Different sets of probabilities are used for match distances that have
 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
 * set of probabilities for each length. The matches with longer length
 * use a shared set of probabilities.
 */
#define DIST_STATES 4

/*
 * Get the index of the appropriate probability array for decoding
 * the distance slot.
 */
static inline uint32_t lzma_get_dist_state(uint32_t len)
{
  return len < DIST_STATES + MATCH_LEN_MIN
      ? len - MATCH_LEN_MIN : DIST_STATES - 1;
}

/*
 * The highest two bits of a 32-bit match distance are encoded using six bits.
 * This six-bit value is called a distance slot. This way encoding a 32-bit
 * value takes 6-36 bits, larger values taking more bits.
 */
#define DIST_SLOT_BITS 6
#define DIST_SLOTS (1 << DIST_SLOT_BITS)

/*
 * Match distances up to 127 are fully encoded using probabilities. Since
 * the highest two bits (distance slot) are always encoded using six bits,
 * the distances 0-3 don't need any additional bits to encode, since the
 * distance slot itself is the same as the actual distance. DIST_MODEL_START
 * indicates the first distance slot where at least one additional bit is
 * needed.
 */
#define DIST_MODEL_START 4

/*
 * Match distances greater than 127 are encoded in three pieces:
 *  - distance slot: the highest two bits
 *  - direct bits: 2-26 bits below the highest two bits
 *  - alignment bits: four lowest bits
 *
 * Direct bits don't use any probabilities.
 *
 * The distance slot value of 14 is for distances 128-191.
 */
#define DIST_MODEL_END 14

/* Distance slots that indicate a distance <= 127. */
#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
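
/*
 * Worked example (added commentary): distance 100 is 1100100 in binary,
 * so its two highest bits give dist_slot 13 (= 2*6 + 1), which covers
 * distances 96-127. The remaining 100 - 96 = 4 is sent in the slot's
 * (13 >> 1) - 1 = 5 additional bits, all probability-coded since 100 < 128.
 */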

/*
 * For match distances greater than 127, only the highest two bits and the
 * lowest four bits (alignment) are encoded using probabilities.
 */
#define ALIGN_BITS 4
#define ALIGN_SIZE (1 << ALIGN_BITS)
#define ALIGN_MASK (ALIGN_SIZE - 1)

/* Total number of all probability variables */
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)

/*
 * LZMA remembers the four most recent match distances. Reusing these
 * distances tends to take less space than re-encoding the actual
 * distance value.
 */
#define REPS 4

// END xz_lzma2.h

/*
 * Range decoder initialization eats the first five bytes of each LZMA chunk.
 */
#define RC_INIT_BYTES 5

/*
 * Minimum number of usable input bytes needed to safely decode one LZMA
 * symbol. The worst case is that we decode 22 bits using probabilities
 * and 26 direct bits. This may consume at most 20 bytes of input.
 * However, lzma_main() does an extra normalization before returning,
 * thus we need to put 21 here.
 */
#define LZMA_IN_REQUIRED 21

/*
 * Dictionary (history buffer)
 *
 * These are always true:
 *    start <= pos <= full <= end
 *    pos <= limit <= end
 *    end == size
 *    size <= size_max
 *    allocated <= size
 *
 * Most of these variables are size_t as a relic of single-call mode,
 * in which the dictionary variables address the actual output
 * buffer directly.
 */
struct dictionary {
  /* Beginning of the history buffer */
  uint8_t *buf;

  /* Old position in buf (before decoding more data) */
  size_t start;

  /* Position in buf */
  size_t pos;

  /*
   * How full the dictionary is. This is used to detect corrupt input that
   * would read beyond the beginning of the uncompressed stream.
   */
  size_t full;

  /* Write limit; we don't write to buf[limit] or later bytes. */
  size_t limit;

  /* End of the dictionary buffer. This is the same as the dictionary size. */
  size_t end;

  /*
   * Size of the dictionary as specified in Block Header. This is used
   * together with "full" to detect corrupt input that would make us
   * read beyond the beginning of the uncompressed stream.
   */
  uint32_t size;

  /* Maximum allowed dictionary size. */
  uint32_t size_max;

  /* Amount of memory currently allocated for the dictionary. */
  uint32_t allocated;
};

/* Range decoder */
struct rc_dec {
  uint32_t range;
  uint32_t code;

  /*
   * Number of initializing bytes remaining to be read
   * by rc_read_init().
   */
  uint32_t init_bytes_left;

  /*
   * Buffer from which we read our input. It can be either
   * temp.buf or the caller-provided input buffer.
   */
  const uint8_t *in;
  size_t in_pos;
  size_t in_limit;
};

/* Probabilities for a length decoder. */
struct lzma_len_dec {
  /* Probability of match length being at least 10 */
  uint16_t choice;

  /* Probability of match length being at least 18 */
  uint16_t choice2;

  /* Probabilities for match lengths 2-9 */
  uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

  /* Probabilities for match lengths 10-17 */
  uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

  /* Probabilities for match lengths 18-273 */
  uint16_t high[LEN_HIGH_SYMBOLS];
};

struct lzma_dec {
  /* Distances of latest four matches */
  uint32_t rep0;
  uint32_t rep1;
  uint32_t rep2;
  uint32_t rep3;

  /* Types of the most recently seen LZMA symbols */
  enum lzma_state state;

  /*
   * Length of a match. This is updated so that dict_repeat can
   * be called again to finish repeating the whole match.
   */
  uint32_t len;

  /*
   * LZMA properties or related bit masks (number of literal
   * context bits, a mask derived from the number of literal
   * position bits, and a mask derived from the number of
   * position bits)
   */
  uint32_t lc;
  uint32_t literal_pos_mask; /* (1 << lp) - 1 */
  uint32_t pos_mask;         /* (1 << pb) - 1 */

  /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
  uint16_t is_match[STATES][POS_STATES_MAX];

  /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
  uint16_t is_rep[STATES];

  /*
   * If 0, distance of a repeated match is rep0.
   * Otherwise check is_rep1.
   */
  uint16_t is_rep0[STATES];

  /*
   * If 0, distance of a repeated match is rep1.
   * Otherwise check is_rep2.
   */
  uint16_t is_rep1[STATES];

  /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
  uint16_t is_rep2[STATES];

  /*
   * If 1, the repeated match has length of one byte. Otherwise
   * the length is decoded from rep_len_decoder.
   */
  uint16_t is_rep0_long[STATES][POS_STATES_MAX];

  /*
   * Probability tree for the highest two bits of the match
   * distance. There is a separate probability tree for match
   * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
   */
  uint16_t dist_slot[DIST_STATES][DIST_SLOTS];

  /*
   * Probability trees for additional bits for match distance
   * when the distance is in the range [4, 127].
   */
  uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];

  /*
   * Probability tree for the lowest four bits of a match
   * distance that is equal to or greater than 128.
   */
  uint16_t dist_align[ALIGN_SIZE];

  /* Length of a normal match */
  struct lzma_len_dec match_len_dec;

  /* Length of a repeated match */
  struct lzma_len_dec rep_len_dec;

  /* Probabilities of literals */
  uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};

struct lzma2_dec {
  /* Position in xz_dec_lzma2_run(). */
  enum lzma2_seq {
    SEQ_CONTROL,
    SEQ_UNCOMPRESSED_1,
    SEQ_UNCOMPRESSED_2,
    SEQ_COMPRESSED_0,
    SEQ_COMPRESSED_1,
    SEQ_PROPERTIES,
    SEQ_LZMA_PREPARE,
    SEQ_LZMA_RUN,
    SEQ_COPY
  } sequence;

  /* Next position after decoding the compressed size of the chunk. */
  enum lzma2_seq next_sequence;

  /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
  uint32_t uncompressed;

  /*
   * Compressed size of LZMA chunk or compressed/uncompressed
   * size of uncompressed chunk (64 KiB at maximum)
   */
  uint32_t compressed;

  /*
   * True if dictionary reset is needed. This is false before
   * the first chunk (LZMA or uncompressed).
   */
  int need_dict_reset;

  /*
   * True if new LZMA properties are needed. This is false
   * before the first LZMA chunk.
   */
  int need_props;
};

struct xz_dec_lzma2 {
  /*
   * The order below is important on x86 to reduce code size and
   * it shouldn't hurt on other platforms. Everything up to and
   * including lzma.pos_mask are in the first 128 bytes on x86-32,
   * which allows using smaller instructions to access those
   * variables. On x86-64, fewer variables fit into the first 128
   * bytes, but this is still the best order without sacrificing
   * the readability by splitting the structures.
   */
  struct rc_dec rc;
  struct dictionary dict;
  struct lzma2_dec lzma2;
  struct lzma_dec lzma;

  /*
   * Temporary buffer which holds small number of input bytes between
   * decoder calls. See lzma2_lzma() for details.
   */
  struct {
    uint32_t size;
    uint8_t buf[3 * LZMA_IN_REQUIRED];
  } temp;
};

/**************
 * Dictionary *
 **************/

/* Reset the dictionary state. */
static void dict_reset(struct dictionary *dict)
{
  dict->start = 0;
  dict->pos = 0;
  dict->limit = 0;
  dict->full = 0;
}

/* Set dictionary write limit */
static void dict_limit(struct dictionary *dict, size_t out_max)
{
  if (dict->end - dict->pos <= out_max)
    dict->limit = dict->end;
  else
    dict->limit = dict->pos + out_max;
}

/* Return true if at least one byte can be written into the dictionary. */
static inline int dict_has_space(const struct dictionary *dict)
{
  return dict->pos < dict->limit;
}

/*
 * Get a byte from the dictionary at the given distance. The distance is
 * assumed to be valid, or as a special case, zero when the dictionary is
 * still empty. This special case is needed for single-call decoding to
 * avoid writing a '\0' to the end of the destination buffer.
 */
static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
{
  size_t offset = dict->pos - dist - 1;

  if (dist >= dict->pos)
    offset += dict->end;

  return dict->full > 0 ? dict->buf[offset] : 0;
}

/*
 * Put one byte into the dictionary. It is assumed that there is space for it.
 */
static inline void dict_put(struct dictionary *dict, uint8_t byte)
{
  dict->buf[dict->pos++] = byte;

  if (dict->full < dict->pos)
    dict->full = dict->pos;
}

/*
 * Repeat the given number of bytes from the given distance. If the distance
 * is invalid, false is returned. On success, true is returned and *len is
 * updated to indicate how many bytes are left to be repeated.
 */
static int dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
{
  size_t back;
  uint32_t left;

  if (dist >= dict->full || dist >= dict->size) return 0;

  left = minof(dict->limit - dict->pos, *len);
  *len -= left;

  back = dict->pos - dist - 1;
  if (dist >= dict->pos)
    back += dict->end;

  do {
    dict->buf[dict->pos++] = dict->buf[back++];
    if (back == dict->end)
      back = 0;
  } while (--left > 0);

  if (dict->full < dict->pos)
    dict->full = dict->pos;

  return 1;
}
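
/*
 * Worked example (added commentary): dist == 0 repeats the most recent
 * byte. With pos == 5 and *len == 4, back starts at 4 and each iteration
 * copies the byte just written, so buf[5..8] become a run of buf[4] --
 * the classic LZ77 overlapping-copy trick.
 */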

/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
                              uint32_t *left)
{
  size_t copy_size;

  while (*left > 0 && b->in_pos < b->in_size && b->out_pos < b->out_size) {
    copy_size = minof(b->in_size - b->in_pos, b->out_size - b->out_pos);
    if (copy_size > dict->end - dict->pos)
      copy_size = dict->end - dict->pos;
    if (copy_size > *left)
      copy_size = *left;

    *left -= copy_size;

    memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
    dict->pos += copy_size;

    if (dict->full < dict->pos)
      dict->full = dict->pos;

    if (dict->pos == dict->end)
      dict->pos = 0;

    memcpy(b->out + b->out_pos, b->in + b->in_pos, copy_size);

    dict->start = dict->pos;

    b->out_pos += copy_size;
    b->in_pos += copy_size;
  }
}

/*
 * Flush pending data from dictionary to b->out. It is assumed that there is
 * enough space in b->out. This is guaranteed because caller uses dict_limit()
 * before decoding data into the dictionary.
 */
static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
{
  size_t copy_size = dict->pos - dict->start;

  if (dict->pos == dict->end)
    dict->pos = 0;

  memcpy(b->out + b->out_pos, dict->buf + dict->start, copy_size);

  dict->start = dict->pos;
  b->out_pos += copy_size;
  return copy_size;
}

/*****************
 * Range decoder *
 *****************/

/* Reset the range decoder. */
static void rc_reset(struct rc_dec *rc)
{
  rc->range = (uint32_t)-1;
  rc->code = 0;
  rc->init_bytes_left = RC_INIT_BYTES;
}

/*
 * Read the first five initial bytes into rc->code if they haven't been
 * read already. (Yes, the first byte gets completely ignored.)
 */
static int rc_read_init(struct rc_dec *rc, struct xz_buf *b)
{
  while (rc->init_bytes_left > 0) {
    if (b->in_pos == b->in_size) return 0;

    rc->code = (rc->code << 8) + b->in[b->in_pos++];
    --rc->init_bytes_left;
  }

  return 1;
}

/* Return true if there may not be enough input for the next decoding loop. */
static inline int rc_limit_exceeded(const struct rc_dec *rc)
{
  return rc->in_pos > rc->in_limit;
}

/*
 * Return true if it is possible (from point of view of range decoder) that
 * we have reached the end of the LZMA chunk.
 */
static inline int rc_is_finished(const struct rc_dec *rc)
{
  return rc->code == 0;
}

/* Read the next input byte if needed. */
static inline void rc_normalize(struct rc_dec *rc)
{
  if (rc->range < RC_TOP_VALUE) {
    rc->range <<= RC_SHIFT_BITS;
    rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
  }
}

/*
 * Decode one bit. In some versions, this function has been split into three
 * functions so that the compiler is supposed to be able to more easily avoid
 * an extra branch. In this particular version of the LZMA decoder, this
 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 * on x86). Using a non-split version results in nicer looking code too.
 *
 * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 */
static inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
{
  uint32_t bound;
  int bit;

  rc_normalize(rc);
  bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
  if (rc->code < bound) {
    rc->range = bound;
    *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
    bit = 0;
  } else {
    rc->range -= bound;
    rc->code -= bound;
    *prob -= *prob >> RC_MOVE_BITS;
    bit = 1;
  }

  return bit;
}
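
/*
 * Worked example (added commentary): right after rc_reset(), range is
 * 0xFFFFFFFF. With a fresh probability of RC_BIT_MODEL_TOTAL / 2 (1024),
 * bound = (0xFFFFFFFF >> 11) * 1024 = 0x7FFFFC00, so the range splits
 * almost exactly in half, as expected for an unbiased bit.
 */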

/* Decode a bittree starting from the most significant bit. */
static inline uint32_t rc_bittree(struct rc_dec *rc, uint16_t *probs,
                                  uint32_t limit)
{
  uint32_t symbol = 1;

  do {
    if (rc_bit(rc, &probs[symbol]))
      symbol = (symbol << 1) + 1;
    else
      symbol <<= 1;
  } while (symbol < limit);

  return symbol;
}

/* Decode a bittree starting from the least significant bit. */
static inline void rc_bittree_reverse(struct rc_dec *rc, uint16_t *probs,
                                      uint32_t *dest, uint32_t limit)
{
  uint32_t symbol = 1;
  uint32_t i = 0;

  do {
    if (rc_bit(rc, &probs[symbol])) {
      symbol = (symbol << 1) + 1;
      *dest += 1 << i;
    } else {
      symbol <<= 1;
    }
  } while (++i < limit);
}

/* Decode direct bits (fixed fifty-fifty probability) */
static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{
  uint32_t mask;

  do {
    rc_normalize(rc);
    rc->range >>= 1;
    rc->code -= rc->range;
    mask = (uint32_t)0 - (rc->code >> 31);
    rc->code += rc->range & mask;
    *dest = (*dest << 1) + (mask + 1);
  } while (--limit > 0);
}

/********
 * LZMA *
 ********/

/* Get pointer to literal coder probability array. */
static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
{
  uint32_t prev_byte = dict_get(&s->dict, 0);
  uint32_t low = prev_byte >> (8 - s->lzma.lc);
  uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
  return s->lzma.literal[low + high];
}

/* Decode a literal (one 8-bit byte) */
static void lzma_literal(struct xz_dec_lzma2 *s)
{
  uint16_t *probs;
  uint32_t symbol;
  uint32_t match_byte;
  uint32_t match_bit;
  uint32_t offset;
  uint32_t i;

  probs = lzma_literal_probs(s);

  if (lzma_state_is_literal(s->lzma.state)) {
    symbol = rc_bittree(&s->rc, probs, 0x100);
  } else {
    symbol = 1;
    match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
    offset = 0x100;

    do {
      match_bit = match_byte & offset;
      match_byte <<= 1;
      i = offset + match_bit + symbol;

      if (rc_bit(&s->rc, &probs[i])) {
        symbol = (symbol << 1) + 1;
        offset &= match_bit;
      } else {
        symbol <<= 1;
        offset &= ~match_bit;
      }
    } while (symbol < 0x100);
  }

  dict_put(&s->dict, (uint8_t)symbol);
  lzma_state_literal(&s->lzma.state);
}

/* Decode the length of the match into s->lzma.len. */
static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
                     uint32_t pos_state)
{
  uint16_t *probs;
  uint32_t limit;

  if (!rc_bit(&s->rc, &l->choice)) {
    probs = l->low[pos_state];
    limit = LEN_LOW_SYMBOLS;
    s->lzma.len = MATCH_LEN_MIN;
  } else {
    if (!rc_bit(&s->rc, &l->choice2)) {
      probs = l->mid[pos_state];
      limit = LEN_MID_SYMBOLS;
      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
    } else {
      probs = l->high;
      limit = LEN_HIGH_SYMBOLS;
      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS;
    }
  }

  s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
}

/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
  uint16_t *probs;
  uint32_t dist_slot;
  uint32_t limit;

  lzma_state_match(&s->lzma.state);

  s->lzma.rep3 = s->lzma.rep2;
  s->lzma.rep2 = s->lzma.rep1;
  s->lzma.rep1 = s->lzma.rep0;

  lzma_len(s, &s->lzma.match_len_dec, pos_state);

  probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
  dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;

  if (dist_slot < DIST_MODEL_START) {
    s->lzma.rep0 = dist_slot;
  } else {
    limit = (dist_slot >> 1) - 1;
    s->lzma.rep0 = 2 + (dist_slot & 1);

    if (dist_slot < DIST_MODEL_END) {
      s->lzma.rep0 <<= limit;
      probs = s->lzma.dist_special + s->lzma.rep0 - dist_slot - 1;
      rc_bittree_reverse(&s->rc, probs, &s->lzma.rep0, limit);
    } else {
      rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
      s->lzma.rep0 <<= ALIGN_BITS;
      rc_bittree_reverse(&s->rc, s->lzma.dist_align, &s->lzma.rep0,
                         ALIGN_BITS);
    }
  }
}

/*
 * Decode a repeated match. The distance is one of the four most recently
 * seen matches. The distance will be stored in s->lzma.rep0.
 */
static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
  uint32_t tmp;

  if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
    if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[s->lzma.state][pos_state])) {
      lzma_state_short_rep(&s->lzma.state);
      s->lzma.len = 1;
      return;
    }
  } else {
    if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
      tmp = s->lzma.rep1;
    } else {
      if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
        tmp = s->lzma.rep2;
      } else {
        tmp = s->lzma.rep3;
        s->lzma.rep3 = s->lzma.rep2;
      }

      s->lzma.rep2 = s->lzma.rep1;
    }

    s->lzma.rep1 = s->lzma.rep0;
    s->lzma.rep0 = tmp;
  }

  lzma_state_long_rep(&s->lzma.state);
  lzma_len(s, &s->lzma.rep_len_dec, pos_state);
}

/* LZMA decoder core */
static int lzma_main(struct xz_dec_lzma2 *s)
{
  uint32_t pos_state;

  /*
   * If the dictionary limit was reached during the previous call, try to
   * finish the possibly pending repeat in the dictionary.
   */
1872 if (dict_has_space(&s->dict) && s->lzma.len > 0)
1873 dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
1874
1875 /*
1876 * Decode more LZMA symbols. One iteration may consume up to
1877 * LZMA_IN_REQUIRED - 1 bytes.
1878 */
1879 while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
1880 pos_state = s->dict.pos & s->lzma.pos_mask;
1881
1882 if (!rc_bit(&s->rc, &s->lzma.is_match[
1883 s->lzma.state][pos_state])) {
1884 lzma_literal(s);
1885 } else {
1886 if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
1887 lzma_rep_match(s, pos_state);
1888 else
1889 lzma_match(s, pos_state);
1890
1891 if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
1892 return 0;
1893 }
1894 }
1895
1896 /*
1897 * Having the range decoder always normalized when we are outside
1898 * this function makes it easier to correctly handle end of the chunk.
1899 */
1900 rc_normalize(&s->rc);
1901
1902 return 1;
1903 }

/*
 * Reset the LZMA decoder and range decoder state. The dictionary is not
 * reset here, because LZMA state may be reset without resetting the
 * dictionary.
 */
static void lzma_reset(struct xz_dec_lzma2 *s)
{
  uint16_t *probs;
  size_t i;

  s->lzma.state = STATE_LIT_LIT;
  s->lzma.rep0 = 0;
  s->lzma.rep1 = 0;
  s->lzma.rep2 = 0;
  s->lzma.rep3 = 0;

  /*
   * All probabilities are initialized to the same value. This hack
   * makes the code smaller by avoiding a separate loop for each
   * probability array.
   *
   * This could be optimized so that only the part of the literal
   * probabilities that is actually required gets initialized. In the
   * common case we would write 12 KiB less.
   */
  probs = s->lzma.is_match[0];
  for (i = 0; i < PROBS_TOTAL; ++i)
    probs[i] = RC_BIT_MODEL_TOTAL / 2;

  rc_reset(&s->rc);
}
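/*
 * RC_BIT_MODEL_TOTAL / 2 is the midpoint of the range coder's
 * probability scale, i.e. every bit starts out equally likely to be
 * 0 or 1. Assuming the standard LZMA 11-bit scale (RC_BIT_MODEL_TOTAL
 * being 2048), each model starts at 1024 and adapts toward 0 or 2048
 * as bits are decoded.
 */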

/*
 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 * from the decoded lp and pb values. On success, the LZMA decoder state is
 * reset and true is returned.
 */
static int lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
{
  if (props > (4 * 5 + 4) * 9 + 8)
    return 0;

  s->lzma.pos_mask = 0;
  while (props >= 9 * 5) {
    props -= 9 * 5;
    ++s->lzma.pos_mask;
  }

  s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;

  s->lzma.literal_pos_mask = 0;
  while (props >= 9) {
    props -= 9;
    ++s->lzma.literal_pos_mask;
  }

  s->lzma.lc = props;

  if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
    return 0;

  s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;

  lzma_reset(s);

  return 1;
}
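/*
 * Worked example of the props decoding above, for illustration: the byte
 * is (pb * 5 + lp) * 9 + lc. The common default preset encodes to
 * 0x5D == 93: 93 / 45 == 2 (pb = 2, so pos_mask = 0x3), remainder
 * 3 / 9 == 0 (lp = 0, so literal_pos_mask = 0x0), remainder 3 (lc = 3).
 * The lc + lp > 4 check rejects combinations whose literal probability
 * arrays would not fit in this decoder.
 */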

/*********
 * LZMA2 *
 *********/

/*
 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 * wrapper function takes care of making the LZMA decoder's assumption safe.
 *
 * As long as there is plenty of input left to be decoded in the current LZMA
 * chunk, we decode directly from the caller-supplied input buffer until
 * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied
 * into s->temp.buf, which (hopefully) gets filled on the next call to this
 * function. We decode a few bytes from the temporary buffer so that we can
 * continue decoding from the caller-supplied input buffer again.
 */
static int lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
{
  size_t in_avail;
  uint32_t tmp;

  in_avail = b->in_size - b->in_pos;
  if (s->temp.size > 0 || s->lzma2.compressed == 0) {
    tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
    if (tmp > s->lzma2.compressed - s->temp.size)
      tmp = s->lzma2.compressed - s->temp.size;
    if (tmp > in_avail)
      tmp = in_avail;

    memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);

    if (s->temp.size + tmp == s->lzma2.compressed) {
      memset(s->temp.buf + s->temp.size + tmp, 0,
             sizeof(s->temp.buf) - s->temp.size - tmp);
      s->rc.in_limit = s->temp.size + tmp;
    } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
      s->temp.size += tmp;
      b->in_pos += tmp;
      return 1;
    } else {
      s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
    }

    s->rc.in = s->temp.buf;
    s->rc.in_pos = 0;

    if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
      return 0;

    s->lzma2.compressed -= s->rc.in_pos;

    if (s->rc.in_pos < s->temp.size) {
      s->temp.size -= s->rc.in_pos;
      memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, s->temp.size);
      return 1;
    }

    b->in_pos += s->rc.in_pos - s->temp.size;
    s->temp.size = 0;
  }

  in_avail = b->in_size - b->in_pos;
  if (in_avail >= LZMA_IN_REQUIRED) {
    s->rc.in = b->in;
    s->rc.in_pos = b->in_pos;

    if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
      s->rc.in_limit = b->in_pos + s->lzma2.compressed;
    else
      s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;

    if (!lzma_main(s))
      return 0;

    in_avail = s->rc.in_pos - b->in_pos;
    if (in_avail > s->lzma2.compressed)
      return 0;

    s->lzma2.compressed -= in_avail;
    b->in_pos = s->rc.in_pos;
  }

  in_avail = b->in_size - b->in_pos;
  if (in_avail < LZMA_IN_REQUIRED) {
    if (in_avail > s->lzma2.compressed)
      in_avail = s->lzma2.compressed;

    memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
    s->temp.size = in_avail;
    b->in_pos += in_avail;
  }

  return 1;
}
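/*
 * A concrete scenario for the buffering above, assuming LZMA_IN_REQUIRED
 * is 21 as in upstream XZ Embedded: suppose a call ends with 5 chunk
 * bytes unread. Those 5 bytes go into s->temp.buf and the call returns.
 * On the next call, up to 2 * LZMA_IN_REQUIRED - 5 fresh bytes are
 * appended, so the decoder can safely over-read while consuming the 5
 * straddling bytes, after which decoding switches back to the caller's
 * buffer. The memset() of the tail of temp.buf at the end of a chunk
 * keeps any over-read deterministic.
 */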

/*
 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
 * decoding or copying of uncompressed chunks to other functions.
 */
enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b)
{
  uint32_t tmp;

  while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
    switch (s->lzma2.sequence) {
    case SEQ_CONTROL:
      /*
       * LZMA2 control byte
       *
       * Exact values:
       *   0x00  End marker
       *   0x01  Dictionary reset followed by
       *         an uncompressed chunk
       *   0x02  Uncompressed chunk (no dictionary reset)
       *
       * Highest three bits (s->control & 0xE0):
       *   0xE0  Dictionary reset, new properties and state
       *         reset, followed by LZMA compressed chunk
       *   0xC0  New properties and state reset, followed
       *         by LZMA compressed chunk (no dictionary
       *         reset)
       *   0xA0  State reset using old properties,
       *         followed by LZMA compressed chunk (no
       *         dictionary reset)
       *   0x80  LZMA chunk (no dictionary or state reset)
       *
       * For LZMA compressed chunks, the lowest five bits
       * (s->control & 0x1F) are the highest bits of the
       * uncompressed size (bits 16-20).
       *
       * A new LZMA2 stream must begin with a dictionary
       * reset. The first LZMA chunk must set new
       * properties and reset the LZMA state.
       *
       * Values that don't match anything described above
       * are invalid and we return XZ_DATA_ERROR.
       */
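      /*
       * Worked example, for illustration: the chunk header bytes
       * E0 03 FF 00 FF 5D parse as control 0xE0 (dictionary reset,
       * new properties and state reset, LZMA chunk, size bits
       * 16-20 = 0), uncompressed size (0x03 << 8) + 0xFF + 1 = 1024,
       * compressed size (0x00 << 8) + 0xFF + 1 = 256, then the
       * properties byte 0x5D. Both size fields are stored minus one,
       * which is why a zero-length chunk cannot be encoded.
       */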
      tmp = b->in[b->in_pos++];

      if (tmp == 0x00)
        return XZ_STREAM_END;

      if (tmp >= 0xE0 || tmp == 0x01) {
        s->lzma2.need_props = 1;
        s->lzma2.need_dict_reset = 0;
        dict_reset(&s->dict);
      } else if (s->lzma2.need_dict_reset) {
        return XZ_DATA_ERROR;
      }

      if (tmp >= 0x80) {
        s->lzma2.uncompressed = (tmp & 0x1F) << 16;
        s->lzma2.sequence = SEQ_UNCOMPRESSED_1;

        if (tmp >= 0xC0) {
          /*
           * When there are new properties,
           * state reset is done at
           * SEQ_PROPERTIES.
           */
          s->lzma2.need_props = 0;
          s->lzma2.next_sequence = SEQ_PROPERTIES;
        } else if (s->lzma2.need_props) {
          return XZ_DATA_ERROR;
        } else {
          s->lzma2.next_sequence = SEQ_LZMA_PREPARE;
          if (tmp >= 0xA0)
            lzma_reset(s);
        }
      } else {
        if (tmp > 0x02)
          return XZ_DATA_ERROR;

        s->lzma2.sequence = SEQ_COMPRESSED_0;
        s->lzma2.next_sequence = SEQ_COPY;
      }

      break;

    case SEQ_UNCOMPRESSED_1:
      s->lzma2.uncompressed += (uint32_t)b->in[b->in_pos++] << 8;
      s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
      break;

    case SEQ_UNCOMPRESSED_2:
      s->lzma2.uncompressed += (uint32_t)b->in[b->in_pos++] + 1;
      s->lzma2.sequence = SEQ_COMPRESSED_0;
      break;

    case SEQ_COMPRESSED_0:
      s->lzma2.compressed = (uint32_t)b->in[b->in_pos++] << 8;
      s->lzma2.sequence = SEQ_COMPRESSED_1;
      break;

    case SEQ_COMPRESSED_1:
      s->lzma2.compressed += (uint32_t)b->in[b->in_pos++] + 1;
      s->lzma2.sequence = s->lzma2.next_sequence;
      break;

    case SEQ_PROPERTIES:
      if (!lzma_props(s, b->in[b->in_pos++]))
        return XZ_DATA_ERROR;

      s->lzma2.sequence = SEQ_LZMA_PREPARE;
      /* Fall through */

    case SEQ_LZMA_PREPARE:
      if (s->lzma2.compressed < RC_INIT_BYTES)
        return XZ_DATA_ERROR;

      if (!rc_read_init(&s->rc, b))
        return XZ_OK;

      s->lzma2.compressed -= RC_INIT_BYTES;
      s->lzma2.sequence = SEQ_LZMA_RUN;
      /* Fall through */

    case SEQ_LZMA_RUN:
      /*
       * Set dictionary limit to indicate how much we want
       * to be decoded at maximum. Decode new data into the
       * dictionary. Flush the new data from dictionary to
       * b->out. Check if we finished decoding this chunk.
       * In case the dictionary got full but we didn't fill
       * the output buffer yet, we may run this loop
       * multiple times without changing s->lzma2.sequence.
       */
      dict_limit(&s->dict, minof(b->out_size - b->out_pos,
                                 s->lzma2.uncompressed));
      if (!lzma2_lzma(s, b))
        return XZ_DATA_ERROR;

      s->lzma2.uncompressed -= dict_flush(&s->dict, b);

      if (s->lzma2.uncompressed == 0) {
        if (s->lzma2.compressed > 0 || s->lzma.len > 0
            || !rc_is_finished(&s->rc))
          return XZ_DATA_ERROR;

        rc_reset(&s->rc);
        s->lzma2.sequence = SEQ_CONTROL;
      } else if (b->out_pos == b->out_size
                 || (b->in_pos == b->in_size
                     && s->temp.size < s->lzma2.compressed)) {
        return XZ_OK;
      }

      break;

    case SEQ_COPY:
      dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
      if (s->lzma2.compressed > 0)
        return XZ_OK;

      s->lzma2.sequence = SEQ_CONTROL;
      break;
    }
  }

  return XZ_OK;
}

struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max)
{
  struct xz_dec_lzma2 *s = malloc(sizeof(*s));
  if (s == NULL)
    return NULL;

  s->dict.size_max = dict_max;
  s->dict.buf = NULL;
  s->dict.allocated = 0;

  return s;
}

enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
{
  /* This limits dictionary size to 3 GiB to keep parsing simpler. */
  if (props > 39)
    return XZ_OPTIONS_ERROR;

  s->dict.size = 2 + (props & 1);
  s->dict.size <<= (props >> 1) + 11;

  if (s->dict.size > s->dict.size_max)
    return XZ_MEMLIMIT_ERROR;

  s->dict.end = s->dict.size;

  if (s->dict.allocated < s->dict.size) {
    free(s->dict.buf);
    s->dict.buf = malloc(s->dict.size);
    if (s->dict.buf == NULL) {
      s->dict.allocated = 0;
      return XZ_MEM_ERROR;
    }
    /* Remember the allocation so later resets can reuse the buffer. */
    s->dict.allocated = s->dict.size;
  }

  s->lzma.len = 0;

  s->lzma2.sequence = SEQ_CONTROL;
  s->lzma2.need_dict_reset = 1;

  s->temp.size = 0;

  return XZ_OK;
}
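/*
 * Worked example of the dictionary size encoding above, for
 * illustration: props 0 gives 2 << 11 = 4 KiB, props 1 gives
 * 3 << 11 = 6 KiB, and the sizes alternate between 2 * 2^n and
 * 3 * 2^n up to props 39, which gives 3 << 30 = 3 GiB. This is the
 * one-byte dictionary size field of the LZMA2 filter properties.
 */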

/*
 * .xz Stream decoder
 */


// BEGIN xz_stream.h
/*
 * Definitions for handling the .xz file format
 */

/*
 * See the .xz file format specification at
 * http://tukaani.org/xz/xz-file-format.txt
 * to understand the container format.
 */

#define STREAM_HEADER_SIZE 12

#define HEADER_MAGIC "\3757zXZ"
#define HEADER_MAGIC_SIZE 6

#define FOOTER_MAGIC "YZ"
#define FOOTER_MAGIC_SIZE 2

/*
 * Variable-length integer can hold a 63-bit unsigned integer or a special
 * value indicating that the value is unknown.
 *
 * Experimental: vli_type can be defined to uint32_t to save a few bytes
 * in code size (no effect on speed). Doing so limits the uncompressed and
 * compressed size of the file to less than 256 MiB and may also weaken
 * error detection slightly.
 */
typedef uint64_t vli_type;

#define VLI_MAX ((vli_type)-1 / 2)
#define VLI_UNKNOWN ((vli_type)-1)

/* Maximum encoded size of a VLI */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)

/* Integrity Check types */
enum xz_check {
  XZ_CHECK_NONE = 0,
  XZ_CHECK_CRC32 = 1,
  XZ_CHECK_CRC64 = 4,
  XZ_CHECK_SHA256 = 10
};

/* Maximum possible Check ID */
#define XZ_CHECK_MAX 15
// END xz_stream.h

#define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64)

/* Hash used to validate the Index field */
struct xz_dec_hash {
  vli_type unpadded;
  vli_type uncompressed;
  uint32_t crc32;
};

struct xz_dec {
  /* Position in dec_main() */
  enum {
    SEQ_STREAM_HEADER,
    SEQ_BLOCK_START,
    SEQ_BLOCK_HEADER,
    SEQ_BLOCK_UNCOMPRESS,
    SEQ_BLOCK_PADDING,
    SEQ_BLOCK_CHECK,
    SEQ_INDEX,
    SEQ_INDEX_PADDING,
    SEQ_INDEX_CRC32,
    SEQ_STREAM_FOOTER
  } sequence;

  /* Position in variable-length integers and Check fields */
  uint32_t pos;

  /* Variable-length integer decoded by dec_vli() */
  vli_type vli;

  /* Saved in_pos and out_pos */
  size_t in_start;
  size_t out_start;

  /* CRC32 or CRC64 value in Block or CRC32 value in Index */
  uint64_t crc;

  /* Type of the integrity check calculated from uncompressed data */
  enum xz_check check_type;

  /*
   * True if the next call to xz_dec_run() is allowed to return
   * XZ_BUF_ERROR.
   */
  int allow_buf_error;

  /* Information stored in Block Header */
  struct {
    /*
     * Value stored in the Compressed Size field, or
     * VLI_UNKNOWN if Compressed Size is not present.
     */
    vli_type compressed;

    /*
     * Value stored in the Uncompressed Size field, or
     * VLI_UNKNOWN if Uncompressed Size is not present.
     */
    vli_type uncompressed;

    /* Size of the Block Header field */
    uint32_t size;
  } block_header;

  /* Information collected when decoding Blocks */
  struct {
    /* Observed compressed size of the current Block */
    vli_type compressed;

    /* Observed uncompressed size of the current Block */
    vli_type uncompressed;

    /* Number of Blocks decoded so far */
    vli_type count;

    /*
     * Hash calculated from the Block sizes. This is used to
     * validate the Index field.
     */
    struct xz_dec_hash hash;
  } block;

  /* Variables needed when verifying the Index field */
  struct {
    /* Position in dec_index() */
    enum {
      SEQ_INDEX_COUNT,
      SEQ_INDEX_UNPADDED,
      SEQ_INDEX_UNCOMPRESSED
    } sequence;

    /* Size of the Index in bytes */
    vli_type size;

    /* Number of Records (matches block.count in valid files) */
    vli_type count;

    /*
     * Hash calculated from the Records (matches block.hash in
     * valid files).
     */
    struct xz_dec_hash hash;
  } index;

  /*
   * Temporary buffer needed to hold Stream Header, Block Header,
   * and Stream Footer. The Block Header is the biggest (1 KiB)
   * so we reserve space according to that. buf[] has to be aligned
   * to a multiple of four bytes; the size_t variables before it
   * should guarantee this.
   */
  struct {
    size_t pos;
    size_t size;
    uint8_t buf[1024];
  } temp;

  struct xz_dec_lzma2 *lzma2;

#ifdef XZ_DEC_BCJ
  struct xz_dec_bcj *bcj;
  int bcj_active;
#endif
};

/* Sizes of the Check field with different Check IDs */
static const uint8_t check_sizes[16] = {
  0,
  4, 4, 4,
  8, 8, 8,
  16, 16, 16,
  32, 32, 32,
  64, 64, 64
};

/*
 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
 * must have set s->temp.size to indicate how much data we are supposed
 * to copy into s->temp.buf. Return true once s->temp.pos has reached
 * s->temp.size.
 */
static int fill_temp(struct xz_dec *s, struct xz_buf *b)
{
  size_t copy_size = minof(b->in_size - b->in_pos,
                           s->temp.size - s->temp.pos);

  memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
  b->in_pos += copy_size;
  s->temp.pos += copy_size;

  if (s->temp.pos == s->temp.size) {
    s->temp.pos = 0;
    return 1;
  }

  return 0;
}

/* Decode a variable-length integer (little-endian base-128 encoding) */
static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
                           size_t *in_pos, size_t in_size)
{
  uint8_t byte;

  if (s->pos == 0)
    s->vli = 0;

  while (*in_pos < in_size) {
    byte = in[*in_pos];
    ++*in_pos;

    s->vli |= (vli_type)(byte & 0x7F) << s->pos;

    if ((byte & 0x80) == 0) {
      /* Don't allow non-minimal encodings. */
      if (byte == 0 && s->pos != 0)
        return XZ_DATA_ERROR;

      s->pos = 0;
      return XZ_STREAM_END;
    }

    s->pos += 7;
    if (s->pos == 7 * VLI_BYTES_MAX)
      return XZ_DATA_ERROR;
  }

  return XZ_OK;
}
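/*
 * Worked example of the VLI encoding above, for illustration: the value
 * 300 is stored as 0xAC 0x02. The first byte contributes
 * (0xAC & 0x7F) << 0 = 44 and its set high bit says more bytes follow;
 * the second contributes (0x02 & 0x7F) << 7 = 256, and its clear high
 * bit terminates the integer: 44 + 256 = 300. A trailing 0x00
 * continuation byte would be a non-minimal encoding and is rejected
 * above.
 */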

/*
 * Decode the Compressed Data field from a Block. Update and validate
 * the observed compressed and uncompressed sizes of the Block so that
 * they don't exceed the values possibly stored in the Block Header
 * (validation assumes that no integer overflow occurs, since vli_type
 * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
 * the CRC32 or CRC64 field was indicated in Stream Header.
 *
 * Once the decoding is finished, validate that the observed sizes match
 * the sizes possibly stored in the Block Header. Update the hash and
 * Block count, which are later used to validate the Index field.
 */
static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
{
  enum xz_ret ret;

  s->in_start = b->in_pos;
  s->out_start = b->out_pos;

#ifdef XZ_DEC_BCJ
  if (s->bcj_active)
    ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
  else
#endif
    ret = xz_dec_lzma2_run(s->lzma2, b);

  s->block.compressed += b->in_pos - s->in_start;
  s->block.uncompressed += b->out_pos - s->out_start;

  /*
   * There is no need to separately check for VLI_UNKNOWN, since
   * the observed sizes are always smaller than VLI_UNKNOWN.
   */
  if (s->block.compressed > s->block_header.compressed
      || s->block.uncompressed > s->block_header.uncompressed)
    return XZ_DATA_ERROR;

  if (s->check_type == XZ_CHECK_CRC32)
    s->crc = xz_crc32(b->out + s->out_start,
                      b->out_pos - s->out_start, s->crc);
  else if (s->check_type == XZ_CHECK_CRC64) {
    /* Table-driven, byte-at-a-time CRC64 over the newly produced output. */
    size_t size = b->out_pos - s->out_start;
    uint8_t *buf = b->out + s->out_start;

    s->crc = ~s->crc;
    while (size) {
      s->crc = xz_crc64_table[*buf++ ^ (s->crc & 0xFF)] ^ (s->crc >> 8);
      --size;
    }
    s->crc = ~s->crc;
  }

  if (ret == XZ_STREAM_END) {
    if (s->block_header.compressed != VLI_UNKNOWN
        && s->block_header.compressed != s->block.compressed)
      return XZ_DATA_ERROR;

    if (s->block_header.uncompressed != VLI_UNKNOWN
        && s->block_header.uncompressed != s->block.uncompressed)
      return XZ_DATA_ERROR;

    s->block.hash.unpadded += s->block_header.size + s->block.compressed;
    s->block.hash.unpadded += check_sizes[s->check_type];

    s->block.hash.uncompressed += s->block.uncompressed;
    s->block.hash.crc32 = xz_crc32((const uint8_t *)&s->block.hash,
                                   sizeof(s->block.hash),
                                   s->block.hash.crc32);

    ++s->block.count;
  }

  return ret;
}

/* Update the Index size and the CRC32 value. */
static void index_update(struct xz_dec *s, const struct xz_buf *b)
{
  size_t in_used = b->in_pos - s->in_start;

  s->index.size += in_used;
  s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
}

/*
 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
 * fields from the Index field. That is, Index Padding and CRC32 are not
 * decoded by this function.
 *
 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
 */
static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
{
  enum xz_ret ret;

  do {
    ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
    if (ret != XZ_STREAM_END) {
      index_update(s, b);
      return ret;
    }

    switch (s->index.sequence) {
    case SEQ_INDEX_COUNT:
      s->index.count = s->vli;

      /*
       * Validate that the Number of Records field
       * indicates the same number of Records as
       * there were Blocks in the Stream.
       */
      if (s->index.count != s->block.count)
        return XZ_DATA_ERROR;

      s->index.sequence = SEQ_INDEX_UNPADDED;
      break;

    case SEQ_INDEX_UNPADDED:
      s->index.hash.unpadded += s->vli;
      s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
      break;

    case SEQ_INDEX_UNCOMPRESSED:
      s->index.hash.uncompressed += s->vli;
      s->index.hash.crc32 = xz_crc32((const uint8_t *)&s->index.hash,
                                     sizeof(s->index.hash),
                                     s->index.hash.crc32);
      --s->index.count;
      s->index.sequence = SEQ_INDEX_UNPADDED;
      break;
    }
  } while (s->index.count > 0);

  return XZ_STREAM_END;
}

/*
 * Validate that the next four or eight input bytes match the value
 * of s->crc. s->pos must be zero when starting to validate the first byte.
 * The "bits" argument allows using the same code for both CRC32 and CRC64.
 */
static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
                                uint32_t bits)
{
  do {
    if (b->in_pos == b->in_size)
      return XZ_OK;

    if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
      return XZ_DATA_ERROR;

    s->pos += 8;
  } while (s->pos < bits);

  s->crc = 0;
  s->pos = 0;

  return XZ_STREAM_END;
}

/*
 * Skip over the Check field when the Check ID is not supported.
 * Returns true once the whole Check field has been skipped over.
 */
static int check_skip(struct xz_dec *s, struct xz_buf *b)
{
  while (s->pos < check_sizes[s->check_type]) {
    if (b->in_pos == b->in_size)
      return 0;

    ++b->in_pos;
    ++s->pos;
  }

  s->pos = 0;

  return 1;
}

/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
static enum xz_ret dec_stream_header(struct xz_dec *s)
{
  if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
    return XZ_FORMAT_ERROR;

  if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
      != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
    return XZ_DATA_ERROR;

  if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
    return XZ_OPTIONS_ERROR;

  /*
   * Of integrity checks, we support none (Check ID = 0),
   * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
   * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
   * check types too, but then the check won't be verified and
   * a warning (XZ_UNSUPPORTED_CHECK) will be given.
   */
  s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];

  if (s->check_type > XZ_CHECK_MAX)
    return XZ_OPTIONS_ERROR;

  if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
    return XZ_UNSUPPORTED_CHECK;

  return XZ_OK;
}
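/*
 * Layout of the 12-byte Stream Header parsed above, as implied by the
 * code: bytes 0-5 are the magic 0xFD '7' 'z' 'X' 'Z' 0x00, bytes 6-7
 * are the Stream Flags (a null byte followed by the Check ID), and
 * bytes 8-11 are the little-endian CRC32 of the two flag bytes.
 */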

/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
static enum xz_ret dec_stream_footer(struct xz_dec *s)
{
  if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
    return XZ_DATA_ERROR;

  if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
    return XZ_DATA_ERROR;

  /*
   * Validate Backward Size. Note that we never added the size of the
   * Index CRC32 field to s->index.size, thus we use s->index.size / 4
   * instead of s->index.size / 4 - 1.
   */
  if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
    return XZ_DATA_ERROR;

  if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
    return XZ_DATA_ERROR;

  /*
   * Use XZ_STREAM_END instead of XZ_OK to be more convenient
   * for the caller.
   */
  return XZ_STREAM_END;
}

/* Decode the Block Header and initialize the filter chain. */
static enum xz_ret dec_block_header(struct xz_dec *s)
{
  enum xz_ret ret;

  /*
   * Validate the CRC32. We know that the temp buffer is at least
   * eight bytes so this is safe.
   */
  s->temp.size -= 4;
  if (xz_crc32(s->temp.buf, s->temp.size, 0)
      != get_le32(s->temp.buf + s->temp.size))
    return XZ_DATA_ERROR;

  s->temp.pos = 2;

  /*
   * Catch unsupported Block Flags. We support only one or two filters
   * in the chain, so we catch that with the same test.
   */
#ifdef XZ_DEC_BCJ
  if (s->temp.buf[1] & 0x3E)
#else
  if (s->temp.buf[1] & 0x3F)
#endif
    return XZ_OPTIONS_ERROR;

  /* Compressed Size */
  if (s->temp.buf[1] & 0x40) {
    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
        != XZ_STREAM_END)
      return XZ_DATA_ERROR;

    s->block_header.compressed = s->vli;
  } else {
    s->block_header.compressed = VLI_UNKNOWN;
  }

  /* Uncompressed Size */
  if (s->temp.buf[1] & 0x80) {
    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
        != XZ_STREAM_END)
      return XZ_DATA_ERROR;

    s->block_header.uncompressed = s->vli;
  } else {
    s->block_header.uncompressed = VLI_UNKNOWN;
  }

#ifdef XZ_DEC_BCJ
  /* If there are two filters, the first one must be a BCJ filter. */
  s->bcj_active = s->temp.buf[1] & 0x01;
  if (s->bcj_active) {
    if (s->temp.size - s->temp.pos < 2)
      return XZ_OPTIONS_ERROR;

    ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
    if (ret != XZ_OK)
      return ret;

    /*
     * We don't support custom start offset,
     * so Size of Properties must be zero.
     */
    if (s->temp.buf[s->temp.pos++] != 0x00)
      return XZ_OPTIONS_ERROR;
  }
#endif

  /* Valid Filter Flags always take at least two bytes. */
  if (s->temp.size - s->temp.pos < 2)
    return XZ_DATA_ERROR;

  /* Filter ID = LZMA2 */
  if (s->temp.buf[s->temp.pos++] != 0x21)
    return XZ_OPTIONS_ERROR;

  /* Size of Properties = 1-byte Filter Properties */
  if (s->temp.buf[s->temp.pos++] != 0x01)
    return XZ_OPTIONS_ERROR;

  /* Filter Properties contains LZMA2 dictionary size. */
  if (s->temp.size - s->temp.pos < 1)
    return XZ_DATA_ERROR;

  ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
  if (ret != XZ_OK)
    return ret;

  /* The rest must be Header Padding. */
  while (s->temp.pos < s->temp.size)
    if (s->temp.buf[s->temp.pos++] != 0x00)
      return XZ_OPTIONS_ERROR;

  s->temp.pos = 0;
  s->block.compressed = 0;
  s->block.uncompressed = 0;

  return XZ_OK;
}
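/*
 * A minimal Block Header accepted by the code above, for illustration:
 * 02 00 21 01 <dict> 00 00 00 <crc32[4]>. The encoded size byte 0x02
 * gives (0x02 + 1) * 4 = 12 bytes in total; flags 0x00 select a single
 * filter with no optional size fields; 0x21/0x01 select LZMA2 with a
 * one-byte Filter Properties field (the dictionary size byte); three
 * bytes of Header Padding round the length up to a multiple of four;
 * and the trailing little-endian CRC32 covers the first eight bytes.
 */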

static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
{
  enum xz_ret ret;

  /*
   * Store the start position for the case when we are in the middle
   * of the Index field.
   */
  s->in_start = b->in_pos;

  for (;;) {
    switch (s->sequence) {
    case SEQ_STREAM_HEADER:
      /*
       * Stream Header is copied to s->temp, and then
       * decoded from there. This way if the caller
       * gives us only little input at a time, we can
       * still keep the Stream Header decoding code
       * simple. Similar approach is used in many places
       * in this file.
       */
      if (!fill_temp(s, b))
        return XZ_OK;

      /*
       * If dec_stream_header() returns
       * XZ_UNSUPPORTED_CHECK, it is still possible
       * to continue decoding if working in multi-call
       * mode. Thus, update s->sequence before calling
       * dec_stream_header().
       */
      s->sequence = SEQ_BLOCK_START;

      ret = dec_stream_header(s);
      if (ret != XZ_OK)
        return ret;
      /* Fall through */

    case SEQ_BLOCK_START:
      /* We need one byte of input to continue. */
      if (b->in_pos == b->in_size)
        return XZ_OK;

      /* See if this is the beginning of the Index field. */
      if (b->in[b->in_pos] == 0) {
        s->in_start = b->in_pos++;
        s->sequence = SEQ_INDEX;
        break;
      }

      /*
       * Calculate the size of the Block Header and
       * prepare to decode it.
       */
      s->block_header.size = ((uint32_t)b->in[b->in_pos] + 1) * 4;

      s->temp.size = s->block_header.size;
      s->temp.pos = 0;
      s->sequence = SEQ_BLOCK_HEADER;
      /* Fall through */

    case SEQ_BLOCK_HEADER:
      if (!fill_temp(s, b))
        return XZ_OK;

      ret = dec_block_header(s);
      if (ret != XZ_OK)
        return ret;

      s->sequence = SEQ_BLOCK_UNCOMPRESS;
      /* Fall through */

    case SEQ_BLOCK_UNCOMPRESS:
      ret = dec_block(s, b);
      if (ret != XZ_STREAM_END)
        return ret;

      s->sequence = SEQ_BLOCK_PADDING;
      /* Fall through */

    case SEQ_BLOCK_PADDING:
      /*
       * Size of Compressed Data + Block Padding
       * must be a multiple of four. We don't need
       * s->block.compressed for anything else
       * anymore, so we use it here to test the size
       * of the Block Padding field.
       */
      while (s->block.compressed & 3) {
        if (b->in_pos == b->in_size)
          return XZ_OK;

        if (b->in[b->in_pos++] != 0)
          return XZ_DATA_ERROR;

        ++s->block.compressed;
      }

      s->sequence = SEQ_BLOCK_CHECK;
      /* Fall through */

    case SEQ_BLOCK_CHECK:
      if (s->check_type == XZ_CHECK_CRC32) {
        ret = crc_validate(s, b, 32);
        if (ret != XZ_STREAM_END)
          return ret;
      } else if (IS_CRC64(s->check_type)) {
        ret = crc_validate(s, b, 64);
        if (ret != XZ_STREAM_END)
          return ret;
      } else if (!check_skip(s, b)) {
        return XZ_OK;
      }

      s->sequence = SEQ_BLOCK_START;
      break;

    case SEQ_INDEX:
      ret = dec_index(s, b);
      if (ret != XZ_STREAM_END)
        return ret;

      s->sequence = SEQ_INDEX_PADDING;
      /* Fall through */

    case SEQ_INDEX_PADDING:
      while ((s->index.size + (b->in_pos - s->in_start)) & 3) {
        if (b->in_pos == b->in_size) {
          index_update(s, b);
          return XZ_OK;
        }

        if (b->in[b->in_pos++] != 0)
          return XZ_DATA_ERROR;
      }

      /* Finish the CRC32 value and Index size. */
      index_update(s, b);

      /* Compare the hashes to validate the Index field. */
      if (!memeq(&s->block.hash, &s->index.hash, sizeof(s->block.hash)))
        return XZ_DATA_ERROR;

      s->sequence = SEQ_INDEX_CRC32;
      /* Fall through */

    case SEQ_INDEX_CRC32:
      ret = crc_validate(s, b, 32);
      if (ret != XZ_STREAM_END)
        return ret;

      s->temp.size = STREAM_HEADER_SIZE;
      s->sequence = SEQ_STREAM_FOOTER;
      /* Fall through */

    case SEQ_STREAM_FOOTER:
      if (!fill_temp(s, b))
        return XZ_OK;

      return dec_stream_footer(s);
    }
  }

  /* Never reached */
}

/*
 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
 * multi-call and single-call decoding.
 *
 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
 * are not going to make any progress anymore. This is to prevent the caller
 * from calling us infinitely when the input file is truncated or otherwise
 * corrupt. Since the zlib-style API allows the caller to fill the input
 * buffer only when the decoder doesn't produce any new output, we have to be
 * careful to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is
 * returned only after the second consecutive call to xz_dec_run() that makes
 * no progress.
 *
 * In single-call mode, if we couldn't decode everything and no error
 * occurred, either the input is truncated or the output buffer is too small.
 * Since we know that the last input byte never produces any output, we know
 * that if all the input was consumed and decoding wasn't finished, the file
 * must be corrupt. Otherwise the output buffer has to be too small or the
 * file is corrupt in a way that decoding it produces too big output.
 *
 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
 * their original values. This is because with some filter chains there won't
 * be any valid uncompressed data in the output buffer unless the decoding
 * actually succeeds (that's the price to pay for using the output buffer as
 * the workspace).
 */
enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
{
  size_t in_start;
  size_t out_start;
  enum xz_ret ret;

  in_start = b->in_pos;
  out_start = b->out_pos;
  ret = dec_main(s, b);

  if (ret == XZ_OK && in_start == b->in_pos && out_start == b->out_pos) {
    if (s->allow_buf_error)
      ret = XZ_BUF_ERROR;

    s->allow_buf_error = 1;
  } else {
    s->allow_buf_error = 0;
  }

  return ret;
}
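/*
 * A minimal multi-call usage sketch (buffer sizes and the POSIX I/O
 * calls are placeholders, not part of this decoder):
 *
 *   struct xz_buf b;
 *   struct xz_dec *s = xz_dec_init(1 << 26); // allow up to a 64 MiB dict
 *   uint8_t in[BUFSIZ], out[BUFSIZ];
 *   enum xz_ret ret;
 *
 *   b.in = in; b.in_pos = 0; b.in_size = 0;
 *   b.out = out; b.out_pos = 0; b.out_size = sizeof(out);
 *   do {
 *     if (b.in_pos == b.in_size) {   // refill input when it runs dry
 *       b.in_size = read(0, in, sizeof(in));
 *       b.in_pos = 0;
 *     }
 *     ret = xz_dec_run(s, &b);
 *     write(1, out, b.out_pos);      // drain whatever was produced
 *     b.out_pos = 0;
 *   } while (ret == XZ_OK);
 *   xz_dec_end(s);
 *
 * A real caller must also check read() for errors and distinguish
 * XZ_STREAM_END from the error codes listed in enum xz_ret.
 */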

struct xz_dec *xz_dec_init(uint32_t dict_max)
{
  struct xz_dec *s = malloc(sizeof(*s));
  if (!s)
    return NULL;

#ifdef XZ_DEC_BCJ
  s->bcj = malloc(sizeof(*s->bcj));
  if (!s->bcj)
    goto error_bcj;
#endif

  s->lzma2 = xz_dec_lzma2_create(dict_max);
  if (s->lzma2 == NULL)
    goto error_lzma2;

  xz_dec_reset(s);
  return s;

error_lzma2:
#ifdef XZ_DEC_BCJ
  free(s->bcj);
error_bcj:
#endif
  free(s);
  return NULL;
}

void xz_dec_reset(struct xz_dec *s)
{
  s->sequence = SEQ_STREAM_HEADER;
  s->allow_buf_error = 0;
  s->pos = 0;
  s->crc = 0;
  memset(&s->block, 0, sizeof(s->block));
  memset(&s->index, 0, sizeof(s->index));
  s->temp.pos = 0;
  s->temp.size = STREAM_HEADER_SIZE;
}

void xz_dec_end(struct xz_dec *s)
{
  if (s != NULL) {
    free(s->lzma2->dict.buf);
    free(s->lzma2);

#ifdef XZ_DEC_BCJ
    free(s->bcj);
#endif
    free(s);
  }
}