• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * .xz Stream decoder
3  *
4  * Author: Lasse Collin <lasse.collin@tukaani.org>
5  *
6  * This file has been put into the public domain.
7  * You can do whatever you want with this file.
8  */
9 
10 #include "xz_private.h"
11 #include "xz_stream.h"
12 
/*
 * IS_CRC64(check_type) tests whether check_type selects the CRC64
 * integrity check. When XZ_USE_CRC64 is not defined, the macro expands
 * to the constant false and its argument is not evaluated.
 */
#ifdef XZ_USE_CRC64
#	define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64)
#else
#	define IS_CRC64(check_type) false
#endif
18 
19 /* Hash used to validate the Index field */
20 struct xz_dec_hash {
21 	vli_type unpadded;
22 	vli_type uncompressed;
23 	uint32_t crc32;
24 };
25 
26 struct xz_dec {
27 	/* Position in dec_main() */
28 	enum {
29 		SEQ_STREAM_HEADER,
30 		SEQ_BLOCK_START,
31 		SEQ_BLOCK_HEADER,
32 		SEQ_BLOCK_UNCOMPRESS,
33 		SEQ_BLOCK_PADDING,
34 		SEQ_BLOCK_CHECK,
35 		SEQ_INDEX,
36 		SEQ_INDEX_PADDING,
37 		SEQ_INDEX_CRC32,
38 		SEQ_STREAM_FOOTER,
39 		SEQ_STREAM_PADDING
40 	} sequence;
41 
42 	/* Position in variable-length integers and Check fields */
43 	uint32_t pos;
44 
45 	/* Variable-length integer decoded by dec_vli() */
46 	vli_type vli;
47 
48 	/* Saved in_pos and out_pos */
49 	size_t in_start;
50 	size_t out_start;
51 
52 #ifdef XZ_USE_CRC64
53 	/* CRC32 or CRC64 value in Block or CRC32 value in Index */
54 	uint64_t crc;
55 #else
56 	/* CRC32 value in Block or Index */
57 	uint32_t crc;
58 #endif
59 
60 	/* Type of the integrity check calculated from uncompressed data */
61 	enum xz_check check_type;
62 
63 	/* Operation mode */
64 	enum xz_mode mode;
65 
66 	/*
67 	 * True if the next call to xz_dec_run() is allowed to return
68 	 * XZ_BUF_ERROR.
69 	 */
70 	bool allow_buf_error;
71 
72 	/* Information stored in Block Header */
73 	struct {
74 		/*
75 		 * Value stored in the Compressed Size field, or
76 		 * VLI_UNKNOWN if Compressed Size is not present.
77 		 */
78 		vli_type compressed;
79 
80 		/*
81 		 * Value stored in the Uncompressed Size field, or
82 		 * VLI_UNKNOWN if Uncompressed Size is not present.
83 		 */
84 		vli_type uncompressed;
85 
86 		/* Size of the Block Header field */
87 		uint32_t size;
88 	} block_header;
89 
90 	/* Information collected when decoding Blocks */
91 	struct {
92 		/* Observed compressed size of the current Block */
93 		vli_type compressed;
94 
95 		/* Observed uncompressed size of the current Block */
96 		vli_type uncompressed;
97 
98 		/* Number of Blocks decoded so far */
99 		vli_type count;
100 
101 		/*
102 		 * Hash calculated from the Block sizes. This is used to
103 		 * validate the Index field.
104 		 */
105 		struct xz_dec_hash hash;
106 	} block;
107 
108 	/* Variables needed when verifying the Index field */
109 	struct {
110 		/* Position in dec_index() */
111 		enum {
112 			SEQ_INDEX_COUNT,
113 			SEQ_INDEX_UNPADDED,
114 			SEQ_INDEX_UNCOMPRESSED
115 		} sequence;
116 
117 		/* Size of the Index in bytes */
118 		vli_type size;
119 
120 		/* Number of Records (matches block.count in valid files) */
121 		vli_type count;
122 
123 		/*
124 		 * Hash calculated from the Records (matches block.hash in
125 		 * valid files).
126 		 */
127 		struct xz_dec_hash hash;
128 	} index;
129 
130 	/*
131 	 * Temporary buffer needed to hold Stream Header, Block Header,
132 	 * and Stream Footer. The Block Header is the biggest (1 KiB)
133 	 * so we reserve space according to that. buf[] has to be aligned
134 	 * to a multiple of four bytes; the size_t variables before it
135 	 * should guarantee this.
136 	 */
137 	struct {
138 		size_t pos;
139 		size_t size;
140 		uint8_t buf[1024];
141 	} temp;
142 
143 	struct xz_dec_lzma2 *lzma2;
144 
145 #ifdef XZ_DEC_BCJ
146 	struct xz_dec_bcj *bcj;
147 	bool bcj_active;
148 #endif
149 };
150 
151 #ifdef XZ_DEC_ANY_CHECK
/*
 * Sizes of the Check field with different Check IDs. The table is
 * indexed directly by the Check ID (0-15) and gives the size in bytes.
 */
static const uint8_t check_sizes[16] = {
	0,
	4, 4, 4,
	8, 8, 8,
	16, 16, 16,
	32, 32, 32,
	64, 64, 64
};
161 #endif
162 
163 /*
164  * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
165  * must have set s->temp.pos to indicate how much data we are supposed
166  * to copy into s->temp.buf. Return true once s->temp.pos has reached
167  * s->temp.size.
168  */
fill_temp(struct xz_dec * s,struct xz_buf * b)169 static bool fill_temp(struct xz_dec *s, struct xz_buf *b)
170 {
171 	size_t copy_size = min_t(size_t,
172 			b->in_size - b->in_pos, s->temp.size - s->temp.pos);
173 
174 	memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
175 	b->in_pos += copy_size;
176 	s->temp.pos += copy_size;
177 
178 	if (s->temp.pos == s->temp.size) {
179 		s->temp.pos = 0;
180 		return true;
181 	}
182 
183 	return false;
184 }
185 
186 /* Decode a variable-length integer (little-endian base-128 encoding) */
dec_vli(struct xz_dec * s,const uint8_t * in,size_t * in_pos,size_t in_size)187 static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
188 			   size_t *in_pos, size_t in_size)
189 {
190 	uint8_t byte;
191 
192 	if (s->pos == 0)
193 		s->vli = 0;
194 
195 	while (*in_pos < in_size) {
196 		byte = in[*in_pos];
197 		++*in_pos;
198 
199 		s->vli |= (vli_type)(byte & 0x7F) << s->pos;
200 
201 		if ((byte & 0x80) == 0) {
202 			/* Don't allow non-minimal encodings. */
203 			if (byte == 0 && s->pos != 0)
204 				return XZ_DATA_ERROR;
205 
206 			s->pos = 0;
207 			return XZ_STREAM_END;
208 		}
209 
210 		s->pos += 7;
211 		if (s->pos == 7 * VLI_BYTES_MAX)
212 			return XZ_DATA_ERROR;
213 	}
214 
215 	return XZ_OK;
216 }
217 
218 /*
219  * Decode the Compressed Data field from a Block. Update and validate
220  * the observed compressed and uncompressed sizes of the Block so that
221  * they don't exceed the values possibly stored in the Block Header
222  * (validation assumes that no integer overflow occurs, since vli_type
223  * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
224  * the CRC32 or CRC64 field was indicated in Stream Header.
225  *
226  * Once the decoding is finished, validate that the observed sizes match
227  * the sizes possibly stored in the Block Header. Update the hash and
228  * Block count, which are later used to validate the Index field.
229  */
dec_block(struct xz_dec * s,struct xz_buf * b)230 static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
231 {
232 	enum xz_ret ret;
233 
234 	s->in_start = b->in_pos;
235 	s->out_start = b->out_pos;
236 
237 #ifdef XZ_DEC_BCJ
238 	if (s->bcj_active)
239 		ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
240 	else
241 #endif
242 		ret = xz_dec_lzma2_run(s->lzma2, b);
243 
244 	s->block.compressed += b->in_pos - s->in_start;
245 	s->block.uncompressed += b->out_pos - s->out_start;
246 
247 	/*
248 	 * There is no need to separately check for VLI_UNKNOWN, since
249 	 * the observed sizes are always smaller than VLI_UNKNOWN.
250 	 */
251 	if (s->block.compressed > s->block_header.compressed
252 			|| s->block.uncompressed
253 				> s->block_header.uncompressed)
254 		return XZ_DATA_ERROR;
255 
256 	if (s->check_type == XZ_CHECK_CRC32)
257 		s->crc = xz_crc32(b->out + s->out_start,
258 				b->out_pos - s->out_start, s->crc);
259 #ifdef XZ_USE_CRC64
260 	else if (s->check_type == XZ_CHECK_CRC64)
261 		s->crc = xz_crc64(b->out + s->out_start,
262 				b->out_pos - s->out_start, s->crc);
263 #endif
264 
265 	if (ret == XZ_STREAM_END) {
266 		if (s->block_header.compressed != VLI_UNKNOWN
267 				&& s->block_header.compressed
268 					!= s->block.compressed)
269 			return XZ_DATA_ERROR;
270 
271 		if (s->block_header.uncompressed != VLI_UNKNOWN
272 				&& s->block_header.uncompressed
273 					!= s->block.uncompressed)
274 			return XZ_DATA_ERROR;
275 
276 		s->block.hash.unpadded += s->block_header.size
277 				+ s->block.compressed;
278 
279 #ifdef XZ_DEC_ANY_CHECK
280 		s->block.hash.unpadded += check_sizes[s->check_type];
281 #else
282 		if (s->check_type == XZ_CHECK_CRC32)
283 			s->block.hash.unpadded += 4;
284 		else if (IS_CRC64(s->check_type))
285 			s->block.hash.unpadded += 8;
286 #endif
287 
288 		s->block.hash.uncompressed += s->block.uncompressed;
289 		s->block.hash.crc32 = xz_crc32(
290 				(const uint8_t *)&s->block.hash,
291 				sizeof(s->block.hash), s->block.hash.crc32);
292 
293 		++s->block.count;
294 	}
295 
296 	return ret;
297 }
298 
299 /* Update the Index size and the CRC32 value. */
index_update(struct xz_dec * s,const struct xz_buf * b)300 static void index_update(struct xz_dec *s, const struct xz_buf *b)
301 {
302 	size_t in_used = b->in_pos - s->in_start;
303 	s->index.size += in_used;
304 	s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
305 }
306 
307 /*
308  * Decode the Number of Records, Unpadded Size, and Uncompressed Size
309  * fields from the Index field. That is, Index Padding and CRC32 are not
310  * decoded by this function.
311  *
312  * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
313  * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
314  */
dec_index(struct xz_dec * s,struct xz_buf * b)315 static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
316 {
317 	enum xz_ret ret;
318 
319 	do {
320 		ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
321 		if (ret != XZ_STREAM_END) {
322 			index_update(s, b);
323 			return ret;
324 		}
325 
326 		switch (s->index.sequence) {
327 		case SEQ_INDEX_COUNT:
328 			s->index.count = s->vli;
329 
330 			/*
331 			 * Validate that the Number of Records field
332 			 * indicates the same number of Records as
333 			 * there were Blocks in the Stream.
334 			 */
335 			if (s->index.count != s->block.count)
336 				return XZ_DATA_ERROR;
337 
338 			s->index.sequence = SEQ_INDEX_UNPADDED;
339 			break;
340 
341 		case SEQ_INDEX_UNPADDED:
342 			s->index.hash.unpadded += s->vli;
343 			s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
344 			break;
345 
346 		case SEQ_INDEX_UNCOMPRESSED:
347 			s->index.hash.uncompressed += s->vli;
348 			s->index.hash.crc32 = xz_crc32(
349 					(const uint8_t *)&s->index.hash,
350 					sizeof(s->index.hash),
351 					s->index.hash.crc32);
352 			--s->index.count;
353 			s->index.sequence = SEQ_INDEX_UNPADDED;
354 			break;
355 		}
356 	} while (s->index.count > 0);
357 
358 	return XZ_STREAM_END;
359 }
360 
361 /*
362  * Validate that the next four or eight input bytes match the value
363  * of s->crc. s->pos must be zero when starting to validate the first byte.
364  * The "bits" argument allows using the same code for both CRC32 and CRC64.
365  */
crc_validate(struct xz_dec * s,struct xz_buf * b,uint32_t bits)366 static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
367 				uint32_t bits)
368 {
369 	do {
370 		if (b->in_pos == b->in_size)
371 			return XZ_OK;
372 
373 		if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
374 			return XZ_DATA_ERROR;
375 
376 		s->pos += 8;
377 
378 	} while (s->pos < bits);
379 
380 	s->crc = 0;
381 	s->pos = 0;
382 
383 	return XZ_STREAM_END;
384 }
385 
386 #ifdef XZ_DEC_ANY_CHECK
387 /*
388  * Skip over the Check field when the Check ID is not supported.
389  * Returns true once the whole Check field has been skipped over.
390  */
check_skip(struct xz_dec * s,struct xz_buf * b)391 static bool check_skip(struct xz_dec *s, struct xz_buf *b)
392 {
393 	while (s->pos < check_sizes[s->check_type]) {
394 		if (b->in_pos == b->in_size)
395 			return false;
396 
397 		++b->in_pos;
398 		++s->pos;
399 	}
400 
401 	s->pos = 0;
402 
403 	return true;
404 }
405 #endif
406 
407 /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
dec_stream_header(struct xz_dec * s)408 static enum xz_ret dec_stream_header(struct xz_dec *s)
409 {
410 	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
411 		return XZ_FORMAT_ERROR;
412 
413 	if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
414 			!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
415 		return XZ_DATA_ERROR;
416 
417 	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
418 		return XZ_OPTIONS_ERROR;
419 
420 	/*
421 	 * Of integrity checks, we support none (Check ID = 0),
422 	 * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
423 	 * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
424 	 * check types too, but then the check won't be verified and
425 	 * a warning (XZ_UNSUPPORTED_CHECK) will be given.
426 	 */
427 	if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX)
428 		return XZ_OPTIONS_ERROR;
429 
430 	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
431 
432 #ifdef XZ_DEC_ANY_CHECK
433 	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
434 		return XZ_UNSUPPORTED_CHECK;
435 #else
436 	if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
437 		return XZ_OPTIONS_ERROR;
438 #endif
439 
440 	return XZ_OK;
441 }
442 
443 /* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
dec_stream_footer(struct xz_dec * s)444 static enum xz_ret dec_stream_footer(struct xz_dec *s)
445 {
446 	if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
447 		return XZ_DATA_ERROR;
448 
449 	if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
450 		return XZ_DATA_ERROR;
451 
452 	/*
453 	 * Validate Backward Size. Note that we never added the size of the
454 	 * Index CRC32 field to s->index.size, thus we use s->index.size / 4
455 	 * instead of s->index.size / 4 - 1.
456 	 */
457 	if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
458 		return XZ_DATA_ERROR;
459 
460 	if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
461 		return XZ_DATA_ERROR;
462 
463 	/*
464 	 * Use XZ_STREAM_END instead of XZ_OK to be more convenient
465 	 * for the caller.
466 	 */
467 	return XZ_STREAM_END;
468 }
469 
470 /* Decode the Block Header and initialize the filter chain. */
dec_block_header(struct xz_dec * s)471 static enum xz_ret dec_block_header(struct xz_dec *s)
472 {
473 	enum xz_ret ret;
474 
475 	/*
476 	 * Validate the CRC32. We know that the temp buffer is at least
477 	 * eight bytes so this is safe.
478 	 */
479 	s->temp.size -= 4;
480 	if (xz_crc32(s->temp.buf, s->temp.size, 0)
481 			!= get_le32(s->temp.buf + s->temp.size))
482 		return XZ_DATA_ERROR;
483 
484 	s->temp.pos = 2;
485 
486 	/*
487 	 * Catch unsupported Block Flags. We support only one or two filters
488 	 * in the chain, so we catch that with the same test.
489 	 */
490 #ifdef XZ_DEC_BCJ
491 	if (s->temp.buf[1] & 0x3E)
492 #else
493 	if (s->temp.buf[1] & 0x3F)
494 #endif
495 		return XZ_OPTIONS_ERROR;
496 
497 	/* Compressed Size */
498 	if (s->temp.buf[1] & 0x40) {
499 		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
500 					!= XZ_STREAM_END)
501 			return XZ_DATA_ERROR;
502 
503 		s->block_header.compressed = s->vli;
504 	} else {
505 		s->block_header.compressed = VLI_UNKNOWN;
506 	}
507 
508 	/* Uncompressed Size */
509 	if (s->temp.buf[1] & 0x80) {
510 		if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
511 				!= XZ_STREAM_END)
512 			return XZ_DATA_ERROR;
513 
514 		s->block_header.uncompressed = s->vli;
515 	} else {
516 		s->block_header.uncompressed = VLI_UNKNOWN;
517 	}
518 
519 #ifdef XZ_DEC_BCJ
520 	/* If there are two filters, the first one must be a BCJ filter. */
521 	s->bcj_active = s->temp.buf[1] & 0x01;
522 	if (s->bcj_active) {
523 		if (s->temp.size - s->temp.pos < 2)
524 			return XZ_OPTIONS_ERROR;
525 
526 		ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
527 		if (ret != XZ_OK)
528 			return ret;
529 
530 		/*
531 		 * We don't support custom start offset,
532 		 * so Size of Properties must be zero.
533 		 */
534 		if (s->temp.buf[s->temp.pos++] != 0x00)
535 			return XZ_OPTIONS_ERROR;
536 	}
537 #endif
538 
539 	/* Valid Filter Flags always take at least two bytes. */
540 	if (s->temp.size - s->temp.pos < 2)
541 		return XZ_DATA_ERROR;
542 
543 	/* Filter ID = LZMA2 */
544 	if (s->temp.buf[s->temp.pos++] != 0x21)
545 		return XZ_OPTIONS_ERROR;
546 
547 	/* Size of Properties = 1-byte Filter Properties */
548 	if (s->temp.buf[s->temp.pos++] != 0x01)
549 		return XZ_OPTIONS_ERROR;
550 
551 	/* Filter Properties contains LZMA2 dictionary size. */
552 	if (s->temp.size - s->temp.pos < 1)
553 		return XZ_DATA_ERROR;
554 
555 	ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
556 	if (ret != XZ_OK)
557 		return ret;
558 
559 	/* The rest must be Header Padding. */
560 	while (s->temp.pos < s->temp.size)
561 		if (s->temp.buf[s->temp.pos++] != 0x00)
562 			return XZ_OPTIONS_ERROR;
563 
564 	s->temp.pos = 0;
565 	s->block.compressed = 0;
566 	s->block.uncompressed = 0;
567 
568 	return XZ_OK;
569 }
570 
dec_main(struct xz_dec * s,struct xz_buf * b)571 static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
572 {
573 	enum xz_ret ret;
574 
575 	/*
576 	 * Store the start position for the case when we are in the middle
577 	 * of the Index field.
578 	 */
579 	s->in_start = b->in_pos;
580 
581 	while (true) {
582 		switch (s->sequence) {
583 		case SEQ_STREAM_HEADER:
584 			/*
585 			 * Stream Header is copied to s->temp, and then
586 			 * decoded from there. This way if the caller
587 			 * gives us only little input at a time, we can
588 			 * still keep the Stream Header decoding code
589 			 * simple. Similar approach is used in many places
590 			 * in this file.
591 			 */
592 			if (!fill_temp(s, b))
593 				return XZ_OK;
594 
595 			/*
596 			 * If dec_stream_header() returns
597 			 * XZ_UNSUPPORTED_CHECK, it is still possible
598 			 * to continue decoding if working in multi-call
599 			 * mode. Thus, update s->sequence before calling
600 			 * dec_stream_header().
601 			 */
602 			s->sequence = SEQ_BLOCK_START;
603 
604 			ret = dec_stream_header(s);
605 			if (ret != XZ_OK)
606 				return ret;
607 
608 		/* Fall through */
609 
610 		case SEQ_BLOCK_START:
611 			/* We need one byte of input to continue. */
612 			if (b->in_pos == b->in_size)
613 				return XZ_OK;
614 
615 			/* See if this is the beginning of the Index field. */
616 			if (b->in[b->in_pos] == 0) {
617 				s->in_start = b->in_pos++;
618 				s->sequence = SEQ_INDEX;
619 				break;
620 			}
621 
622 			/*
623 			 * Calculate the size of the Block Header and
624 			 * prepare to decode it.
625 			 */
626 			s->block_header.size
627 				= ((uint32_t)b->in[b->in_pos] + 1) * 4;
628 
629 			s->temp.size = s->block_header.size;
630 			s->temp.pos = 0;
631 			s->sequence = SEQ_BLOCK_HEADER;
632 
633 		/* Fall through */
634 
635 		case SEQ_BLOCK_HEADER:
636 			if (!fill_temp(s, b))
637 				return XZ_OK;
638 
639 			ret = dec_block_header(s);
640 			if (ret != XZ_OK)
641 				return ret;
642 
643 			s->sequence = SEQ_BLOCK_UNCOMPRESS;
644 
645 		/* Fall through */
646 
647 		case SEQ_BLOCK_UNCOMPRESS:
648 			ret = dec_block(s, b);
649 			if (ret != XZ_STREAM_END)
650 				return ret;
651 
652 			s->sequence = SEQ_BLOCK_PADDING;
653 
654 		/* Fall through */
655 
656 		case SEQ_BLOCK_PADDING:
657 			/*
658 			 * Size of Compressed Data + Block Padding
659 			 * must be a multiple of four. We don't need
660 			 * s->block.compressed for anything else
661 			 * anymore, so we use it here to test the size
662 			 * of the Block Padding field.
663 			 */
664 			while (s->block.compressed & 3) {
665 				if (b->in_pos == b->in_size)
666 					return XZ_OK;
667 
668 				if (b->in[b->in_pos++] != 0)
669 					return XZ_DATA_ERROR;
670 
671 				++s->block.compressed;
672 			}
673 
674 			s->sequence = SEQ_BLOCK_CHECK;
675 
676 		/* Fall through */
677 
678 		case SEQ_BLOCK_CHECK:
679 			if (s->check_type == XZ_CHECK_CRC32) {
680 				ret = crc_validate(s, b, 32);
681 				if (ret != XZ_STREAM_END)
682 					return ret;
683 			}
684 			else if (IS_CRC64(s->check_type)) {
685 				ret = crc_validate(s, b, 64);
686 				if (ret != XZ_STREAM_END)
687 					return ret;
688 			}
689 #ifdef XZ_DEC_ANY_CHECK
690 			else if (!check_skip(s, b)) {
691 				return XZ_OK;
692 			}
693 #endif
694 
695 			s->sequence = SEQ_BLOCK_START;
696 			break;
697 
698 		case SEQ_INDEX:
699 			ret = dec_index(s, b);
700 			if (ret != XZ_STREAM_END)
701 				return ret;
702 
703 			s->sequence = SEQ_INDEX_PADDING;
704 
705 		/* Fall through */
706 
707 		case SEQ_INDEX_PADDING:
708 			while ((s->index.size + (b->in_pos - s->in_start))
709 					& 3) {
710 				if (b->in_pos == b->in_size) {
711 					index_update(s, b);
712 					return XZ_OK;
713 				}
714 
715 				if (b->in[b->in_pos++] != 0)
716 					return XZ_DATA_ERROR;
717 			}
718 
719 			/* Finish the CRC32 value and Index size. */
720 			index_update(s, b);
721 
722 			/* Compare the hashes to validate the Index field. */
723 			if (!memeq(&s->block.hash, &s->index.hash,
724 					sizeof(s->block.hash)))
725 				return XZ_DATA_ERROR;
726 
727 			s->sequence = SEQ_INDEX_CRC32;
728 
729 		/* Fall through */
730 
731 		case SEQ_INDEX_CRC32:
732 			ret = crc_validate(s, b, 32);
733 			if (ret != XZ_STREAM_END)
734 				return ret;
735 
736 			s->temp.size = STREAM_HEADER_SIZE;
737 			s->sequence = SEQ_STREAM_FOOTER;
738 
739 		/* Fall through */
740 
741 		case SEQ_STREAM_FOOTER:
742 			if (!fill_temp(s, b))
743 				return XZ_OK;
744 
745 			return dec_stream_footer(s);
746 
747 		case SEQ_STREAM_PADDING:
748 			/* Never reached, only silencing a warning */
749 			break;
750 		}
751 	}
752 
753 	/* Never reached */
754 }
755 
756 /*
757  * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
758  * multi-call and single-call decoding.
759  *
760  * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
761  * are not going to make any progress anymore. This is to prevent the caller
762  * from calling us infinitely when the input file is truncated or otherwise
763  * corrupt. Since zlib-style API allows that the caller fills the input buffer
764  * only when the decoder doesn't produce any new output, we have to be careful
765  * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
766  * after the second consecutive call to xz_dec_run() that makes no progress.
767  *
768  * In single-call mode, if we couldn't decode everything and no error
769  * occurred, either the input is truncated or the output buffer is too small.
770  * Since we know that the last input byte never produces any output, we know
771  * that if all the input was consumed and decoding wasn't finished, the file
772  * must be corrupt. Otherwise the output buffer has to be too small or the
773  * file is corrupt in a way that decoding it produces too big output.
774  *
775  * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
776  * their original values. This is because with some filter chains there won't
777  * be any valid uncompressed data in the output buffer unless the decoding
778  * actually succeeds (that's the price to pay of using the output buffer as
779  * the workspace).
780  */
xz_dec_run(struct xz_dec * s,struct xz_buf * b)781 XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
782 {
783 	size_t in_start;
784 	size_t out_start;
785 	enum xz_ret ret;
786 
787 	if (DEC_IS_SINGLE(s->mode))
788 		xz_dec_reset(s);
789 
790 	in_start = b->in_pos;
791 	out_start = b->out_pos;
792 	ret = dec_main(s, b);
793 
794 	if (DEC_IS_SINGLE(s->mode)) {
795 		if (ret == XZ_OK)
796 			ret = b->in_pos == b->in_size
797 					? XZ_DATA_ERROR : XZ_BUF_ERROR;
798 
799 		if (ret != XZ_STREAM_END) {
800 			b->in_pos = in_start;
801 			b->out_pos = out_start;
802 		}
803 
804 	} else if (ret == XZ_OK && in_start == b->in_pos
805 			&& out_start == b->out_pos) {
806 		if (s->allow_buf_error)
807 			ret = XZ_BUF_ERROR;
808 
809 		s->allow_buf_error = true;
810 	} else {
811 		s->allow_buf_error = false;
812 	}
813 
814 	return ret;
815 }
816 
817 #ifdef XZ_DEC_CONCATENATED
xz_dec_catrun(struct xz_dec * s,struct xz_buf * b,int finish)818 XZ_EXTERN enum xz_ret xz_dec_catrun(struct xz_dec *s, struct xz_buf *b,
819 				    int finish)
820 {
821 	enum xz_ret ret;
822 
823 	if (DEC_IS_SINGLE(s->mode)) {
824 		xz_dec_reset(s);
825 		finish = true;
826 	}
827 
828 	while (true) {
829 		if (s->sequence == SEQ_STREAM_PADDING) {
830 			/*
831 			 * Skip Stream Padding. Its size must be a multiple
832 			 * of four bytes which is tracked with s->pos.
833 			 */
834 			while (true) {
835 				if (b->in_pos == b->in_size) {
836 					/*
837 					 * Note that if we are repeatedly
838 					 * given no input and finish is false,
839 					 * we will keep returning XZ_OK even
840 					 * though no progress is being made.
841 					 * The lack of XZ_BUF_ERROR support
842 					 * isn't a problem here because a
843 					 * reasonable caller will eventually
844 					 * provide more input or set finish
845 					 * to true.
846 					 */
847 					if (!finish)
848 						return XZ_OK;
849 
850 					if (s->pos != 0)
851 						return XZ_DATA_ERROR;
852 
853 					return XZ_STREAM_END;
854 				}
855 
856 				if (b->in[b->in_pos] != 0x00) {
857 					if (s->pos != 0)
858 						return XZ_DATA_ERROR;
859 
860 					break;
861 				}
862 
863 				++b->in_pos;
864 				s->pos = (s->pos + 1) & 3;
865 			}
866 
867 			/*
868 			 * More input remains. It should be a new Stream.
869 			 *
870 			 * In single-call mode xz_dec_run() will always call
871 			 * xz_dec_reset(). Thus, we need to do it here only
872 			 * in multi-call mode.
873 			 */
874 			if (DEC_IS_MULTI(s->mode))
875 				xz_dec_reset(s);
876 		}
877 
878 		ret = xz_dec_run(s, b);
879 
880 		if (ret != XZ_STREAM_END)
881 			break;
882 
883 		s->sequence = SEQ_STREAM_PADDING;
884 	}
885 
886 	return ret;
887 }
888 #endif
889 
xz_dec_init(enum xz_mode mode,uint32_t dict_max)890 XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max)
891 {
892 	struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL);
893 	if (s == NULL)
894 		return NULL;
895 
896 	s->mode = mode;
897 
898 #ifdef XZ_DEC_BCJ
899 	s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
900 	if (s->bcj == NULL)
901 		goto error_bcj;
902 #endif
903 
904 	s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
905 	if (s->lzma2 == NULL)
906 		goto error_lzma2;
907 
908 	xz_dec_reset(s);
909 	return s;
910 
911 error_lzma2:
912 #ifdef XZ_DEC_BCJ
913 	xz_dec_bcj_end(s->bcj);
914 error_bcj:
915 #endif
916 	kfree(s);
917 	return NULL;
918 }
919 
xz_dec_reset(struct xz_dec * s)920 XZ_EXTERN void xz_dec_reset(struct xz_dec *s)
921 {
922 	s->sequence = SEQ_STREAM_HEADER;
923 	s->allow_buf_error = false;
924 	s->pos = 0;
925 	s->crc = 0;
926 	memzero(&s->block, sizeof(s->block));
927 	memzero(&s->index, sizeof(s->index));
928 	s->temp.pos = 0;
929 	s->temp.size = STREAM_HEADER_SIZE;
930 }
931 
xz_dec_end(struct xz_dec * s)932 XZ_EXTERN void xz_dec_end(struct xz_dec *s)
933 {
934 	if (s != NULL) {
935 		xz_dec_lzma2_end(s->lzma2);
936 #ifdef XZ_DEC_BCJ
937 		xz_dec_bcj_end(s->bcj);
938 #endif
939 		kfree(s);
940 	}
941 }
942