//! The block decompression algorithm.

use crate::block::DecompressError;
use crate::block::MINMATCH;
use crate::sink::Sink;
use crate::sink::SliceSink;

#[allow(unused_imports)]
use alloc::vec;
#[allow(unused_imports)]
use alloc::vec::Vec;

/// Read an integer.
///
/// In LZ4, we encode small integers in a way that allows an arbitrary number of bytes. In
/// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add
/// this byte to our sum and terminate the loop.
///
/// # Example
///
/// ```notest
///     255, 255, 255, 4, 2, 3, 4, 6, 7
/// ```
///
/// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
/// 4 is the first non-0xFF byte.
#[inline]
fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressError> {
    // We start at zero and count upwards.
    let mut n: u32 = 0;
    // If this byte takes the value 255 (the maximum value it can take), another byte is read
    // and added to the sum. This repeats until a byte lower than 255 is read.
    loop {
        // Read the next byte and add it to the counting variable.
        let extra: u8 = *input
            .get(*input_pos)
            .ok_or(DecompressError::ExpectedAnotherByte)?;
        *input_pos += 1;
        n += extra as u32;

        // We continue if we got 255, break otherwise.
        if extra != 0xFF {
            break;
        }
    }

    Ok(n)
}
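
// Illustrative test of the doc example above: 255 + 255 + 255 + 4 = 769, and
// reading stops at the first non-0xFF byte, so the trailing bytes stay unread.
#[test]
fn read_integer_doc_example() {
    let input = [255, 255, 255, 4, 2, 3, 4, 6, 7];
    let mut pos = 0;
    assert_eq!(read_integer(&input, &mut pos).unwrap(), 769);
    assert_eq!(pos, 4);
}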

/// Read a little-endian 16-bit integer from the input stream.
#[inline]
fn read_u16(input: &[u8], input_pos: &mut usize) -> Result<u16, DecompressError> {
    let dst = input
        .get(*input_pos..*input_pos + 2)
        .ok_or(DecompressError::ExpectedAnotherByte)?;
    *input_pos += 2;
    Ok(u16::from_le_bytes(dst.try_into().unwrap()))
}
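
// Illustrative: the two bytes are interpreted little-endian, so [0x01, 0x02]
// decodes to 0x0201 and the read advances the position by two.
#[test]
fn read_u16_le() {
    let mut pos = 0;
    assert_eq!(read_u16(&[0x01, 0x02], &mut pos).unwrap(), 0x0201);
    assert_eq!(pos, 2);
}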

// The upper 4 bits of the token hold the literal length, the lower 4 bits the match length.
const FIT_TOKEN_MASK_LITERAL: u8 = 0b11110000;
const FIT_TOKEN_MASK_MATCH: u8 = 0b00001111;

#[test]
fn check_token() {
    assert!(!does_token_fit(15));
    assert!(does_token_fit(14));
    assert!(does_token_fit(114));
    assert!(!does_token_fit(0b11110000));
    assert!(does_token_fit(0b10110000));
}

/// The token consists of two parts, the literal length (upper 4 bits) and the match length
/// (lower 4 bits). If the literal length and match length are both below 15, we don't need to
/// read additional data, so the token does fit the metadata.
#[inline]
fn does_token_fit(token: u8) -> bool {
    !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
        || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline(always)] // (always) necessary to get the best performance in non-LTO builds
pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
    input: &[u8],
    output: &mut S,
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    let mut input_pos = 0;
    let initial_output_pos = output.pos();

    let safe_input_pos = input
        .len()
        .saturating_sub(16 /* literal copy */ + 2 /* u16 match offset */);
    let mut safe_output_pos = output
        .capacity()
        .saturating_sub(16 /* literal copy */ + 18 /* match copy */);

    if USE_DICT {
        // In the dictionary case the output pointer is moved by the match length in the dictionary.
        // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have
        // at least an additional 17 bytes of space left in the output buffer in the fast loop.
        safe_output_pos = safe_output_pos.saturating_sub(17);
    };

    // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is
    // empty.
    loop {
        // Read the token. The token is the first byte in a block. It is divided into two 4-bit
        // fields, a higher and a lower, representing the literals' length and the back
        // reference's length, respectively.
        let token = *input
            .get(input_pos)
            .ok_or(DecompressError::ExpectedAnotherByte)?;
        input_pos += 1;

        // Check for the hot loop.
        // In most cases the metadata does fit in a single 1-byte token (statistically) and we are
        // at a safe distance to the end. This enables some optimized handling.
        //
        // Ideally we would check the safe output position with `output.pos() <= safe_output_pos`,
        // but that doesn't work when safe_output_pos is 0 due to the saturating_sub. So we use
        // `<` instead of `<=`, which covers that case.
        if does_token_fit(token) && input_pos <= safe_input_pos && output.pos() < safe_output_pos {
            let literal_length = (token >> 4) as usize;

            // Casting to [u8; 16] doesn't seem to make a difference vs &[u8] (same assembly).
            let input: &[u8; 16] = input[input_pos..input_pos + 16].try_into().unwrap();

            // Copy the literal.
            // The literal is at most 14 bytes, and the safe-distance check above ensures
            // that we are far enough away from the end, so we can safely copy 16 bytes.
            output.extend_from_slice_wild(input, literal_length);
            input_pos += literal_length;

            // The offset bytes sit right after the literals in the 16-byte window; pass a clone
            // of literal_length since read_u16 would advance it and we don't want to mutate it.
            let offset = read_u16(input, &mut literal_length.clone())? as usize;
            input_pos += 2;

            let mut match_length = MINMATCH + (token & 0xF) as usize;

            if USE_DICT && offset > output.pos() {
                let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
                if copied == match_length {
                    continue;
                }
                // The match crosses ext_dict and output; the offset is still correct as the
                // output position increased.
                match_length -= copied;
            }

            // In this branch we know that match_length is at most 18 (14 + MINMATCH).
            // But the copies can overlap, so make sure source and destination are at least
            // 18 bytes apart to enable an optimized copy of 18 bytes.
            let start = output.pos().saturating_sub(offset);
            if offset >= match_length {
                output.extend_from_within(start, 18, match_length);
            } else {
                output.extend_from_within_overlapping(start, match_length)
            }

            continue;
        }

        // Now, we read the literal section.
        // If the initial value is 15, another byte will be read and added to it.
        let mut literal_length = (token >> 4) as usize;
        if literal_length != 0 {
            if literal_length == 15 {
                // The literal length took the maximal value, indicating that there are more
                // than 15 literal bytes. We read the extra integer.
                literal_length += read_integer(input, &mut input_pos)? as usize;
            }

            if literal_length > input.len() - input_pos {
                return Err(DecompressError::LiteralOutOfBounds);
            }
            #[cfg(not(feature = "unchecked-decode"))]
            if literal_length > output.capacity() - output.pos() {
                return Err(DecompressError::OutputTooSmall {
                    expected: output.pos() + literal_length,
                    actual: output.capacity(),
                });
            }
            output.extend_from_slice(&input[input_pos..input_pos + literal_length]);
            input_pos += literal_length;
        }

        // If the input stream is emptied, we break out of the loop. This is only the case
        // at the end of the stream, since the block is intact otherwise.
        if input_pos >= input.len() {
            break;
        }

        let offset = read_u16(input, &mut input_pos)? as usize;
        // Obtain the initial match length. The match length is the length of the duplicate segment
        // which will later be copied from data previously decompressed into the output buffer. The
        // initial length is derived from the second part of the token (the lower nibble) we read
        // earlier. Since having a match length of less than 4 would mean a negative compression
        // ratio, we start at 4 (MINMATCH).

        // The initial match length can maximally be 19. As with the literal length, this indicates
        // that there are more bytes to read.
        let mut match_length = MINMATCH + (token & 0xF) as usize;
        if match_length == MINMATCH + 15 {
            // The match length took the maximal value, indicating that there are more bytes. We
            // read the extra integer.
            match_length += read_integer(input, &mut input_pos)? as usize;
        }

        #[cfg(not(feature = "unchecked-decode"))]
        if output.pos() + match_length > output.capacity() {
            return Err(DecompressError::OutputTooSmall {
                expected: output.pos() + match_length,
                actual: output.capacity(),
            });
        }
        if USE_DICT && offset > output.pos() {
            let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
            if copied == match_length {
                continue;
            }
            // The match crosses ext_dict and output; the offset is still correct as the
            // output length was increased.
            match_length -= copied;
        }
        // We now copy from the already decompressed buffer. This allows us to store duplicates
        // by simply referencing the other location.
        duplicate_slice(output, offset, match_length)?;
    }
    Ok(output.pos() - initial_output_pos)
}

#[inline]
fn copy_from_dict(
    output: &mut impl Sink,
    ext_dict: &[u8],
    offset: usize,
    match_length: usize,
) -> Result<usize, DecompressError> {
    // If we're here we know offset > output.pos().
    debug_assert!(offset > output.pos());
    let (dict_offset, did_overflow) = ext_dict.len().overflowing_sub(offset - output.pos());
    if did_overflow {
        return Err(DecompressError::OffsetOutOfBounds);
    }
    // Can't copy past the ext_dict length; the match may cross dict and output.
    let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
    let ext_match = &ext_dict[dict_offset..dict_offset + dict_match_length];
    output.extend_from_slice(ext_match);
    Ok(dict_match_length)
}

/// Extends the output by self-referential copies.
#[inline(always)] // (always) necessary, otherwise the compiler fails to inline it
fn duplicate_slice(
    output: &mut impl Sink,
    offset: usize,
    match_length: usize,
) -> Result<(), DecompressError> {
    // This function assumes the output will fit match_length; it might panic otherwise.
    if match_length > offset {
        duplicate_overlapping_slice(output, offset, match_length)?;
    } else {
        let (start, did_overflow) = output.pos().overflowing_sub(offset);
        if did_overflow {
            return Err(DecompressError::OffsetOutOfBounds);
        }

        match match_length {
            0..=32 if output.pos() + 32 <= output.capacity() => {
                output.extend_from_within(start, 32, match_length)
            }
            33..=64 if output.pos() + 64 <= output.capacity() => {
                output.extend_from_within(start, 64, match_length)
            }
            _ => output.extend_from_within(start, match_length, match_length),
        }
    }
    Ok(())
}

/// Self-referential copy for the case where the copy source (end of output minus offset) plus
/// match_length overlaps into the output.
#[inline]
fn duplicate_overlapping_slice(
    sink: &mut impl Sink,
    offset: usize,
    match_length: usize,
) -> Result<(), DecompressError> {
    // This function assumes the output will fit match_length; it might panic otherwise.
    let (start, did_overflow) = sink.pos().overflowing_sub(offset);
    if did_overflow {
        return Err(DecompressError::OffsetOutOfBounds);
    }
    if offset == 1 {
        // An offset of 1 is a run of a single repeated byte, so a simple fill suffices.
        let val = sink.byte_at(start);
        sink.extend_with_fill(val, match_length);
    } else {
        sink.extend_from_within_overlapping(start, match_length);
    }
    Ok(())
}

/// Decompress all bytes of `input` into `output`.
/// `output` should be preallocated with the size of the uncompressed data.
#[inline]
pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
    decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
}
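
// Illustrative usage of `decompress_into` with a caller-provided buffer; the
// compressed bytes are the literal-only block from the `all_literal` test below.
#[test]
fn decompress_into_prealloc() {
    let mut buf = [0u8; 3];
    let n = decompress_into(&[0x30, b'a', b'4', b'9'], &mut buf).unwrap();
    assert_eq!(&buf[..n], b"a49");
}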

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline]
pub fn decompress_into_with_dict(
    input: &[u8],
    output: &mut [u8],
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
/// little endian. Can be used in conjunction with `compress_prepend_size`.
#[inline]
pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress(input, uncompressed_size)
}
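
// Illustrative: per the doc above, the block is prefixed with its uncompressed
// size as a little-endian u32 (here 3), followed by a literal-only block.
#[test]
fn decompress_size_prepended_literal() {
    let mut input = 3u32.to_le_bytes().to_vec();
    input.extend_from_slice(&[0x30, b'a', b'4', b'9']);
    assert_eq!(decompress_size_prepended(&input).unwrap(), b"a49");
}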

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
    let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
    let decomp_len =
        decompress_internal::<false, _>(input, &mut SliceSink::new(&mut decompressed, 0), b"")?;
    decompressed.truncate(decomp_len);
    Ok(decompressed)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
/// little endian. Can be used in conjunction with `compress_prepend_size_with_dict`.
#[inline]
pub fn decompress_size_prepended_with_dict(
    input: &[u8],
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress_with_dict(input, uncompressed_size, ext_dict)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress_with_dict(
    input: &[u8],
    min_uncompressed_size: usize,
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
    let decomp_len =
        decompress_internal::<true, _>(input, &mut SliceSink::new(&mut decompressed, 0), ext_dict)?;
    decompressed.truncate(decomp_len);
    Ok(decompressed)
}
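
// Illustrative dictionary example: with no literals, an offset larger than the
// current output position reaches back into `ext_dict`. Token 0x00 copies a
// 4-byte match (MINMATCH) from the dictionary; the final token 0x10 supplies
// the closing literal a stream must end with.
#[test]
fn decompress_with_dict_match() {
    let decompressed = decompress_with_dict(&[0x00, 4, 0, 0x10, b'!'], 5, b"abcdef").unwrap();
    assert_eq!(decompressed, b"cdef!");
}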

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn all_literal() {
        assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
    }

    // This error test is only valid in safe-decode.
    #[cfg(feature = "safe-decode")]
    #[test]
    fn offset_oob() {
        decompress(&[0x10, b'a', 2, 0], 4).unwrap_err();
        decompress(&[0x40, b'a', 1, 0], 4).unwrap_err();
    }
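
    // Illustrative roundtrip with an overlapping match: token 0x11 emits one
    // literal ('a') then copies match_length = MINMATCH + 1 = 5 bytes from
    // offset 1, i.e. an RLE-style run of 'a'; the final token 0x10 adds the
    // closing literal required at the end of a stream.
    #[test]
    fn overlapping_match() {
        assert_eq!(
            decompress(&[0x11, b'a', 1, 0, 0x10, b'b'], 7).unwrap(),
            b"aaaaaab"
        );
    }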
}