//! The block decompression algorithm.

use crate::block::DecompressError;
use crate::block::MINMATCH;
use crate::sink::Sink;
use crate::sink::SliceSink;

#[allow(unused_imports)]
use alloc::vec;
#[allow(unused_imports)]
use alloc::vec::Vec;
/// Read an integer.
///
/// In LZ4, small integers are encoded with a variable number of bytes: we keep adding bytes to
/// the sum until we hit a non-0xFF byte. That byte is added to the sum as well, and then the
/// loop terminates.
///
/// # Example
///
/// ```notest
/// 255, 255, 255, 4, 2, 3, 4, 6, 7
/// ```
///
/// decodes to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
/// 4 is the first non-0xFF byte.
#[inline]
fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressError> {
    // We start at zero and count upwards.
    let mut n: u32 = 0;
    // If this byte takes value 255 (the maximum value it can take), another byte is read
    // and added to the sum. This repeats until a byte lower than 255 is read.
    loop {
        // Read the next byte and add it to the counting variable.
        let extra: u8 = *input
            .get(*input_pos)
            .ok_or(DecompressError::ExpectedAnotherByte)?;
        *input_pos += 1;
        n += extra as u32;

        // We continue if we got 255, break otherwise.
        if extra != 0xFF {
            break;
        }
    }

    Ok(n)
}
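
// Added sanity check (not part of the original test suite): exercises the variable-length
// integer decoding described in the doc comment above.
#[test]
fn check_read_integer() {
    // A single non-0xFF byte terminates immediately.
    let mut pos = 0;
    assert_eq!(read_integer(&[7, 42], &mut pos).unwrap(), 7);
    assert_eq!(pos, 1);

    // 0xFF bytes keep accumulating until a non-0xFF byte is read: 255 + 255 + 4 = 514.
    let mut pos = 0;
    assert_eq!(read_integer(&[255, 255, 4, 99], &mut pos).unwrap(), 514);
    assert_eq!(pos, 3);

    // Running out of input yields an error instead of panicking.
    let mut pos = 0;
    assert!(read_integer(&[255, 255], &mut pos).is_err());
}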

/// Read a little-endian 16-bit integer from the input stream.
#[inline]
fn read_u16(input: &[u8], input_pos: &mut usize) -> Result<u16, DecompressError> {
    let dst = input
        .get(*input_pos..*input_pos + 2)
        .ok_or(DecompressError::ExpectedAnotherByte)?;
    *input_pos += 2;
    Ok(u16::from_le_bytes(dst.try_into().unwrap()))
}
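
// Added sanity check (not part of the original test suite): the two offset bytes are
// interpreted as little-endian and the read advances the input position.
#[test]
fn check_read_u16() {
    let mut pos = 1;
    assert_eq!(read_u16(&[0xAA, 0x34, 0x12, 0xBB], &mut pos).unwrap(), 0x1234);
    assert_eq!(pos, 3);

    // Fewer than two remaining bytes is reported as an error.
    let mut pos = 3;
    assert!(read_u16(&[0xAA, 0x34, 0x12, 0xBB], &mut pos).is_err());
}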

const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111;
const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000;

#[test]
fn check_token() {
    assert!(!does_token_fit(15));
    assert!(does_token_fit(14));
    assert!(does_token_fit(114));
    assert!(!does_token_fit(0b11110000));
    assert!(does_token_fit(0b10110000));
}

/// The token consists of two parts: the literal length (upper 4 bits) and the match_length
/// (lower 4 bits). If the literal length and match_length are both below 15, no additional
/// length bytes need to be read, so all the metadata fits in the token.
#[inline]
fn does_token_fit(token: u8) -> bool {
    !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
        || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline(always)] // (always) necessary to get the best performance in non-LTO builds
pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
    input: &[u8],
    output: &mut S,
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    let mut input_pos = 0;
    let initial_output_pos = output.pos();

    let safe_input_pos = input
        .len()
        .saturating_sub(16 /* literal copy */ + 2 /* u16 match offset */);
    let mut safe_output_pos = output
        .capacity()
        .saturating_sub(16 /* literal copy */ + 18 /* match copy */);

    if USE_DICT {
        // In the dictionary case the output pointer is moved by the match length in the
        // dictionary. This may be up to 17 bytes without exiting the loop, so we need to ensure
        // that there are at least an additional 17 bytes of space left in the output buffer for
        // the fast loop.
        safe_output_pos = safe_output_pos.saturating_sub(17);
    };

    // Exhaust the decoder by reading and decompressing all sequences until the remaining input
    // is empty.
    loop {
        // Read the token. The token is the first byte of each sequence. It is divided into two
        // 4-bit fields, a higher and a lower, representing the literals' length and the back
        // reference's length, respectively.
        let token = *input
            .get(input_pos)
            .ok_or(DecompressError::ExpectedAnotherByte)?;
        input_pos += 1;
        // Check whether we can take the hot path.
        // In most cases the metadata fits in the single token byte (statistically) and we are at
        // a safe distance to the end of both buffers. This enables some optimized handling.
        //
        // Ideally we would check the safe output position with `output.pos() <= safe_output_pos`,
        // but that doesn't work when safe_output_pos is 0 due to the saturating_sub. So we use
        // `<` instead of `<=`, which covers that case.
        if does_token_fit(token) && input_pos <= safe_input_pos && output.pos() < safe_output_pos {
            let literal_length = (token >> 4) as usize;

            // Casting to &[u8; 16] doesn't seem to make a difference vs &[u8] (same assembly).
            let input: &[u8; 16] = input[input_pos..input_pos + 16].try_into().unwrap();

            // Copy the literal.
            // The literal is at most 14 bytes, and the safe-distance check above ensures
            // that we are far enough away from the end, so we can safely copy 16 bytes.
            output.extend_from_slice_wild(input, literal_length);
            input_pos += literal_length;

            // The offset bytes sit right after the literal in the shadowed 16-byte slice;
            // clone the position as we don't want to mutate `literal_length`.
            let offset = read_u16(input, &mut literal_length.clone())? as usize;
            input_pos += 2;

            let mut match_length = MINMATCH + (token & 0xF) as usize;

            if USE_DICT && offset > output.pos() {
                let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
                if copied == match_length {
                    continue;
                }
                // Match crosses ext_dict and output; the offset is still correct as the output
                // position increased.
                match_length -= copied;
            }

            // In this branch we know that match_length is at most 18 (14 + MINMATCH).
            // But the copy source and destination can overlap, so make sure they are at least
            // 18 bytes apart to enable an optimized copy of 18 bytes.
            let start = output.pos().saturating_sub(offset);
            if offset >= match_length {
                output.extend_from_within(start, 18, match_length);
            } else {
                output.extend_from_within_overlapping(start, match_length)
            }

            continue;
        }

        // Now, we read the literals section.
        // If the literal length in the token is 15, another length byte (or more) will be read
        // and added to it.
        let mut literal_length = (token >> 4) as usize;
        if literal_length != 0 {
            if literal_length == 15 {
                // The literal length took the maximum value, indicating that there are more
                // than 15 literal bytes. We read the extra integer.
                literal_length += read_integer(input, &mut input_pos)? as usize;
            }

            if literal_length > input.len() - input_pos {
                return Err(DecompressError::LiteralOutOfBounds);
            }
            #[cfg(not(feature = "unchecked-decode"))]
            if literal_length > output.capacity() - output.pos() {
                return Err(DecompressError::OutputTooSmall {
                    expected: output.pos() + literal_length,
                    actual: output.capacity(),
                });
            }
            output.extend_from_slice(&input[input_pos..input_pos + literal_length]);
            input_pos += literal_length;
        }

        // If the input stream is exhausted, we break out of the loop. This is only the case at
        // the end of the stream, since an intact block always ends with a literals-only sequence.
        if input_pos >= input.len() {
            break;
        }

        let offset = read_u16(input, &mut input_pos)? as usize;
        // Obtain the initial match length. The match length is the length of the duplicate
        // segment which will later be copied from data previously decompressed into the output
        // buffer. The initial length is derived from the second part of the token (the lower
        // nibble), which we read earlier. Since having a match length of less than 4 would mean
        // negative compression ratio, we start at 4 (MINMATCH).

        // The initial match length can maximally be 19 (4 + 15). As with the literal length, a
        // lower nibble of 15 indicates that there are more bytes to read.
        let mut match_length = MINMATCH + (token & 0xF) as usize;
        if match_length == MINMATCH + 15 {
            // The match length took the maximum value, indicating that there are more bytes.
            // We read the extra integer.
            match_length += read_integer(input, &mut input_pos)? as usize;
        }

        #[cfg(not(feature = "unchecked-decode"))]
        if output.pos() + match_length > output.capacity() {
            return Err(DecompressError::OutputTooSmall {
                expected: output.pos() + match_length,
                actual: output.capacity(),
            });
        }
        if USE_DICT && offset > output.pos() {
            let copied = copy_from_dict(output, ext_dict, offset, match_length)?;
            if copied == match_length {
                continue;
            }
            // Match crosses ext_dict and output; the offset is still correct as the output
            // position increased.
            match_length -= copied;
        }
        // We now copy from the already decompressed buffer. This allows us to store duplicates
        // by simply referencing the earlier location.
        duplicate_slice(output, offset, match_length)?;
    }
    Ok(output.pos() - initial_output_pos)
}

#[inline]
fn copy_from_dict(
    output: &mut impl Sink,
    ext_dict: &[u8],
    offset: usize,
    match_length: usize,
) -> Result<usize, DecompressError> {
    // If we're here we know offset > output.pos()
    debug_assert!(offset > output.pos());
    let (dict_offset, did_overflow) = ext_dict.len().overflowing_sub(offset - output.pos());
    if did_overflow {
        return Err(DecompressError::OffsetOutOfBounds);
    }
    // Can't copy past ext_dict len, the match may cross dict and output
    let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
    let ext_match = &ext_dict[dict_offset..dict_offset + dict_match_length];
    output.extend_from_slice(ext_match);
    Ok(dict_match_length)
}
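
// Added sanity check (not part of the original test suite), assuming `SliceSink` behaves as it
// does in `decompress_into` below: an offset pointing before the start of the output resolves
// against the end of the external dictionary.
#[test]
fn check_copy_from_dict() {
    let ext_dict = b"abcdef";
    let mut buf = [0u8; 8];
    // The output is still empty (pos == 0); offset 4 points 4 bytes before the dict end.
    let copied = copy_from_dict(&mut SliceSink::new(&mut buf, 0), ext_dict, 4, 2).unwrap();
    assert_eq!(copied, 2);
    assert_eq!(&buf[..2], b"cd");

    // An offset reaching before the start of the dictionary is rejected.
    assert!(copy_from_dict(&mut SliceSink::new(&mut buf, 0), ext_dict, 7, 2).is_err());
}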

/// Extends output by self-referential copies.
#[inline(always)] // (always) necessary otherwise compiler fails to inline it
fn duplicate_slice(
    output: &mut impl Sink,
    offset: usize,
    match_length: usize,
) -> Result<(), DecompressError> {
    // This function assumes output will fit match_length, it might panic otherwise.
    if match_length > offset {
        duplicate_overlapping_slice(output, offset, match_length)?;
    } else {
        let (start, did_overflow) = output.pos().overflowing_sub(offset);
        if did_overflow {
            return Err(DecompressError::OffsetOutOfBounds);
        }

        match match_length {
            0..=32 if output.pos() + 32 <= output.capacity() => {
                output.extend_from_within(start, 32, match_length)
            }
            33..=64 if output.pos() + 64 <= output.capacity() => {
                output.extend_from_within(start, 64, match_length)
            }
            _ => output.extend_from_within(start, match_length, match_length),
        }
    }
    Ok(())
}

/// Self-referential copy for the case where the copy source (end of output minus `offset`) plus
/// `match_length` overlaps into the not-yet-written part of the output.
#[inline]
fn duplicate_overlapping_slice(
    sink: &mut impl Sink,
    offset: usize,
    match_length: usize,
) -> Result<(), DecompressError> {
    // This function assumes output will fit match_length, it might panic otherwise.
    let (start, did_overflow) = sink.pos().overflowing_sub(offset);
    if did_overflow {
        return Err(DecompressError::OffsetOutOfBounds);
    }
    if offset == 1 {
        let val = sink.byte_at(start);
        sink.extend_with_fill(val, match_length);
    } else {
        sink.extend_from_within_overlapping(start, match_length);
    }
    Ok(())
}
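
// Added sanity check (not part of the original test suite), assuming `SliceSink` can be
// constructed over a partially filled buffer: an offset of 1 degenerates into a run-length
// fill of the last written byte.
#[test]
fn check_duplicate_overlapping() {
    // Two bytes ("ab") are already in the output; copy 4 more bytes from offset 1.
    let mut buf = *b"ab\0\0\0\0\0\0";
    duplicate_slice(&mut SliceSink::new(&mut buf, 2), 1, 4).unwrap();
    assert_eq!(&buf[..6], b"abbbbb");

    // An offset pointing before the start of the output is rejected.
    let mut buf = [0u8; 8];
    assert!(duplicate_slice(&mut SliceSink::new(&mut buf, 0), 1, 4).is_err());
}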

/// Decompress all bytes of `input` into `output`.
/// `output` should be preallocated with the size of the uncompressed data.
#[inline]
pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
    decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline]
pub fn decompress_into_with_dict(
    input: &[u8],
    output: &mut [u8],
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size
/// in little endian. Can be used in conjunction with `compress_prepend_size`.
#[inline]
pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress(input, uncompressed_size)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
    let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
    let decomp_len =
        decompress_internal::<false, _>(input, &mut SliceSink::new(&mut decompressed, 0), b"")?;
    decompressed.truncate(decomp_len);
    Ok(decompressed)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size
/// in little endian. Can be used in conjunction with `compress_prepend_size_with_dict`.
#[inline]
pub fn decompress_size_prepended_with_dict(
    input: &[u8],
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress_with_dict(input, uncompressed_size, ext_dict)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the
/// uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress_with_dict(
    input: &[u8],
    min_uncompressed_size: usize,
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size];
    let decomp_len =
        decompress_internal::<true, _>(input, &mut SliceSink::new(&mut decompressed, 0), ext_dict)?;
    decompressed.truncate(decomp_len);
    Ok(decompressed)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn all_literal() {
        assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
    }
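
    // Added sanity check (not in the original test suite): one literal run followed by an
    // overlapping match with offset 1, terminated by an empty token.
    #[test]
    fn literals_then_overlapping_match() {
        // Token 0x21: 2 literals ("ab"), then match length 4 + 1 = 5 at offset 1 => "ab" + "bbbbb".
        assert_eq!(
            decompress(&[0x21, b'a', b'b', 1, 0, 0x00], 7).unwrap(),
            b"abbbbbb"
        );
    }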

    // this error test is only valid in safe-decode.
    #[cfg(feature = "safe-decode")]
    #[test]
    fn offset_oob() {
        decompress(&[0x10, b'a', 2, 0], 4).unwrap_err();
        decompress(&[0x40, b'a', 1, 0], 4).unwrap_err();
    }
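
    // Added sanity check (not in the original test suite): the 4-byte little-endian size prefix
    // is consumed before decompression, as described on `decompress_size_prepended`.
    #[test]
    fn size_prepended_all_literal() {
        assert_eq!(
            decompress_size_prepended(&[3, 0, 0, 0, 0x30, b'a', b'4', b'9']).unwrap(),
            b"a49"
        );
    }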
}