1 //! The compression algorithm.
2 //!
3 //! We make use of hash tables to find duplicates. This gives a reasonable compression ratio at
4 //! high performance. Memory usage is fixed, which, unlike other approaches, keeps the algorithm
5 //! from being memory hungry.
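//!
//! A round-trip sketch using the public block API (illustrative; it assumes the crate-root
//! re-exports `compress_prepend_size`/`decompress_size_prepended`):
//!
//! ```rust
//! let input = b"Hello, compression! Hello, compression!";
//! let compressed = lz4_flex::compress_prepend_size(input);
//! let decompressed = lz4_flex::decompress_size_prepended(&compressed).unwrap();
//! assert_eq!(decompressed.as_slice(), &input[..]);
//! ```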
6
7 use crate::block::hashtable::HashTable;
8 use crate::block::END_OFFSET;
9 use crate::block::LZ4_MIN_LENGTH;
10 use crate::block::MAX_DISTANCE;
11 use crate::block::MFLIMIT;
12 use crate::block::MINMATCH;
13 #[cfg(not(feature = "safe-encode"))]
14 use crate::sink::PtrSink;
15 use crate::sink::Sink;
16 use crate::sink::SliceSink;
17 #[allow(unused_imports)]
18 use alloc::vec;
19
20 #[allow(unused_imports)]
21 use alloc::vec::Vec;
22
23 use super::hashtable::HashTable4K;
24 use super::hashtable::HashTable4KU16;
25 use super::{CompressError, WINDOW_SIZE};
26
27 /// Increase the step size after `1 << INCREASE_STEPSIZE_BITSHIFT` non-matches.
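///
/// With a shift of 5 the match search first probes every byte, then the step grows by one after
/// every 32 consecutive misses (`step_size = non_match_count >> INCREASE_STEPSIZE_BITSHIFT` in
/// the inner loop of `compress_internal`).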
28 const INCREASE_STEPSIZE_BITSHIFT: usize = 5;
29
30 /// Read a 4-byte "batch" from some position.
31 ///
32 /// This will read a native-endian 4-byte integer from some position.
33 #[inline]
34 #[cfg(not(feature = "safe-encode"))]
35 pub(super) fn get_batch(input: &[u8], n: usize) -> u32 {
36 unsafe { read_u32_ptr(input.as_ptr().add(n)) }
37 }
38
39 #[inline]
40 #[cfg(feature = "safe-encode")]
41 pub(super) fn get_batch(input: &[u8], n: usize) -> u32 {
42 u32::from_ne_bytes(input[n..n + 4].try_into().unwrap())
43 }
44
45 /// Read a usize-sized "batch" from some position.
46 ///
47 /// This will read a native-endian usize from some position.
48 #[inline]
49 #[allow(dead_code)]
50 #[cfg(not(feature = "safe-encode"))]
51 pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize {
52 unsafe { read_usize_ptr(input.as_ptr().add(n)) }
53 }
54
55 #[inline]
56 #[allow(dead_code)]
57 #[cfg(feature = "safe-encode")]
58 pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize {
59 const USIZE_SIZE: usize = core::mem::size_of::<usize>();
60 let arr: &[u8; USIZE_SIZE] = input[n..n + USIZE_SIZE].try_into().unwrap();
61 usize::from_ne_bytes(*arr)
62 }
63
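/// Builds the token's upper nibble from the literal length, saturating at 0xF.
///
/// Worked example (illustrative): `lit_len = 3` yields `0x30`; `lit_len = 20` saturates to
/// `0xF0` and the remainder `20 - 0xF` is written later by `write_integer`
/// (see `test_token_nibble_layout` in the tests below).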
64 #[inline]
65 fn token_from_literal(lit_len: usize) -> u8 {
66 if lit_len < 0xF {
67 // Since we can fit the literals length into it, there is no need for saturation.
68 (lit_len as u8) << 4
69 } else {
70 // We were unable to fit the literals into it, so we saturate to 0xF. We will later
71 // write the extensional value.
72 0xF0
73 }
74 }
75
76 #[inline]
77 fn token_from_literal_and_match_length(lit_len: usize, duplicate_length: usize) -> u8 {
78 let mut token = if lit_len < 0xF {
79 // Since we can fit the literals length into it, there is no need for saturation.
80 (lit_len as u8) << 4
81 } else {
82 // We were unable to fit the literals into it, so we saturate to 0xF. We will later
83 // write the extensional value.
84 0xF0
85 };
86
87 token |= if duplicate_length < 0xF {
88 // We could fit it in.
89 duplicate_length as u8
90 } else {
91 // We were unable to fit it in, so we default to 0xF, which will later be extended.
92 0xF
93 };
94
95 token
96 }
97
98 /// Counts the number of same bytes in two byte streams.
99 /// `input` is the complete input
100 /// `cur` is the current position in the input. It will be incremented by the number of matched
101 /// bytes. `source` is either the same as `input` or an external slice.
102 /// `candidate` is the candidate position in `source`
103 ///
104 /// The function ignores the last END_OFFSET bytes in input as those should be literals.
105 #[inline]
106 #[cfg(feature = "safe-encode")]
107 fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize {
108 const USIZE_SIZE: usize = core::mem::size_of::<usize>();
109 let cur_slice = &input[*cur..input.len() - END_OFFSET];
110 let cand_slice = &source[candidate..];
111
112 let mut num = 0;
113 for (block1, block2) in cur_slice
114 .chunks_exact(USIZE_SIZE)
115 .zip(cand_slice.chunks_exact(USIZE_SIZE))
116 {
117 let input_block = usize::from_ne_bytes(block1.try_into().unwrap());
118 let match_block = usize::from_ne_bytes(block2.try_into().unwrap());
119
120 if input_block == match_block {
121 num += USIZE_SIZE;
122 } else {
123 let diff = input_block ^ match_block;
124 num += (diff.to_le().trailing_zeros() / 8) as usize;
125 *cur += num;
126 return num;
127 }
128 }
129
130 // If we're here we may have 1 to 7 bytes left to check close to the end of input
131 // or source slices. Since this is a rare occurrence we mark it cold, which gives
132 // ~5% better performance.
133 #[cold]
134 fn count_same_bytes_tail(a: &[u8], b: &[u8], offset: usize) -> usize {
135 a.iter()
136 .zip(b)
137 .skip(offset)
138 .take_while(|(a, b)| a == b)
139 .count()
140 }
141 num += count_same_bytes_tail(cur_slice, cand_slice, num);
142
143 *cur += num;
144 num
145 }
146
147 /// Counts the number of same bytes in two byte streams.
148 /// `input` is the complete input
149 /// `cur` is the current position in the input. It will be incremented by the number of matched
150 /// bytes. `source` is either the same as `input` or an external slice.
151 /// `candidate` is the candidate position in `source`
152 ///
153 /// The function ignores the last END_OFFSET bytes in input as those should be literals.
154 #[inline]
155 #[cfg(not(feature = "safe-encode"))]
156 fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize {
157 let max_input_match = input.len().saturating_sub(*cur + END_OFFSET);
158 let max_candidate_match = source.len() - candidate;
159 // Considering both limits, calculate how far we may match in input.
160 let input_end = *cur + max_input_match.min(max_candidate_match);
161
162 let start = *cur;
163 let mut source_ptr = unsafe { source.as_ptr().add(candidate) };
164
165 // compare 4/8 bytes blocks depending on the arch
166 const STEP_SIZE: usize = core::mem::size_of::<usize>();
167 while *cur + STEP_SIZE <= input_end {
168 let diff = read_usize_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_usize_ptr(source_ptr);
169
170 if diff == 0 {
171 *cur += STEP_SIZE;
172 unsafe {
173 source_ptr = source_ptr.add(STEP_SIZE);
174 }
175 } else {
176 *cur += (diff.to_le().trailing_zeros() / 8) as usize;
177 return *cur - start;
178 }
179 }
180
181 // compare 4 bytes block
182 #[cfg(target_pointer_width = "64")]
183 {
184 if input_end - *cur >= 4 {
185 let diff = read_u32_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_u32_ptr(source_ptr);
186
187 if diff == 0 {
188 *cur += 4;
189 unsafe {
190 source_ptr = source_ptr.add(4);
191 }
192 } else {
193 *cur += (diff.to_le().trailing_zeros() / 8) as usize;
194 return *cur - start;
195 }
196 }
197 }
198
199 // compare 2 bytes block
200 if input_end - *cur >= 2
201 && unsafe { read_u16_ptr(input.as_ptr().add(*cur)) == read_u16_ptr(source_ptr) }
202 {
203 *cur += 2;
204 unsafe {
205 source_ptr = source_ptr.add(2);
206 }
207 }
208
209 if *cur < input_end
210 && unsafe { input.as_ptr().add(*cur).read() } == unsafe { source_ptr.read() }
211 {
212 *cur += 1;
213 }
214
215 *cur - start
216 }
217
218 /// Write an integer to the output.
219 ///
220 /// Each additional byte then represents a value from 0 to 255, which is added to the previous value
221 /// to produce a total length. When the byte value is 255, another byte must be read and added, and
222 /// so on. There can be any number of bytes of value "255" following the token.
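///
/// For example, a remaining length of 260 is emitted as the bytes `0xFF, 0x05` (255 + 5 = 260);
/// see `test_write_integer_lsic` in the tests below.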
223 #[inline]
224 #[cfg(feature = "safe-encode")]
225 fn write_integer(output: &mut impl Sink, mut n: usize) {
226 // Note: Since `n` is usually < 0xFF and writing multiple bytes to the output
227 // requires 2 branches of bound check (due to the possibility of add overflows)
228 // the simple byte at a time implementation below is faster in most cases.
229 while n >= 0xFF {
230 n -= 0xFF;
231 push_byte(output, 0xFF);
232 }
233 push_byte(output, n as u8);
234 }
235
236 /// Write an integer to the output.
237 ///
238 /// Each additional byte then represents a value from 0 to 255, which is added to the previous value
239 /// to produce a total length. When the byte value is 255, another byte must be read and added, and
240 /// so on. There can be any number of bytes of value "255" following the token.
241 #[inline]
242 #[cfg(not(feature = "safe-encode"))]
243 fn write_integer(output: &mut impl Sink, mut n: usize) {
244 // Write the 0xFF bytes as long as the integer is higher than said value.
245 if n >= 4 * 0xFF {
246 // In this unlikely branch we use a fill instead of a loop,
247 // otherwise rustc may output a large unrolled/vectorized loop.
248 let bulk = n / (4 * 0xFF);
249 n %= 4 * 0xFF;
250 unsafe {
251 core::ptr::write_bytes(output.pos_mut_ptr(), 0xFF, 4 * bulk);
252 output.set_pos(output.pos() + 4 * bulk);
253 }
254 }
255
256 // Handle last 1 to 4 bytes
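// e.g. with a remaining n of 300: one 0xFF byte is kept (300 / 255 == 1) and the final
// byte becomes 300 % 255 == 45, so the encoded extension is `0xFF, 45`.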
257 push_u32(output, 0xFFFFFFFF);
258 // Updating output len for the remainder
259 unsafe {
260 output.set_pos(output.pos() - 4 + 1 + n / 255);
261 // Write the remaining byte.
262 *output.pos_mut_ptr().sub(1) = (n % 255) as u8;
263 }
264 }
265
266 /// Handle the last bytes from the input as literals
267 #[cold]
268 fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) {
269 let lit_len = input.len() - start;
270
271 let token = token_from_literal(lit_len);
272 push_byte(output, token);
273 if lit_len >= 0xF {
274 write_integer(output, lit_len - 0xF);
275 }
276 // Now, write the actual literals.
277 output.extend_from_slice(&input[start..]);
278 }
279
280 /// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate
281 #[inline]
282 #[cfg(feature = "safe-encode")]
283 fn backtrack_match(
284 input: &[u8],
285 cur: &mut usize,
286 literal_start: usize,
287 source: &[u8],
288 candidate: &mut usize,
289 ) {
290 // Note: Even if the iterator version of this loop has fewer branches inside the loop, it has
291 // more branches before the loop. In practice that seems to make it slower than the while
292 // version below. TODO: It should be possible to remove all bounds checks, since we are
293 // walking backwards.
294 while *candidate > 0 && *cur > literal_start && input[*cur - 1] == source[*candidate - 1] {
295 *cur -= 1;
296 *candidate -= 1;
297 }
298 }
299
300 /// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate
301 #[inline]
302 #[cfg(not(feature = "safe-encode"))]
303 fn backtrack_match(
304 input: &[u8],
305 cur: &mut usize,
306 literal_start: usize,
307 source: &[u8],
308 candidate: &mut usize,
309 ) {
310 while unsafe {
311 *candidate > 0
312 && *cur > literal_start
313 && input.get_unchecked(*cur - 1) == source.get_unchecked(*candidate - 1)
314 } {
315 *cur -= 1;
316 *candidate -= 1;
317 }
318 }
319
320 /// Compress all bytes of `input[input_pos..]` into `output`.
321 ///
322 /// Bytes in `input[..input_pos]` are treated as a preamble and can be used for lookback.
323 /// This part is known as the compressor "prefix".
324 /// Bytes in `ext_dict` logically precede the bytes in `input` and can also be used for lookback.
325 ///
326 /// `input_stream_offset` is the logical position of the first byte of `input`. This allows same
327 /// `dict` to be used for many calls to `compress_internal` as we can "readdress" the first byte of
328 /// `input` to be something other than 0.
329 ///
330 /// `dict` is the dictionary of previously encoded sequences.
331 ///
332 /// This is used to find duplicates in the stream so they are not written multiple times.
333 ///
334 /// Every four bytes are hashed, and in the resulting slot their position in the input buffer
335 /// is placed in the dict. This way we can easily look up candidates for back references.
336 ///
337 /// Returns the number of bytes written (compressed) into `output`.
338 ///
339 /// # Const parameters
340 /// `USE_DICT`: When `false`, disables usage of `ext_dict` (it'll panic if a non-empty slice is used).
341 /// In other words, this generates more optimized code when an external dictionary isn't used.
342 ///
343 /// A similar const argument could be used to disable the Prefix mode (eg. USE_PREFIX),
344 /// which would impose `input_pos == 0 && input_stream_offset == 0`. Experiments didn't
345 /// show significant improvement though.
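///
/// Each emitted sequence follows the standard LZ4 block layout: a token byte, optional extra
/// literal-length bytes, the literals themselves, a little-endian u16 offset, and optional
/// extra match-length bytes.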
346 // Intentionally avoid inlining.
347 // Empirical tests revealed it to be rarely better but often significantly detrimental.
348 #[inline(never)]
349 pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool, S: Sink>(
350 input: &[u8],
351 input_pos: usize,
352 output: &mut S,
353 dict: &mut T,
354 ext_dict: &[u8],
355 input_stream_offset: usize,
356 ) -> Result<usize, CompressError> {
357 assert!(input_pos <= input.len());
358 if USE_DICT {
359 assert!(ext_dict.len() <= super::WINDOW_SIZE);
360 assert!(ext_dict.len() <= input_stream_offset);
361 // Check for overflow hazard when using ext_dict
362 assert!(input_stream_offset
363 .checked_add(input.len())
364 .and_then(|i| i.checked_add(ext_dict.len()))
365 .map_or(false, |i| i <= isize::MAX as usize));
366 } else {
367 assert!(ext_dict.is_empty());
368 }
369 if output.capacity() - output.pos() < get_maximum_output_size(input.len() - input_pos) {
370 return Err(CompressError::OutputTooSmall);
371 }
372
373 let output_start_pos = output.pos();
374 if input.len() - input_pos < LZ4_MIN_LENGTH {
375 handle_last_literals(output, input, input_pos);
376 return Ok(output.pos() - output_start_pos);
377 }
378
379 let ext_dict_stream_offset = input_stream_offset - ext_dict.len();
380 let end_pos_check = input.len() - MFLIMIT;
381 let mut literal_start = input_pos;
382 let mut cur = input_pos;
383
384 if cur == 0 && input_stream_offset == 0 {
385 // According to the spec we can't start with a match,
386 // except when referencing another block.
387 let hash = T::get_hash_at(input, 0);
388 dict.put_at(hash, 0);
389 cur = 1;
390 }
391
392 loop {
393 // Read the next block into two sections, the literals and the duplicates.
394 let mut step_size;
395 let mut candidate;
396 let mut candidate_source;
397 let mut offset;
398 let mut non_match_count = 1 << INCREASE_STEPSIZE_BITSHIFT;
399 // The number of bytes before our cursor, where the duplicate starts.
400 let mut next_cur = cur;
401
402 // In this loop we search for duplicates via the hashtable. 4 or 8 bytes are hashed and
403 // compared.
404 loop {
405 step_size = non_match_count >> INCREASE_STEPSIZE_BITSHIFT;
406 non_match_count += 1;
407
408 cur = next_cur;
409 next_cur += step_size;
410
411 // Same as cur + MFLIMIT > input.len()
412 if cur > end_pos_check {
413 handle_last_literals(output, input, literal_start);
414 return Ok(output.pos() - output_start_pos);
415 }
416 // Find a candidate in the dictionary with the hash of the current four bytes.
417 // Unchecked is safe as long as the values from the hash function don't exceed the size
418 // of the table. This is ensured by right shifting the hash values
419 // (`dict_bitshift`) to fit them in the table
420
421 // [Bounds Check]: Can be elided due to `end_pos_check` above
422 let hash = T::get_hash_at(input, cur);
423 candidate = dict.get_at(hash);
424 dict.put_at(hash, cur + input_stream_offset);
425
426 // Sanity check: Matches can't be ahead of `cur`.
427 debug_assert!(candidate <= input_stream_offset + cur);
428
429 // Two requirements must hold for the candidate:
430 // - We should not return a position which is merely a hash collision, so that the
431 // candidate actually matches what we search for.
432 // - We can address offsets of up to 16 bits, hence we are only able to use the candidate
433 // if its offset is less than or equal to 0xFFFF.
434 if input_stream_offset + cur - candidate > MAX_DISTANCE {
435 continue;
436 }
437
438 if candidate >= input_stream_offset {
439 // match within input
440 offset = (input_stream_offset + cur - candidate) as u16;
441 candidate -= input_stream_offset;
442 candidate_source = input;
443 } else if USE_DICT {
444 // Sanity check, which may fail if we lost history beyond MAX_DISTANCE
445 debug_assert!(
446 candidate >= ext_dict_stream_offset,
447 "Lost history in ext dict mode"
448 );
449 // match within ext dict
450 offset = (input_stream_offset + cur - candidate) as u16;
451 candidate -= ext_dict_stream_offset;
452 candidate_source = ext_dict;
453 } else {
454 // Match is not reachable anymore
455 // eg. compressing an independent block frame w/o clearing
456 // the matches tables, only increasing input_stream_offset.
457 // Sanity check
458 debug_assert!(input_pos == 0, "Lost history in prefix mode");
459 continue;
460 }
461 // [Bounds Check]: The candidate comes from the hash table. It can't be out of bounds, but
462 // that is impossible for the compiler to prove, so the bounds checks are not removed.
463 let cand_bytes: u32 = get_batch(candidate_source, candidate);
464 // [Bounds Check]: Should be able to be elided due to `end_pos_check`.
465 let curr_bytes: u32 = get_batch(input, cur);
466
467 if cand_bytes == curr_bytes {
468 break;
469 }
470 }
471
472 // Extend the match backwards if we can
473 backtrack_match(
474 input,
475 &mut cur,
476 literal_start,
477 candidate_source,
478 &mut candidate,
479 );
480
481 // The length (in bytes) of the literals section.
482 let lit_len = cur - literal_start;
483
484 // Generate the higher half of the token.
485 cur += MINMATCH;
486 candidate += MINMATCH;
487 let duplicate_length = count_same_bytes(input, &mut cur, candidate_source, candidate);
488
489 // Note: The `- 2` offset was copied from the reference implementation, it could be
490 // arbitrary.
491 let hash = T::get_hash_at(input, cur - 2);
492 dict.put_at(hash, cur - 2 + input_stream_offset);
493
494 let token = token_from_literal_and_match_length(lit_len, duplicate_length);
495
496 // Push the token to the output stream.
497 push_byte(output, token);
498 // If we were unable to fit the literals length into the token, write the extensional
499 // part.
500 if lit_len >= 0xF {
501 write_integer(output, lit_len - 0xF);
502 }
503
504 // Now, write the actual literals.
505 //
506 // The unsafe version copies blocks of 8 bytes, and therefore may copy up to 7 bytes more than
507 // needed. This is safe, because the last 12 bytes (MFLIMIT) are handled in
508 // handle_last_literals.
509 copy_literals_wild(output, input, literal_start, lit_len);
510 // write the offset in little endian.
511 push_u16(output, offset);
512
513 // If we were unable to fit the duplicates length into the token, write the
514 // extensional part.
515 if duplicate_length >= 0xF {
516 write_integer(output, duplicate_length - 0xF);
517 }
518 literal_start = cur;
519 }
520 }
521
522 #[inline]
523 #[cfg(feature = "safe-encode")]
524 fn push_byte(output: &mut impl Sink, el: u8) {
525 output.push(el);
526 }
527
528 #[inline]
529 #[cfg(not(feature = "safe-encode"))]
530 fn push_byte(output: &mut impl Sink, el: u8) {
531 unsafe {
532 core::ptr::write(output.pos_mut_ptr(), el);
533 output.set_pos(output.pos() + 1);
534 }
535 }
536
537 #[inline]
538 #[cfg(feature = "safe-encode")]
539 fn push_u16(output: &mut impl Sink, el: u16) {
540 output.extend_from_slice(&el.to_le_bytes());
541 }
542
543 #[inline]
544 #[cfg(not(feature = "safe-encode"))]
545 fn push_u16(output: &mut impl Sink, el: u16) {
546 unsafe {
547 core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 2);
548 output.set_pos(output.pos() + 2);
549 }
550 }
551
552 #[inline]
553 #[cfg(not(feature = "safe-encode"))]
554 fn push_u32(output: &mut impl Sink, el: u32) {
555 unsafe {
556 core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 4);
557 output.set_pos(output.pos() + 4);
558 }
559 }
560
561 #[inline(always)] // (always) necessary otherwise compiler fails to inline it
562 #[cfg(feature = "safe-encode")]
563 fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
564 output.extend_from_slice_wild(&input[input_start..input_start + len], len)
565 }
566
567 #[inline]
568 #[cfg(not(feature = "safe-encode"))]
569 fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
570 debug_assert!(input_start + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= input.len());
571 debug_assert!(output.pos() + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= output.capacity());
572 unsafe {
573 // Note: This used to be a wild copy loop of 8 bytes, but the compiler consistently
574 // transformed it into a call to memcpy, which hurts performance significantly for
575 // small copies, which are common.
576 let start_ptr = input.as_ptr().add(input_start);
577 match len {
578 0..=8 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 8),
579 9..=16 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 16),
580 17..=24 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 24),
581 _ => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), len),
582 }
583 output.set_pos(output.pos() + len);
584 }
585 }
586
587 /// Compress all bytes of `input` into `output`.
588 /// The method chooses an appropriate hashtable to lookup duplicates.
589 /// output should be preallocated with a size of
590 /// `get_maximum_output_size`.
591 ///
592 /// Returns the number of bytes written (compressed) into `output`.
593
594 #[inline]
595 pub(crate) fn compress_into_sink_with_dict<const USE_DICT: bool>(
596 input: &[u8],
597 output: &mut impl Sink,
598 mut dict_data: &[u8],
599 ) -> Result<usize, CompressError> {
600 if dict_data.len() + input.len() < u16::MAX as usize {
601 let mut dict = HashTable4KU16::new();
602 init_dict(&mut dict, &mut dict_data);
603 compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len())
604 } else {
605 let mut dict = HashTable4K::new();
606 init_dict(&mut dict, &mut dict_data);
607 compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len())
608 }
609 }
610
611 #[inline]
612 fn init_dict<T: HashTable>(dict: &mut T, dict_data: &mut &[u8]) {
613 if dict_data.len() > WINDOW_SIZE {
614 *dict_data = &dict_data[dict_data.len() - WINDOW_SIZE..];
615 }
616 let mut i = 0usize;
617 while i + core::mem::size_of::<usize>() <= dict_data.len() {
618 let hash = T::get_hash_at(dict_data, i);
619 dict.put_at(hash, i);
620 // Note: The 3 byte step was copied from the reference implementation, it could be
621 // arbitrary.
622 i += 3;
623 }
624 }
625
626 /// Returns the maximum output size of the compressed data.
627 /// Can be used to preallocate capacity on the output vector
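/// For example, a 1000-byte input yields a bound of `16 + 4 + 1100 = 1120` bytes.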
628 #[inline]
629 pub const fn get_maximum_output_size(input_len: usize) -> usize {
630 16 + 4 + (input_len * 110 / 100) as usize
631 }
632
633 /// Compress all bytes of `input` into `output`.
634 /// The method chooses an appropriate hashtable to lookup duplicates.
635 /// output should be preallocated with a size of
636 /// `get_maximum_output_size`.
637 ///
638 /// Returns the number of bytes written (compressed) into `output`.
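///
/// Illustrative usage (assuming these functions are re-exported from the public `block` module):
///
/// ```rust
/// let input = b"aaaaaaaaaaaaaaaaaaaaaaaa";
/// let mut out = vec![0u8; lz4_flex::block::get_maximum_output_size(input.len())];
/// let written = lz4_flex::block::compress_into(input, &mut out).unwrap();
/// assert!(written <= out.len());
/// ```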
639 #[inline]
640 pub fn compress_into(input: &[u8], output: &mut [u8]) -> Result<usize, CompressError> {
641 compress_into_sink_with_dict::<false>(input, &mut SliceSink::new(output, 0), b"")
642 }
643
644 /// Compress all bytes of `input` into `output`.
645 /// The method chooses an appropriate hashtable to lookup duplicates.
646 /// output should be preallocated with a size of
647 /// `get_maximum_output_size`.
648 ///
649 /// Returns the number of bytes written (compressed) into `output`.
650 #[inline]
651 pub fn compress_into_with_dict(
652 input: &[u8],
653 output: &mut [u8],
654 dict_data: &[u8],
655 ) -> Result<usize, CompressError> {
656 compress_into_sink_with_dict::<true>(input, &mut SliceSink::new(output, 0), dict_data)
657 }
658
659 #[inline]
660 fn compress_into_vec_with_dict<const USE_DICT: bool>(
661 input: &[u8],
662 prepend_size: bool,
663 mut dict_data: &[u8],
664 ) -> Vec<u8> {
665 let prepend_size_num_bytes = if prepend_size { 4 } else { 0 };
666 let max_compressed_size = get_maximum_output_size(input.len()) + prepend_size_num_bytes;
667 if dict_data.len() <= 3 {
668 dict_data = b"";
669 }
670 #[cfg(feature = "safe-encode")]
671 let mut compressed = {
672 let mut compressed: Vec<u8> = vec![0u8; max_compressed_size];
673 let out = if prepend_size {
674 compressed[..4].copy_from_slice(&(input.len() as u32).to_le_bytes());
675 &mut compressed[4..]
676 } else {
677 &mut compressed
678 };
679 let compressed_len =
680 compress_into_sink_with_dict::<USE_DICT>(input, &mut SliceSink::new(out, 0), dict_data)
681 .unwrap();
682
683 compressed.truncate(prepend_size_num_bytes + compressed_len);
684 compressed
685 };
686 #[cfg(not(feature = "safe-encode"))]
687 let mut compressed = {
688 let mut vec = Vec::with_capacity(max_compressed_size);
689 let start_pos = if prepend_size {
690 vec.extend_from_slice(&(input.len() as u32).to_le_bytes());
691 4
692 } else {
693 0
694 };
695 let compressed_len = compress_into_sink_with_dict::<USE_DICT>(
696 input,
697 &mut PtrSink::from_vec(&mut vec, start_pos),
698 dict_data,
699 )
700 .unwrap();
701 unsafe {
702 vec.set_len(prepend_size_num_bytes + compressed_len);
703 }
704 vec
705 };
706
707 compressed.shrink_to_fit();
708 compressed
709 }
710
711 /// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a little
712 /// endian u32. Can be used in conjunction with `decompress_size_prepended`
713 #[inline]
714 pub fn compress_prepend_size(input: &[u8]) -> Vec<u8> {
715 compress_into_vec_with_dict::<false>(input, true, b"")
716 }
717
718 /// Compress all bytes of `input`.
719 #[inline]
720 pub fn compress(input: &[u8]) -> Vec<u8> {
721 compress_into_vec_with_dict::<false>(input, false, b"")
722 }
723
724 /// Compress all bytes of `input` with an external dictionary.
725 #[inline]
726 pub fn compress_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec<u8> {
727 compress_into_vec_with_dict::<true>(input, false, ext_dict)
728 }
729
730 /// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a little
731 /// endian u32. Can be used in conjunction with `decompress_size_prepended_with_dict`
732 #[inline]
733 pub fn compress_prepend_size_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec<u8> {
734 compress_into_vec_with_dict::<true>(input, true, ext_dict)
735 }
736
737 #[inline]
738 #[cfg(not(feature = "safe-encode"))]
739 fn read_u16_ptr(input: *const u8) -> u16 {
740 let mut num: u16 = 0;
741 unsafe {
742 core::ptr::copy_nonoverlapping(input, &mut num as *mut u16 as *mut u8, 2);
743 }
744 num
745 }
746
747 #[inline]
748 #[cfg(not(feature = "safe-encode"))]
749 fn read_u32_ptr(input: *const u8) -> u32 {
750 let mut num: u32 = 0;
751 unsafe {
752 core::ptr::copy_nonoverlapping(input, &mut num as *mut u32 as *mut u8, 4);
753 }
754 num
755 }
756
757 #[inline]
758 #[cfg(not(feature = "safe-encode"))]
759 fn read_usize_ptr(input: *const u8) -> usize {
760 let mut num: usize = 0;
761 unsafe {
762 core::ptr::copy_nonoverlapping(
763 input,
764 &mut num as *mut usize as *mut u8,
765 core::mem::size_of::<usize>(),
766 );
767 }
768 num
769 }
770
771 #[cfg(test)]
772 mod tests {
773 use super::*;
774
775 #[test]
776 fn test_count_same_bytes() {
777 // 8-byte aligned block; the trailing zeros and ones are padding for the END_OFFSET handling
778 let first: &[u8] = &[
779 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
780 ];
781 let second: &[u8] = &[
782 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
783 ];
784 assert_eq!(count_same_bytes(first, &mut 0, second, 0), 16);
785
786 // 4byte aligned block
787 let first: &[u8] = &[
788 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,
789 0, 0, 0,
790 ];
791 let second: &[u8] = &[
792 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1,
793 1, 1, 1,
794 ];
795 assert_eq!(count_same_bytes(first, &mut 0, second, 0), 20);
796
797 // 2byte aligned block
798 let first: &[u8] = &[
799 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 0, 0, 0, 0, 0, 0, 0,
800 0, 0, 0, 0, 0,
801 ];
802 let second: &[u8] = &[
803 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 1, 1, 1, 1, 1, 1,
804 1, 1, 1, 1, 1,
805 ];
806 assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22);
807
808 // 1byte aligned block
809 let first: &[u8] = &[
810 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0, 0,
811 0, 0, 0, 0, 0, 0,
812 ];
813 let second: &[u8] = &[
814 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 1, 1, 1, 1, 1, 1,
815 1, 1, 1, 1, 1, 1,
816 ];
817 assert_eq!(count_same_bytes(first, &mut 0, second, 0), 23);
818
819 // 1byte aligned block - last byte different
820 let first: &[u8] = &[
821 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0, 0,
822 0, 0, 0, 0, 0, 0,
823 ];
824 let second: &[u8] = &[
825 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1, 1,
826 1, 1, 1, 1, 1, 1,
827 ];
828 assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22);
829
830 // 1byte aligned block
831 let first: &[u8] = &[
832 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 9, 5, 0, 0, 0, 0, 0, 0,
833 0, 0, 0, 0, 0, 0,
834 ];
835 let second: &[u8] = &[
836 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1, 1,
837 1, 1, 1, 1, 1, 1,
838 ];
839 assert_eq!(count_same_bytes(first, &mut 0, second, 0), 21);
840
841 for diff_idx in 8..100 {
842 let first: Vec<u8> = (0u8..255).cycle().take(100 + 12).collect();
843 let mut second = first.clone();
844 second[diff_idx] = 255;
845 for start in 0..=diff_idx {
846 let same_bytes = count_same_bytes(&first, &mut start.clone(), &second, start);
847 assert_eq!(same_bytes, diff_idx - start);
848 }
849 }
850 }
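
// Hedged sanity check (not part of the reference test suite) of the token nibble layout:
// literal length in the high nibble, match length (minus MINMATCH) in the low nibble, both
// saturating at 0xF.
#[test]
fn test_token_nibble_layout() {
assert_eq!(token_from_literal(3), 0x30);
assert_eq!(token_from_literal(20), 0xF0);
assert_eq!(token_from_literal_and_match_length(3, 5), 0x35);
assert_eq!(token_from_literal_and_match_length(20, 20), 0xFF);
}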
851
852 #[test]
853 fn test_bug() {
854 let input: &[u8] = &[
855 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
856 ];
857 let _out = compress(input);
858 }
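
// Hedged check (not part of the reference test suite) of the LSIC length extension written by
// `write_integer`: each 0xFF byte carries 255 and the final byte carries the remainder.
#[test]
fn test_write_integer_lsic() {
let mut buf = [0u8; 16];
let written = {
let mut sink = SliceSink::new(&mut buf[..], 0);
write_integer(&mut sink, 260);
sink.pos()
};
assert_eq!(&buf[..written], &[0xFF, 5]);
}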
859
860 #[test]
861 fn test_dict() {
862 let input: &[u8] = &[
863 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
864 ];
865 let dict = input;
866 let compressed = compress_with_dict(input, dict);
867 assert_lt!(compressed.len(), compress(input).len());
868
870 let mut uncompressed = vec![0u8; input.len()];
871 let uncomp_size = crate::block::decompress::decompress_into_with_dict(
872 &compressed,
873 &mut uncompressed,
874 dict,
875 )
876 .unwrap();
877 uncompressed.truncate(uncomp_size);
878 assert_eq!(input, uncompressed);
879 }
880
881 #[test]
882 fn test_dict_no_panic() {
883 let input: &[u8] = &[
884 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
885 ];
886 let dict = &[10, 12, 14];
887 let _compressed = compress_with_dict(input, dict);
888 }
889
890 #[test]
891 fn test_dict_match_crossing() {
892 let input: &[u8] = &[
893 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
894 ];
895 let dict = input;
896 let compressed = compress_with_dict(input, dict);
897 assert_lt!(compressed.len(), compress(input).len());
898
899 let mut uncompressed = vec![0u8; input.len() * 2];
900 // copy first half of the input into output
901 let dict_cutoff = dict.len() / 2;
902 let output_start = dict.len() - dict_cutoff;
903 uncompressed[..output_start].copy_from_slice(&dict[dict_cutoff..]);
904 let uncomp_len = {
905 let mut sink = SliceSink::new(&mut uncompressed[..], output_start);
906 crate::block::decompress::decompress_internal::<true, _>(
907 &compressed,
908 &mut sink,
909 &dict[..dict_cutoff],
910 )
911 .unwrap()
912 };
913 assert_eq!(input.len(), uncomp_len);
914 assert_eq!(
915 input,
916 &uncompressed[output_start..output_start + uncomp_len]
917 );
918 }
919
920 #[test]
921 fn test_conformant_last_block() {
922 // From the spec:
923 // The last match must start at least 12 bytes before the end of block.
924 // The last match is part of the penultimate sequence. It is followed by the last sequence,
925 // which contains only literals. Note that, as a consequence, an independent block <
926 // 13 bytes cannot be compressed, because the match must copy "something",
927 // so it needs at least one prior byte.
928 // When a block can reference data from another block, it can start immediately with a match
929 // and no literal, so a block of 12 bytes can be compressed.
930 let aaas: &[u8] = b"aaaaaaaaaaaaaaa";
931
932 // uncompressible
933 let out = compress(&aaas[..12]);
934 assert_gt!(out.len(), 12);
935 // compressible
936 let out = compress(&aaas[..13]);
937 assert_le!(out.len(), 13);
938 let out = compress(&aaas[..14]);
939 assert_le!(out.len(), 14);
940 let out = compress(&aaas[..15]);
941 assert_le!(out.len(), 15);
942
943 // dict uncompressible
944 let out = compress_with_dict(&aaas[..11], aaas);
945 assert_gt!(out.len(), 11);
946 // compressible
947 let out = compress_with_dict(&aaas[..12], aaas);
948 // According to the spec this _could_ compress, but it doesn't in this lib
949 // as it aborts compression for any input len < LZ4_MIN_LENGTH
950 assert_gt!(out.len(), 12);
951 let out = compress_with_dict(&aaas[..13], aaas);
952 assert_le!(out.len(), 13);
953 let out = compress_with_dict(&aaas[..14], aaas);
954 assert_le!(out.len(), 14);
955 let out = compress_with_dict(&aaas[..15], aaas);
956 assert_le!(out.len(), 15);
957 }
958
959 #[test]
960 fn test_dict_size() {
961 let dict = vec![b'a'; 1024 * 1024];
962 let input = &b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"[..];
963 let compressed = compress_prepend_size_with_dict(input, &dict);
964 let decompressed =
965 crate::block::decompress_size_prepended_with_dict(&compressed, &dict).unwrap();
966 assert_eq!(decompressed, input);
967 }
968 }
969