• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use twox_hash::XxHash32;
2 
3 use super::Error;
4 use std::{
5     fmt::Debug,
6     hash::Hasher,
7     io,
8     io::{Read, Write},
9 };
10 
// FLG byte: version and feature flags of the frame descriptor.

/// Reserved bit (bit 1) of the FLG byte; a header that sets it is rejected on read.
const FLG_RESERVED_MASK: u8 = 0b0000_0010;
/// Bits 7-6 of the FLG byte, which hold the format version.
const FLG_VERSION_MASK: u8 = 0b1100_0000;
/// The only accepted version (`01`), already positioned in bits 7-6.
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b0100_0000;

/// Bit 5: blocks are compressed independently of each other.
const FLG_INDEPENDENT_BLOCKS: u8 = 0b0010_0000;
/// Bit 4: every data block carries a checksum.
const FLG_BLOCK_CHECKSUMS: u8 = 0b0001_0000;
/// Bit 3: the descriptor contains the 8-byte content size field.
const FLG_CONTENT_SIZE: u8 = 0b0000_1000;
/// Bit 2: the frame carries a content checksum.
const FLG_CONTENT_CHECKSUM: u8 = 0b0000_0100;
/// Bit 0: the descriptor contains the 4-byte dictionary id field.
const FLG_DICTIONARY_ID: u8 = 0b0000_0001;

// BD byte: block maximum size.

/// Bits 6-4 of the BD byte, which hold the block size id.
const BD_BLOCK_SIZE_MASK: u8 = 0b0111_0000;
/// Every BD bit outside the block size field is reserved.
const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
/// Shift that moves the block size id between bits 6-4 and bits 2-0.
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;

/// Highest bit of the 4-byte block size field; set when the block is stored uncompressed.
const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x8000_0000;

/// Magic number that starts a regular LZ4 frame.
const LZ4F_MAGIC_NUMBER: u32 = 0x184D_2204;
/// Magic number that starts a legacy-format frame.
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C_2102;
/// Magic numbers that mark a skippable frame.
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D_2A50..=0x184D_2A5F;

/// Size in bytes of the magic number.
pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
/// Smallest frame header: magic number, FLG, BD and the header checksum byte.
pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7;
/// Largest frame header: the minimum plus content size (8) and dictionary id (4).
pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19;
/// Size in bytes of the block size field that precedes every block.
pub(crate) const BLOCK_INFO_SIZE: usize = 4;
35 
/// Predefined block sizes to choose from when compressing data.
///
/// The discriminant of each sized variant is the block size id that is stored in the frame
/// header.
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub enum BlockSize {
    /// Will detect a suitable block size based on the size of the first write call.
    ///
    /// This is the value produced by `BlockSize::default()`.
    #[default]
    Auto = 0,
    /// 64KB block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Picks a block size for a buffer of `buf_len` bytes.
    ///
    /// Buffers of at most 64KB map to `Max64KB`, buffers of at most 256KB map to `Max256KB`,
    /// and anything larger maps to `Max4MB`. `Max1MB` and `Max8MB` are never selected here.
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        if buf_len > BlockSize::Max256KB.get_size() {
            BlockSize::Max4MB
        } else if buf_len > BlockSize::Max64KB.get_size() {
            BlockSize::Max256KB
        } else {
            BlockSize::Max64KB
        }
    }

    /// Returns the block size in bytes.
    ///
    /// # Panics
    ///
    /// Panics when called on `BlockSize::Auto`, which has no fixed size.
    pub(crate) fn get_size(&self) -> usize {
        const KIB: usize = 1 << 10;
        const MIB: usize = 1 << 20;

        match *self {
            BlockSize::Auto => unreachable!(),
            BlockSize::Max64KB => 64 * KIB,
            BlockSize::Max256KB => 256 * KIB,
            BlockSize::Max1MB => MIB,
            BlockSize::Max4MB => 4 * MIB,
            BlockSize::Max8MB => 8 * MIB,
        }
    }
}
79 
/// The two block modes that can be set on [`FrameInfo`].
#[derive(Debug, Default, Clone, Copy, PartialEq)]
pub enum BlockMode {
    /// Each block is compressed on its own, without reference to other blocks.
    ///
    /// This is the default mode.
    #[default]
    Independent,
    /// A block may reference data from the blocks that precede it.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}
92 
93 // From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
94 //
95 // General Structure of LZ4 Frame format
96 // -------------------------------------
97 //
98 // | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
99 // |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
100 // | 4 bytes |  3-15 bytes   |       |       | 4 bytes | 0-4 bytes   |
101 //
102 // Frame Descriptor
103 // ----------------
104 //
105 // | FLG     | BD      | (Content Size) | (Dictionary ID) | HC      |
106 // | ------- | ------- |:--------------:|:---------------:| ------- |
107 // | 1 byte  | 1 byte  |  0 - 8 bytes   |   0 - 4 bytes   | 1 byte  |
108 //
109 // __FLG byte__
110 //
111 // |  BitNb  |  7-6  |   5   |    4     |  3   |    2     |    1     |   0  |
112 // | ------- |-------|-------|----------|------|----------|----------|------|
113 // |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
114 //
115 // __BD byte__
116 //
117 // |  BitNb  |     7    |     6-5-4     |  3-2-1-0 |
118 // | ------- | -------- | ------------- | -------- |
119 // |FieldName|*Reserved*| Block MaxSize |*Reserved*|
120 //
121 // Data Blocks
122 // -----------
123 //
124 // | Block Size |  data  | (Block Checksum) |
125 // |:----------:| ------ |:----------------:|
126 // |  4 bytes   |        |   0 - 4 bytes    |
127 //
128 #[derive(Debug, Default, Clone)]
129 /// The metadata for de/compressing with lz4 frame format.
130 pub struct FrameInfo {
131     /// If set, includes the total uncompressed size of data in the frame.
132     pub content_size: Option<u64>,
133     /// The identifier for the dictionary that must be used to correctly decode data.
134     /// The compressor and the decompressor must use exactly the same dictionary.
135     ///
136     /// Note that this is currently unsupported and for this reason it's not pub.
137     pub(crate) dict_id: Option<u32>,
138     /// The maximum uncompressed size of each data block.
139     pub block_size: BlockSize,
140     /// The block mode.
141     pub block_mode: BlockMode,
142     /// If set, includes a checksum for each data block in the frame.
143     pub block_checksums: bool,
144     /// If set, includes a content checksum to verify that the full frame contents have been
145     /// decoded correctly.
146     pub content_checksum: bool,
147     /// If set, use the legacy frame format
148     pub legacy_frame: bool,
149 }
150 
151 impl FrameInfo {
152     /// Create a new `FrameInfo`.
new() -> Self153     pub fn new() -> Self {
154         Self::default()
155     }
156 
157     /// Whether to include the total uncompressed size of data in the frame.
content_size(mut self, content_size: Option<u64>) -> Self158     pub fn content_size(mut self, content_size: Option<u64>) -> Self {
159         self.content_size = content_size;
160         self
161     }
162 
163     /// The maximum uncompressed size of each data block.
block_size(mut self, block_size: BlockSize) -> Self164     pub fn block_size(mut self, block_size: BlockSize) -> Self {
165         self.block_size = block_size;
166         self
167     }
168 
169     /// The block mode.
block_mode(mut self, block_mode: BlockMode) -> Self170     pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
171         self.block_mode = block_mode;
172         self
173     }
174 
175     /// If set, includes a checksum for each data block in the frame.
block_checksums(mut self, block_checksums: bool) -> Self176     pub fn block_checksums(mut self, block_checksums: bool) -> Self {
177         self.block_checksums = block_checksums;
178         self
179     }
180 
181     /// If set, includes a content checksum to verify that the full frame contents have been
182     /// decoded correctly.
content_checksum(mut self, content_checksum: bool) -> Self183     pub fn content_checksum(mut self, content_checksum: bool) -> Self {
184         self.content_checksum = content_checksum;
185         self
186     }
187 
188     /// If set, use the legacy frame format.
legacy_frame(mut self, legacy_frame: bool) -> Self189     pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
190         self.legacy_frame = legacy_frame;
191         self
192     }
193 
read_size(input: &[u8]) -> Result<usize, Error>194     pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
195         let mut required = MIN_FRAME_INFO_SIZE;
196         let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap());
197         if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
198             return Ok(MAGIC_NUMBER_SIZE);
199         }
200 
201         if input.len() < required {
202             return Ok(required);
203         }
204 
205         if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
206             return Ok(8);
207         }
208         if magic_num != LZ4F_MAGIC_NUMBER {
209             return Err(Error::WrongMagicNumber);
210         }
211 
212         if input[4] & FLG_CONTENT_SIZE != 0 {
213             required += 8;
214         }
215         if input[4] & FLG_DICTIONARY_ID != 0 {
216             required += 4
217         }
218         Ok(required)
219     }
220 
write_size(&self) -> usize221     pub(crate) fn write_size(&self) -> usize {
222         let mut required = MIN_FRAME_INFO_SIZE;
223         if self.content_size.is_some() {
224             required += 8;
225         }
226         if self.dict_id.is_some() {
227             required += 4;
228         }
229         required
230     }
231 
write(&self, output: &mut [u8]) -> Result<usize, Error>232     pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
233         let write_size = self.write_size();
234         if output.len() < write_size {
235             return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
236         }
237         let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
238         assert!(write_size <= buffer.len());
239         buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());
240         buffer[4] = FLG_SUPPORTED_VERSION_BITS;
241         if self.block_checksums {
242             buffer[4] |= FLG_BLOCK_CHECKSUMS;
243         }
244         if self.content_checksum {
245             buffer[4] |= FLG_CONTENT_CHECKSUM;
246         }
247         if self.block_mode == BlockMode::Independent {
248             buffer[4] |= FLG_INDEPENDENT_BLOCKS;
249         }
250         buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;
251 
252         // Optional section
253         let mut offset = 6;
254         if let Some(size) = self.content_size {
255             buffer[4] |= FLG_CONTENT_SIZE;
256             buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
257             offset += 8;
258         }
259         if let Some(dict_id) = self.dict_id {
260             buffer[4] |= FLG_DICTIONARY_ID;
261             buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
262             offset += 4;
263         }
264 
265         // Header checksum
266         let mut hasher = XxHash32::with_seed(0);
267         hasher.write(&buffer[4..offset]);
268         let header_checksum = (hasher.finish() >> 8) as u8;
269         buffer[offset] = header_checksum;
270         offset += 1;
271 
272         debug_assert_eq!(offset, write_size);
273         output[..write_size].copy_from_slice(&buffer[..write_size]);
274         Ok(write_size)
275     }
276 
read(mut input: &[u8]) -> Result<FrameInfo, Error>277     pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
278         let original_input = input;
279         // 4 byte Magic
280         let magic_num = {
281             let mut buffer = [0u8; 4];
282             input.read_exact(&mut buffer)?;
283             u32::from_le_bytes(buffer)
284         };
285         if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
286             return Ok(FrameInfo {
287                 block_size: BlockSize::Max8MB,
288                 legacy_frame: true,
289                 ..FrameInfo::default()
290             });
291         }
292         if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
293             let mut buffer = [0u8; 4];
294             input.read_exact(&mut buffer)?;
295             let user_data_len = u32::from_le_bytes(buffer);
296             return Err(Error::SkippableFrame(user_data_len));
297         }
298         if magic_num != LZ4F_MAGIC_NUMBER {
299             return Err(Error::WrongMagicNumber);
300         }
301 
302         // fixed size section
303         let [flg_byte, bd_byte] = {
304             let mut buffer = [0u8, 0];
305             input.read_exact(&mut buffer)?;
306             buffer
307         };
308 
309         if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
310             // version is always 01
311             return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
312         }
313 
314         if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
315             return Err(Error::ReservedBitsSet);
316         }
317 
318         let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
319             BlockMode::Independent
320         } else {
321             BlockMode::Linked
322         };
323         let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
324         let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;
325 
326         let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
327             i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
328             4 => BlockSize::Max64KB,
329             5 => BlockSize::Max256KB,
330             6 => BlockSize::Max1MB,
331             7 => BlockSize::Max4MB,
332             _ => unreachable!(),
333         };
334 
335         // var len section
336         let mut content_size = None;
337         if flg_byte & FLG_CONTENT_SIZE != 0 {
338             let mut buffer = [0u8; 8];
339             input.read_exact(&mut buffer).unwrap();
340             content_size = Some(u64::from_le_bytes(buffer));
341         }
342 
343         let mut dict_id = None;
344         if flg_byte & FLG_DICTIONARY_ID != 0 {
345             let mut buffer = [0u8; 4];
346             input.read_exact(&mut buffer)?;
347             dict_id = Some(u32::from_le_bytes(buffer));
348         }
349 
350         // 1 byte header checksum
351         let expected_checksum = {
352             let mut buffer = [0u8; 1];
353             input.read_exact(&mut buffer)?;
354             buffer[0]
355         };
356         let mut hasher = XxHash32::with_seed(0);
357         hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
358         let header_hash = (hasher.finish() >> 8) as u8;
359         if header_hash != expected_checksum {
360             return Err(Error::HeaderChecksumError);
361         }
362 
363         Ok(FrameInfo {
364             content_size,
365             dict_id,
366             block_size,
367             block_mode,
368             block_checksums,
369             content_checksum,
370             legacy_frame: false,
371         })
372     }
373 }
374 
375 #[derive(Debug)]
376 pub(crate) enum BlockInfo {
377     Compressed(u32),
378     Uncompressed(u32),
379     EndMark,
380 }
381 
382 impl BlockInfo {
read(mut input: &[u8]) -> Result<Self, Error>383     pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
384         let mut size_buffer = [0u8; 4];
385         input.read_exact(&mut size_buffer)?;
386         let size = u32::from_le_bytes(size_buffer);
387         if size == 0 {
388             Ok(BlockInfo::EndMark)
389         } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 {
390             Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT))
391         } else {
392             Ok(BlockInfo::Compressed(size))
393         }
394     }
395 
write(&self, mut output: &mut [u8]) -> Result<usize, Error>396     pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
397         let value = match self {
398             BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo),
399             BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len)
400                 if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
401             {
402                 return Err(Error::InvalidBlockInfo)
403             }
404             BlockInfo::Compressed(len) => *len,
405             BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT,
406             BlockInfo::EndMark => 0,
407         };
408         output.write_all(&value.to_le_bytes())?;
409         Ok(4)
410     }
411 }
412