1 use twox_hash::XxHash32; 2 3 use super::Error; 4 use std::{ 5 fmt::Debug, 6 hash::Hasher, 7 io, 8 io::{Read, Write}, 9 }; 10 11 const FLG_RESERVED_MASK: u8 = 0b00000010; 12 const FLG_VERSION_MASK: u8 = 0b11000000; 13 const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000; 14 15 const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000; 16 const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000; 17 const FLG_CONTENT_SIZE: u8 = 0b00001000; 18 const FLG_CONTENT_CHECKSUM: u8 = 0b00000100; 19 const FLG_DICTIONARY_ID: u8 = 0b00000001; 20 21 const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK; 22 const BD_BLOCK_SIZE_MASK: u8 = 0b01110000; 23 const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4; 24 25 const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000; 26 27 const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204; 28 pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102; 29 const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D2A50..=0x184D2A5F; 30 31 pub(crate) const MAGIC_NUMBER_SIZE: usize = 4; 32 pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7; 33 pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19; 34 pub(crate) const BLOCK_INFO_SIZE: usize = 4; 35 36 #[derive(Clone, Copy, PartialEq, Debug)] 37 /// Different predefines blocksizes to choose when compressing data. 38 #[derive(Default)] 39 pub enum BlockSize { 40 /// Will detect optimal frame size based on the size of the first write call 41 #[default] 42 Auto = 0, 43 /// The default block size. 44 Max64KB = 4, 45 /// 256KB block size. 46 Max256KB = 5, 47 /// 1MB block size. 48 Max1MB = 6, 49 /// 4MB block size. 50 Max4MB = 7, 51 /// 8MB block size. 52 Max8MB = 8, 53 } 54 55 impl BlockSize { 56 /// Try to find optimal size based on passed buffer length. from_buf_length(buf_len: usize) -> Self57 pub(crate) fn from_buf_length(buf_len: usize) -> Self { 58 let mut blocksize = BlockSize::Max4MB; 59 60 for candidate in [BlockSize::Max256KB, BlockSize::Max64KB] { 61 if buf_len > candidate.get_size() { 62 return blocksize; 63 } 64 blocksize = candidate; 65 } 66 BlockSize::Max64KB 67 } get_size(&self) -> usize68 pub(crate) fn get_size(&self) -> usize { 69 match self { 70 BlockSize::Auto => unreachable!(), 71 BlockSize::Max64KB => 64 * 1024, 72 BlockSize::Max256KB => 256 * 1024, 73 BlockSize::Max1MB => 1024 * 1024, 74 BlockSize::Max4MB => 4 * 1024 * 1024, 75 BlockSize::Max8MB => 8 * 1024 * 1024, 76 } 77 } 78 } 79 80 #[derive(Clone, Copy, PartialEq, Debug)] 81 /// The two `BlockMode` operations that can be set on (`FrameInfo`)[FrameInfo] 82 #[derive(Default)] 83 pub enum BlockMode { 84 /// Every block is compressed independently. The default. 85 #[default] 86 Independent, 87 /// Blocks can reference data from previous blocks. 88 /// 89 /// Effective when the stream contains small blocks. 90 Linked, 91 } 92 93 // From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md 94 // 95 // General Structure of LZ4 Frame format 96 // ------------------------------------- 97 // 98 // | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum | 99 // |:-------:|:-------------:| ----- | ----- | ------- | ----------- | 100 // | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes | 101 // 102 // Frame Descriptor 103 // ---------------- 104 // 105 // | FLG | BD | (Content Size) | (Dictionary ID) | HC | 106 // | ------- | ------- |:--------------:|:---------------:| ------- | 107 // | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte | 108 // 109 // __FLG byte__ 110 // 111 // | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 | 112 // | ------- |-------|-------|----------|------|----------|----------|------| 113 // |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID| 114 // 115 // __BD byte__ 116 // 117 // | BitNb | 7 | 6-5-4 | 3-2-1-0 | 118 // | ------- | -------- | ------------- | -------- | 119 // |FieldName|*Reserved*| Block MaxSize |*Reserved*| 120 // 121 // Data Blocks 122 // ----------- 123 // 124 // | Block Size | data | (Block Checksum) | 125 // |:----------:| ------ |:----------------:| 126 // | 4 bytes | | 0 - 4 bytes | 127 // 128 #[derive(Debug, Default, Clone)] 129 /// The metadata for de/compressing with lz4 frame format. 130 pub struct FrameInfo { 131 /// If set, includes the total uncompressed size of data in the frame. 132 pub content_size: Option<u64>, 133 /// The identifier for the dictionary that must be used to correctly decode data. 134 /// The compressor and the decompressor must use exactly the same dictionary. 135 /// 136 /// Note that this is currently unsupported and for this reason it's not pub. 137 pub(crate) dict_id: Option<u32>, 138 /// The maximum uncompressed size of each data block. 139 pub block_size: BlockSize, 140 /// The block mode. 141 pub block_mode: BlockMode, 142 /// If set, includes a checksum for each data block in the frame. 143 pub block_checksums: bool, 144 /// If set, includes a content checksum to verify that the full frame contents have been 145 /// decoded correctly. 146 pub content_checksum: bool, 147 /// If set, use the legacy frame format 148 pub legacy_frame: bool, 149 } 150 151 impl FrameInfo { 152 /// Create a new `FrameInfo`. new() -> Self153 pub fn new() -> Self { 154 Self::default() 155 } 156 157 /// Whether to include the total uncompressed size of data in the frame. content_size(mut self, content_size: Option<u64>) -> Self158 pub fn content_size(mut self, content_size: Option<u64>) -> Self { 159 self.content_size = content_size; 160 self 161 } 162 163 /// The maximum uncompressed size of each data block. block_size(mut self, block_size: BlockSize) -> Self164 pub fn block_size(mut self, block_size: BlockSize) -> Self { 165 self.block_size = block_size; 166 self 167 } 168 169 /// The block mode. block_mode(mut self, block_mode: BlockMode) -> Self170 pub fn block_mode(mut self, block_mode: BlockMode) -> Self { 171 self.block_mode = block_mode; 172 self 173 } 174 175 /// If set, includes a checksum for each data block in the frame. block_checksums(mut self, block_checksums: bool) -> Self176 pub fn block_checksums(mut self, block_checksums: bool) -> Self { 177 self.block_checksums = block_checksums; 178 self 179 } 180 181 /// If set, includes a content checksum to verify that the full frame contents have been 182 /// decoded correctly. content_checksum(mut self, content_checksum: bool) -> Self183 pub fn content_checksum(mut self, content_checksum: bool) -> Self { 184 self.content_checksum = content_checksum; 185 self 186 } 187 188 /// If set, use the legacy frame format. legacy_frame(mut self, legacy_frame: bool) -> Self189 pub fn legacy_frame(mut self, legacy_frame: bool) -> Self { 190 self.legacy_frame = legacy_frame; 191 self 192 } 193 read_size(input: &[u8]) -> Result<usize, Error>194 pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> { 195 let mut required = MIN_FRAME_INFO_SIZE; 196 let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap()); 197 if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { 198 return Ok(MAGIC_NUMBER_SIZE); 199 } 200 201 if input.len() < required { 202 return Ok(required); 203 } 204 205 if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { 206 return Ok(8); 207 } 208 if magic_num != LZ4F_MAGIC_NUMBER { 209 return Err(Error::WrongMagicNumber); 210 } 211 212 if input[4] & FLG_CONTENT_SIZE != 0 { 213 required += 8; 214 } 215 if input[4] & FLG_DICTIONARY_ID != 0 { 216 required += 4 217 } 218 Ok(required) 219 } 220 write_size(&self) -> usize221 pub(crate) fn write_size(&self) -> usize { 222 let mut required = MIN_FRAME_INFO_SIZE; 223 if self.content_size.is_some() { 224 required += 8; 225 } 226 if self.dict_id.is_some() { 227 required += 4; 228 } 229 required 230 } 231 write(&self, output: &mut [u8]) -> Result<usize, Error>232 pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> { 233 let write_size = self.write_size(); 234 if output.len() < write_size { 235 return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into())); 236 } 237 let mut buffer = [0u8; MAX_FRAME_INFO_SIZE]; 238 assert!(write_size <= buffer.len()); 239 buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes()); 240 buffer[4] = FLG_SUPPORTED_VERSION_BITS; 241 if self.block_checksums { 242 buffer[4] |= FLG_BLOCK_CHECKSUMS; 243 } 244 if self.content_checksum { 245 buffer[4] |= FLG_CONTENT_CHECKSUM; 246 } 247 if self.block_mode == BlockMode::Independent { 248 buffer[4] |= FLG_INDEPENDENT_BLOCKS; 249 } 250 buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT; 251 252 // Optional section 253 let mut offset = 6; 254 if let Some(size) = self.content_size { 255 buffer[4] |= FLG_CONTENT_SIZE; 256 buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes()); 257 offset += 8; 258 } 259 if let Some(dict_id) = self.dict_id { 260 buffer[4] |= FLG_DICTIONARY_ID; 261 buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes()); 262 offset += 4; 263 } 264 265 // Header checksum 266 let mut hasher = XxHash32::with_seed(0); 267 hasher.write(&buffer[4..offset]); 268 let header_checksum = (hasher.finish() >> 8) as u8; 269 buffer[offset] = header_checksum; 270 offset += 1; 271 272 debug_assert_eq!(offset, write_size); 273 output[..write_size].copy_from_slice(&buffer[..write_size]); 274 Ok(write_size) 275 } 276 read(mut input: &[u8]) -> Result<FrameInfo, Error>277 pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> { 278 let original_input = input; 279 // 4 byte Magic 280 let magic_num = { 281 let mut buffer = [0u8; 4]; 282 input.read_exact(&mut buffer)?; 283 u32::from_le_bytes(buffer) 284 }; 285 if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { 286 return Ok(FrameInfo { 287 block_size: BlockSize::Max8MB, 288 legacy_frame: true, 289 ..FrameInfo::default() 290 }); 291 } 292 if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { 293 let mut buffer = [0u8; 4]; 294 input.read_exact(&mut buffer)?; 295 let user_data_len = u32::from_le_bytes(buffer); 296 return Err(Error::SkippableFrame(user_data_len)); 297 } 298 if magic_num != LZ4F_MAGIC_NUMBER { 299 return Err(Error::WrongMagicNumber); 300 } 301 302 // fixed size section 303 let [flg_byte, bd_byte] = { 304 let mut buffer = [0u8, 0]; 305 input.read_exact(&mut buffer)?; 306 buffer 307 }; 308 309 if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS { 310 // version is always 01 311 return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK)); 312 } 313 314 if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 { 315 return Err(Error::ReservedBitsSet); 316 } 317 318 let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 { 319 BlockMode::Independent 320 } else { 321 BlockMode::Linked 322 }; 323 let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0; 324 let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0; 325 326 let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT { 327 i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)), 328 4 => BlockSize::Max64KB, 329 5 => BlockSize::Max256KB, 330 6 => BlockSize::Max1MB, 331 7 => BlockSize::Max4MB, 332 _ => unreachable!(), 333 }; 334 335 // var len section 336 let mut content_size = None; 337 if flg_byte & FLG_CONTENT_SIZE != 0 { 338 let mut buffer = [0u8; 8]; 339 input.read_exact(&mut buffer).unwrap(); 340 content_size = Some(u64::from_le_bytes(buffer)); 341 } 342 343 let mut dict_id = None; 344 if flg_byte & FLG_DICTIONARY_ID != 0 { 345 let mut buffer = [0u8; 4]; 346 input.read_exact(&mut buffer)?; 347 dict_id = Some(u32::from_le_bytes(buffer)); 348 } 349 350 // 1 byte header checksum 351 let expected_checksum = { 352 let mut buffer = [0u8; 1]; 353 input.read_exact(&mut buffer)?; 354 buffer[0] 355 }; 356 let mut hasher = XxHash32::with_seed(0); 357 hasher.write(&original_input[4..original_input.len() - input.len() - 1]); 358 let header_hash = (hasher.finish() >> 8) as u8; 359 if header_hash != expected_checksum { 360 return Err(Error::HeaderChecksumError); 361 } 362 363 Ok(FrameInfo { 364 content_size, 365 dict_id, 366 block_size, 367 block_mode, 368 block_checksums, 369 content_checksum, 370 legacy_frame: false, 371 }) 372 } 373 } 374 375 #[derive(Debug)] 376 pub(crate) enum BlockInfo { 377 Compressed(u32), 378 Uncompressed(u32), 379 EndMark, 380 } 381 382 impl BlockInfo { read(mut input: &[u8]) -> Result<Self, Error>383 pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> { 384 let mut size_buffer = [0u8; 4]; 385 input.read_exact(&mut size_buffer)?; 386 let size = u32::from_le_bytes(size_buffer); 387 if size == 0 { 388 Ok(BlockInfo::EndMark) 389 } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 { 390 Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT)) 391 } else { 392 Ok(BlockInfo::Compressed(size)) 393 } 394 } 395 write(&self, mut output: &mut [u8]) -> Result<usize, Error>396 pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> { 397 let value = match self { 398 BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo), 399 BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len) 400 if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 => 401 { 402 return Err(Error::InvalidBlockInfo) 403 } 404 BlockInfo::Compressed(len) => *len, 405 BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT, 406 BlockInfo::EndMark => 0, 407 }; 408 output.write_all(&value.to_le_bytes())?; 409 Ok(4) 410 } 411 } 412