1 // Copyright 2015 Ilkka Rauta 2 // 3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 6 // option. This file may not be copied, modified, or distributed 7 // except according to those terms. 8 9 //! BitReader is a helper type to extract strings of bits from a slice of bytes. 10 //! 11 //! Here is how you read first a single bit, then three bits and finally four bits from a byte 12 //! buffer: 13 //! 14 //! ``` 15 //! use bitreader::BitReader; 16 //! 17 //! let slice_of_u8 = &[0b1000_1111]; 18 //! let mut reader = BitReader::new(slice_of_u8); 19 //! 20 //! // You probably should use try! or some other error handling mechanism in real code if the 21 //! // length of the input is not known in advance. 22 //! let a_single_bit = reader.read_u8(1).unwrap(); 23 //! assert_eq!(a_single_bit, 1); 24 //! 25 //! let more_bits = reader.read_u8(3).unwrap(); 26 //! assert_eq!(more_bits, 0); 27 //! 28 //! let last_bits_of_byte = reader.read_u8(4).unwrap(); 29 //! assert_eq!(last_bits_of_byte, 0b1111); 30 //! ``` 31 //! You can naturally read bits from longer buffer of data than just a single byte. 32 //! 33 //! As you read bits, the internal cursor of BitReader moves on along the stream of bits. Big 34 //! endian format is assumed when reading the multi-byte values. BitReader supports reading maximum 35 //! of 64 bits at a time (with read_u64). Reading signed values directly is not supported at the 36 //! moment. 37 //! 38 //! The reads do not need to be aligned in any particular way. 39 //! 40 //! Reading zero bits is a no-op. 41 //! 42 //! You can also skip over a number of bits, in which case there is no arbitrary small limits like 43 //! when reading the values to a variable. However, you can not seek past the end of the slice, 44 //! either when reading or when skipping bits. 45 //! 46 //! Note that the code will likely not work correctly if the slice is longer than 2^61 bytes, but 47 //! exceeding that should be pretty unlikely. Let's get back to this when people read exabytes of 48 //! information one bit at a time. 49 #![no_std] 50 cfg_if::cfg_if!{ 51 if #[cfg(feature = "std")] { 52 extern crate std; 53 use std::cmp::min; 54 use std::prelude::v1::*; 55 use std::fmt; 56 use std::error::Error; 57 use std::result; 58 } else { 59 use core::result; 60 use core::fmt; 61 use core::cmp::min; 62 } 63 } 64 65 #[cfg(test)] 66 mod tests; 67 68 /// BitReader reads data from a byte slice at the granularity of a single bit. 69 pub struct BitReader<'a> { 70 bytes: &'a [u8], 71 /// Position from the start of the slice, counted as bits instead of bytes 72 position: u64, 73 relative_offset: u64, 74 75 /// Length this reader is allowed to read from the slice, counted as bits instead of bytes. 76 length: u64, 77 } 78 79 impl<'a> BitReader<'a> { 80 /// Construct a new BitReader from a byte slice. The returned reader lives at most as long as 81 /// the slice given to is valid. 82 #[inline] new(bytes: &'a [u8]) -> BitReader<'a>83 pub fn new(bytes: &'a [u8]) -> BitReader<'a> { 84 BitReader { 85 bytes: bytes, 86 position: 0, 87 relative_offset: 0, 88 length: bytes.len() as u64 * 8, 89 } 90 } 91 92 /// Returns a copy of current BitReader, with the difference that its position() returns 93 /// positions relative to the position of the original BitReader at the construction time. 94 /// After construction, both readers are otherwise completely independent, except of course 95 /// for sharing the same source data. 96 /// 97 /// ``` 98 /// use bitreader::BitReader; 99 /// 100 /// let bytes = &[0b11110000, 0b00001111]; 101 /// let mut original = BitReader::new(bytes); 102 /// assert_eq!(original.read_u8(4).unwrap(), 0b1111); 103 /// assert_eq!(original.position(), 4); 104 /// 105 /// let mut relative = original.relative_reader(); 106 /// assert_eq!(relative.position(), 0); 107 /// 108 /// assert_eq!(original.read_u8(8).unwrap(), 0); 109 /// assert_eq!(relative.read_u8(8).unwrap(), 0); 110 /// 111 /// assert_eq!(original.position(), 12); 112 /// assert_eq!(relative.position(), 8); 113 /// ``` 114 #[inline] relative_reader(&self) -> BitReader<'a>115 pub fn relative_reader(&self) -> BitReader<'a> { 116 BitReader { 117 bytes: self.bytes, 118 position: self.position, 119 relative_offset: self.position, 120 length: self.length - self.position(), 121 } 122 } 123 124 /// Returns a copy of current BitReader, with the difference that its position() returns 125 /// positions relative to the position of the original BitReader at the construction time, and 126 /// will not allow reading more than len bits. After construction, both readers are otherwise 127 // completely independent, except of course for sharing the same source data. 128 /// 129 /// ``` 130 /// use bitreader::BitReader; 131 /// use bitreader::BitReaderError; 132 /// 133 /// let bytes = &[0b11110000, 0b00001111]; 134 /// let mut original = BitReader::new(bytes); 135 /// assert_eq!(original.read_u8(4).unwrap(), 0b1111); 136 /// assert_eq!(original.position(), 4); 137 /// 138 /// let mut relative = original.relative_reader_atmost(8); 139 /// assert_eq!(relative.position(), 0); 140 /// 141 /// assert_eq!(original.read_u8(8).unwrap(), 0); 142 /// assert_eq!(relative.read_u8(8).unwrap(), 0); 143 /// 144 /// assert_eq!(original.position(), 12); 145 /// assert_eq!(relative.position(), 8); 146 /// 147 /// assert_eq!(relative.read_u8(8).unwrap_err(), BitReaderError::NotEnoughData{ 148 /// position: 8, 149 /// length: 8, 150 /// requested: 8 151 /// }); 152 /// ``` 153 #[inline] relative_reader_atmost(&self, len: u64) -> BitReader<'a>154 pub fn relative_reader_atmost(&self, len: u64) -> BitReader<'a> { 155 BitReader { 156 bytes: self.bytes, 157 position: self.position, 158 relative_offset: self.position, 159 length: min(self.length - self.position(), len), 160 } 161 } 162 163 /// Read at most 8 bits into a u8. 164 #[inline] read_u8(&mut self, bit_count: u8) -> Result<u8>165 pub fn read_u8(&mut self, bit_count: u8) -> Result<u8> { 166 let value = self.read_value(bit_count, 8)?; 167 Ok((value & 0xff) as u8) 168 } 169 170 /// Read at most 8 bits into a u8, but without moving the cursor forward. 171 #[inline] peek_u8(&self, bit_count: u8) -> Result<u8>172 pub fn peek_u8(&self, bit_count: u8) -> Result<u8> { 173 self.relative_reader().read_u8(bit_count) 174 } 175 176 /// Fills the entire `output_bytes` slice. If there aren't enough bits remaining 177 /// after the internal cursor's current position, the cursor won't be moved forward 178 /// and the contents of `output_bytes` won't be modified. read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()>179 pub fn read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()> { 180 let requested = output_bytes.len() as u64 * 8; 181 if requested > self.remaining() { 182 Err(BitReaderError::NotEnoughData { 183 position: self.position(), 184 length: self.length, 185 requested, 186 }) 187 } else { 188 for byte in output_bytes.iter_mut() { 189 *byte = self.read_u8(8)?; 190 } 191 Ok(()) 192 } 193 } 194 195 /// Read at most 16 bits into a u16. 196 #[inline] read_u16(&mut self, bit_count: u8) -> Result<u16>197 pub fn read_u16(&mut self, bit_count: u8) -> Result<u16> { 198 let value = self.read_value(bit_count, 16)?; 199 Ok((value & 0xffff) as u16) 200 } 201 202 /// Read at most 16 bits into a u16, but without moving the cursor forward. 203 #[inline] peek_u16(&self, bit_count: u8) -> Result<u16>204 pub fn peek_u16(&self, bit_count: u8) -> Result<u16> { 205 self.relative_reader().read_u16(bit_count) 206 } 207 208 /// Read at most 32 bits into a u32. 209 #[inline] read_u32(&mut self, bit_count: u8) -> Result<u32>210 pub fn read_u32(&mut self, bit_count: u8) -> Result<u32> { 211 let value = self.read_value(bit_count, 32)?; 212 Ok((value & 0xffffffff) as u32) 213 } 214 215 /// Read at most 32 bits into a u32, but without moving the cursor forward. 216 #[inline] peek_u32(&self, bit_count: u8) -> Result<u32>217 pub fn peek_u32(&self, bit_count: u8) -> Result<u32> { 218 self.relative_reader().read_u32(bit_count) 219 } 220 221 /// Read at most 64 bits into a u64. 222 #[inline] read_u64(&mut self, bit_count: u8) -> Result<u64>223 pub fn read_u64(&mut self, bit_count: u8) -> Result<u64> { 224 let value = self.read_value(bit_count, 64)?; 225 Ok(value) 226 } 227 228 /// Read at most 64 bits into a u64, but without moving the cursor forward. 229 #[inline] peek_u64(&self, bit_count: u8) -> Result<u64>230 pub fn peek_u64(&self, bit_count: u8) -> Result<u64> { 231 self.relative_reader().read_u64(bit_count) 232 } 233 234 /// Read at most 8 bits into a i8. 235 /// Assumes the bits are stored in two's complement format. 236 #[inline] read_i8(&mut self, bit_count: u8) -> Result<i8>237 pub fn read_i8(&mut self, bit_count: u8) -> Result<i8> { 238 let value = self.read_signed_value(bit_count, 8)?; 239 Ok((value & 0xff) as i8) 240 } 241 242 /// Read at most 16 bits into a i16. 243 /// Assumes the bits are stored in two's complement format. 244 #[inline] read_i16(&mut self, bit_count: u8) -> Result<i16>245 pub fn read_i16(&mut self, bit_count: u8) -> Result<i16> { 246 let value = self.read_signed_value(bit_count, 16)?; 247 Ok((value & 0xffff) as i16) 248 } 249 250 /// Read at most 32 bits into a i32. 251 /// Assumes the bits are stored in two's complement format. 252 #[inline] read_i32(&mut self, bit_count: u8) -> Result<i32>253 pub fn read_i32(&mut self, bit_count: u8) -> Result<i32> { 254 let value = self.read_signed_value(bit_count, 32)?; 255 Ok((value & 0xffffffff) as i32) 256 } 257 258 /// Read at most 64 bits into a i64. 259 /// Assumes the bits are stored in two's complement format. 260 #[inline] read_i64(&mut self, bit_count: u8) -> Result<i64>261 pub fn read_i64(&mut self, bit_count: u8) -> Result<i64> { 262 let value = self.read_signed_value(bit_count, 64)?; 263 Ok(value) 264 } 265 266 /// Read a single bit as a boolean value. 267 /// Interprets 1 as true and 0 as false. 268 #[inline] read_bool(&mut self) -> Result<bool>269 pub fn read_bool(&mut self) -> Result<bool> { 270 match self.read_value(1, 1)? { 271 0 => Ok(false), 272 _ => Ok(true), 273 } 274 } 275 276 /// Read a single bit as a boolean value, but without moving the cursor forward. 277 /// Interprets 1 as true and 0 as false. 278 #[inline] peek_bool(&self) -> Result<bool>279 pub fn peek_bool(&self) -> Result<bool> { 280 self.relative_reader().read_bool() 281 } 282 283 /// Skip arbitrary number of bits. However, you can skip at most to the end of the byte slice. skip(&mut self, bit_count: u64) -> Result<()>284 pub fn skip(&mut self, bit_count: u64) -> Result<()> { 285 let end_position = self.position + bit_count; 286 if end_position > (self.relative_offset + self.length) { 287 return Err(BitReaderError::NotEnoughData { 288 position: self.position(), 289 length: self.length, 290 requested: bit_count, 291 }); 292 } 293 self.position = end_position; 294 Ok(()) 295 } 296 297 /// Returns the position of the cursor, or how many bits have been read so far. 298 #[inline] position(&self) -> u64299 pub fn position(&self) -> u64 { 300 self.position - self.relative_offset 301 } 302 303 /// Returns the number of bits not yet read from the underlying slice. 304 #[inline] remaining(&self) -> u64305 pub fn remaining(&self) -> u64 { 306 self.length - self.position() 307 } 308 309 /// Helper to make sure the "bit cursor" is exactly at the beginning of a byte, or at specific 310 /// multi-byte alignment position. 311 /// 312 /// For example `reader.is_aligned(1)` returns true if exactly n bytes, or n * 8 bits, has been 313 /// read. Similarly, `reader.is_aligned(4)` returns true if exactly n * 32 bits, or n 4-byte 314 /// sequences has been read. 315 /// 316 /// This function can be used to validate the data is being read properly, for example by 317 /// adding invocations wrapped into `debug_assert!()` to places where it is known the data 318 /// should be n-byte aligned. 319 #[inline] is_aligned(&self, alignment_bytes: u32) -> bool320 pub fn is_aligned(&self, alignment_bytes: u32) -> bool { 321 self.position % (alignment_bytes as u64 * 8) == 0 322 } 323 324 /// Helper to move the "bit cursor" to exactly the beginning of a byte, or to a specific 325 /// multi-byte alignment position. 326 /// 327 /// That is, `reader.align(n)` moves the cursor to the next position that 328 /// is a multiple of n * 8 bits, if it's not correctly aligned already. align(&mut self, alignment_bytes: u32) -> Result<()>329 pub fn align(&mut self, alignment_bytes: u32) -> Result<()> { 330 let alignment_bits = alignment_bytes as u64 * 8; 331 let cur_alignment = self.position % alignment_bits; 332 let bits_to_skip = (alignment_bits - cur_alignment) % alignment_bits; 333 self.skip(bits_to_skip) 334 } 335 336 #[inline] read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64>337 fn read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64> { 338 if bit_count == 0 { 339 return Ok(0); 340 } 341 let unsigned = self.read_value(bit_count, maximum_count)?; 342 // Fill the bits above the requested bits with all ones or all zeros, 343 // depending on the sign bit. 344 let sign_bit = unsigned >> (bit_count - 1) & 1; 345 let high_bits = if sign_bit == 1 { -1 } else { 0 }; 346 if bit_count == 64 { 347 // Avoid left-shift-with-overflow exception 348 return Ok(unsigned as i64); 349 } 350 Ok(high_bits << bit_count | unsigned as i64) 351 } 352 353 #[cold] too_many_bits_for_type_error(&self, bit_count: u8, maximum_count: u8) -> Result<u64>354 fn too_many_bits_for_type_error(&self, bit_count: u8, maximum_count: u8) -> Result<u64> { 355 Err(BitReaderError::TooManyBitsForType { 356 position: self.position, 357 requested: bit_count, 358 allowed: maximum_count, 359 }) 360 } 361 362 #[inline] 363 // Inlining allows the size checks to be eliminated for constant values read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64>364 fn read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64> { 365 if bit_count == 0 { 366 return Ok(0); 367 } 368 if bit_count > maximum_count { 369 return self.too_many_bits_for_type_error(bit_count, maximum_count); 370 } 371 self.read_bits(bit_count) 372 } 373 read_bits(&mut self, bit_count: u8) -> Result<u64>374 fn read_bits(&mut self, bit_count: u8) -> Result<u64> { 375 let start_position = self.position; 376 let end_position = self.position + bit_count as u64; 377 if end_position > (self.relative_offset + self.length) { 378 return Err(BitReaderError::NotEnoughData { 379 position: self.position(), 380 length: self.length, 381 requested: bit_count as u64, 382 }); 383 } 384 385 let mut value: u64 = 0; 386 387 for i in start_position..end_position { 388 let byte_index = (i / 8) as usize; 389 // This will never fail, but prevents Rust from generating panic code 390 let byte = if let Some(byte) = self.bytes.get(byte_index).copied() { 391 byte 392 } else { 393 break; 394 }; 395 let shift = 7 - (i % 8); 396 let bit = (byte >> shift) as u64 & 1; 397 value = (value << 1) | bit; 398 } 399 400 self.position = end_position; 401 Ok(value) 402 } 403 } 404 405 /// Result type for those BitReader operations that can fail. 406 pub type Result<T> = result::Result<T, BitReaderError>; 407 408 /// Error enumeration of BitReader errors. 409 #[derive(Debug,PartialEq,Copy,Clone)] 410 pub enum BitReaderError { 411 /// Requested more bits than there are left in the byte slice at the current position. 412 NotEnoughData { 413 /// Current posititon in bits relative to the beginning of the reader. 414 position: u64, 415 416 /// Total readable length in bits of the underlaying slice. 417 length: u64, 418 419 /// Bits requested to be read. 420 requested: u64, 421 }, 422 /// Requested more bits than the returned variable can hold, for example more than 8 bits when 423 /// reading into a u8. 424 TooManyBitsForType { 425 position: u64, 426 requested: u8, 427 allowed: u8, 428 } 429 } 430 431 #[cfg(feature = "std")] 432 impl Error for BitReaderError { description(&self) -> &str433 fn description(&self) -> &str { 434 match *self { 435 BitReaderError::NotEnoughData {..} => "Requested more bits than the byte slice has left", 436 BitReaderError::TooManyBitsForType {..} => "Requested more bits than the requested integer type can hold", 437 } 438 } 439 } 440 441 impl fmt::Display for BitReaderError { fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result442 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 443 //self.description().fmt(fmt) 444 match *self { 445 BitReaderError::NotEnoughData { position, length, requested } => write!(fmt, "BitReader: Requested {} bits with only {}/{} bits left (position {})", requested, length - position, length, position), 446 BitReaderError::TooManyBitsForType { position, requested, allowed } => write!(fmt, "BitReader: Requested {} bits while the type can only hold {} (position {})", requested, allowed, position), 447 } 448 } 449 } 450 451 /// Helper trait to allow reading bits into a variable without explicitly mentioning its type. 452 /// 453 /// If you can't or want, for some reason, to use BitReader's read methods (`read_u8` etc.) but 454 /// want to rely on type inference instead, you can use the ReadInto trait. The trait is 455 /// implemented for all basic integer types (8/16/32/64 bits, signed/unsigned) 456 /// and the boolean type. 457 /// 458 /// ``` 459 /// use bitreader::{BitReader,ReadInto}; 460 /// 461 /// let slice_of_u8 = &[0b1110_0000]; 462 /// let mut reader = BitReader::new(slice_of_u8); 463 /// 464 /// struct Foo { 465 /// bar: u8, 466 /// valid: bool, 467 /// } 468 /// 469 /// // No type mentioned here, instead the type of bits is inferred from the type of Foo::bar, 470 /// // and consequently the correct "overload" is used. 471 /// let bits = ReadInto::read(&mut reader, 2).unwrap(); 472 /// let valid = ReadInto::read(&mut reader, 1).unwrap(); 473 /// 474 /// let foo = Foo { bar: bits, valid: valid }; 475 /// assert_eq!(foo.bar, 3); 476 /// assert!(foo.valid); 477 /// ``` 478 pub trait ReadInto 479 where Self: Sized 480 { read(reader: &mut BitReader, bits: u8) -> Result<Self>481 fn read(reader: &mut BitReader, bits: u8) -> Result<Self>; 482 } 483 484 // There's eight almost identical implementations, let's make this easier. 485 macro_rules! impl_read_into { 486 ($T:ty, $method:ident) => ( 487 impl ReadInto for $T { 488 fn read(reader: &mut BitReader, bits: u8) -> Result<Self> { 489 reader.$method(bits) 490 } 491 } 492 ) 493 } 494 495 impl_read_into!(u8, read_u8); 496 impl_read_into!(u16, read_u16); 497 impl_read_into!(u32, read_u32); 498 impl_read_into!(u64, read_u64); 499 500 impl_read_into!(i8, read_i8); 501 impl_read_into!(i16, read_i16); 502 impl_read_into!(i32, read_i32); 503 impl_read_into!(i64, read_i64); 504 505 // We can't cast to bool, so this requires a separate method. 506 impl ReadInto for bool { read(reader: &mut BitReader, bits: u8) -> Result<Self>507 fn read(reader: &mut BitReader, bits: u8) -> Result<Self> { 508 match reader.read_u8(bits)? { 509 0 => Ok(false), 510 _ => Ok(true), 511 } 512 } 513 } 514