• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 Ilkka Rauta
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6 // option. This file may not be copied, modified, or distributed
7 // except according to those terms.
8 
9 //! BitReader is a helper type to extract strings of bits from a slice of bytes.
10 //!
11 //! Here is how you read first a single bit, then three bits and finally four bits from a byte
12 //! buffer:
13 //!
14 //! ```
15 //! use bitreader::BitReader;
16 //!
17 //! let slice_of_u8 = &[0b1000_1111];
18 //! let mut reader = BitReader::new(slice_of_u8);
19 //!
20 //! // You probably should use try! or some other error handling mechanism in real code if the
21 //! // length of the input is not known in advance.
22 //! let a_single_bit = reader.read_u8(1).unwrap();
23 //! assert_eq!(a_single_bit, 1);
24 //!
25 //! let more_bits = reader.read_u8(3).unwrap();
26 //! assert_eq!(more_bits, 0);
27 //!
28 //! let last_bits_of_byte = reader.read_u8(4).unwrap();
29 //! assert_eq!(last_bits_of_byte, 0b1111);
30 //! ```
31 //! You can naturally read bits from longer buffer of data than just a single byte.
32 //!
33 //! As you read bits, the internal cursor of BitReader moves on along the stream of bits. Big
34 //! endian format is assumed when reading the multi-byte values. BitReader supports reading maximum
35 //! of 64 bits at a time (with read_u64). Reading signed values directly is not supported at the
36 //! moment.
37 //!
38 //! The reads do not need to be aligned in any particular way.
39 //!
40 //! Reading zero bits is a no-op.
41 //!
42 //! You can also skip over a number of bits, in which case there is no arbitrary small limits like
43 //! when reading the values to a variable. However, you can not seek past the end of the slice,
44 //! either when reading or when skipping bits.
45 //!
46 //! Note that the code will likely not work correctly if the slice is longer than 2^61 bytes, but
47 //! exceeding that should be pretty unlikely. Let's get back to this when people read exabytes of
48 //! information one bit at a time.
49 #![no_std]
50 cfg_if::cfg_if!{
51     if #[cfg(feature = "std")] {
52         extern crate std;
53         use std::cmp::min;
54         use std::prelude::v1::*;
55         use std::fmt;
56         use std::error::Error;
57         use std::result;
58     } else {
59         use core::result;
60         use core::fmt;
61         use core::cmp::min;
62     }
63 }
64 
65 #[cfg(test)]
66 mod tests;
67 
68 /// BitReader reads data from a byte slice at the granularity of a single bit.
69 pub struct BitReader<'a> {
70     bytes: &'a [u8],
71     /// Position from the start of the slice, counted as bits instead of bytes
72     position: u64,
73     relative_offset: u64,
74 
75     /// Length this reader is allowed to read from the slice, counted as bits instead of bytes.
76     length: u64,
77 }
78 
79 impl<'a> BitReader<'a> {
80     /// Construct a new BitReader from a byte slice. The returned reader lives at most as long as
81     /// the slice given to is valid.
82     #[inline]
new(bytes: &'a [u8]) -> BitReader<'a>83     pub fn new(bytes: &'a [u8]) -> BitReader<'a> {
84         BitReader {
85             bytes: bytes,
86             position: 0,
87             relative_offset: 0,
88             length: bytes.len() as u64 * 8,
89         }
90     }
91 
92     /// Returns a copy of current BitReader, with the difference that its position() returns
93     /// positions relative to the position of the original BitReader at the construction time.
94     /// After construction, both readers are otherwise completely independent, except of course
95     /// for sharing the same source data.
96     ///
97     /// ```
98     /// use bitreader::BitReader;
99     ///
100     /// let bytes = &[0b11110000, 0b00001111];
101     /// let mut original = BitReader::new(bytes);
102     /// assert_eq!(original.read_u8(4).unwrap(), 0b1111);
103     /// assert_eq!(original.position(), 4);
104     ///
105     /// let mut relative = original.relative_reader();
106     /// assert_eq!(relative.position(), 0);
107     ///
108     /// assert_eq!(original.read_u8(8).unwrap(), 0);
109     /// assert_eq!(relative.read_u8(8).unwrap(), 0);
110     ///
111     /// assert_eq!(original.position(), 12);
112     /// assert_eq!(relative.position(), 8);
113     /// ```
114     #[inline]
relative_reader(&self) -> BitReader<'a>115     pub fn relative_reader(&self) -> BitReader<'a> {
116         BitReader {
117             bytes: self.bytes,
118             position: self.position,
119             relative_offset: self.position,
120             length: self.length - self.position(),
121         }
122     }
123 
124     /// Returns a copy of current BitReader, with the difference that its position() returns
125     /// positions relative to the position of the original BitReader at the construction time, and
126     /// will not allow reading more than len bits. After construction, both readers are otherwise
127     // completely independent, except of course for sharing the same source data.
128     ///
129     /// ```
130     /// use bitreader::BitReader;
131     /// use bitreader::BitReaderError;
132     ///
133     /// let bytes = &[0b11110000, 0b00001111];
134     /// let mut original = BitReader::new(bytes);
135     /// assert_eq!(original.read_u8(4).unwrap(), 0b1111);
136     /// assert_eq!(original.position(), 4);
137     ///
138     /// let mut relative = original.relative_reader_atmost(8);
139     /// assert_eq!(relative.position(), 0);
140     ///
141     /// assert_eq!(original.read_u8(8).unwrap(), 0);
142     /// assert_eq!(relative.read_u8(8).unwrap(), 0);
143     ///
144     /// assert_eq!(original.position(), 12);
145     /// assert_eq!(relative.position(), 8);
146     ///
147     /// assert_eq!(relative.read_u8(8).unwrap_err(), BitReaderError::NotEnoughData{
148     ///    position: 8,
149     ///    length: 8,
150     ///    requested: 8
151     /// });
152     /// ```
153     #[inline]
relative_reader_atmost(&self, len: u64) -> BitReader<'a>154     pub fn relative_reader_atmost(&self, len: u64) -> BitReader<'a> {
155         BitReader {
156             bytes: self.bytes,
157             position: self.position,
158             relative_offset: self.position,
159             length: min(self.length - self.position(), len),
160         }
161     }
162 
163     /// Read at most 8 bits into a u8.
164     #[inline]
read_u8(&mut self, bit_count: u8) -> Result<u8>165     pub fn read_u8(&mut self, bit_count: u8) -> Result<u8> {
166         let value = self.read_value(bit_count, 8)?;
167         Ok((value & 0xff) as u8)
168     }
169 
170     /// Read at most 8 bits into a u8, but without moving the cursor forward.
171     #[inline]
peek_u8(&self, bit_count: u8) -> Result<u8>172     pub fn peek_u8(&self, bit_count: u8) -> Result<u8> {
173         self.relative_reader().read_u8(bit_count)
174     }
175 
176     /// Fills the entire `output_bytes` slice. If there aren't enough bits remaining
177     /// after the internal cursor's current position, the cursor won't be moved forward
178     /// and the contents of `output_bytes` won't be modified.
read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()>179     pub fn read_u8_slice(&mut self, output_bytes: &mut [u8]) -> Result<()> {
180         let requested = output_bytes.len() as u64 * 8;
181         if requested > self.remaining() {
182             Err(BitReaderError::NotEnoughData {
183                 position: self.position(),
184                 length: self.length,
185                 requested,
186             })
187         } else {
188             for byte in output_bytes.iter_mut() {
189                 *byte = self.read_u8(8)?;
190             }
191             Ok(())
192         }
193     }
194 
195     /// Read at most 16 bits into a u16.
196     #[inline]
read_u16(&mut self, bit_count: u8) -> Result<u16>197     pub fn read_u16(&mut self, bit_count: u8) -> Result<u16> {
198         let value = self.read_value(bit_count, 16)?;
199         Ok((value & 0xffff) as u16)
200     }
201 
202     /// Read at most 16 bits into a u16, but without moving the cursor forward.
203     #[inline]
peek_u16(&self, bit_count: u8) -> Result<u16>204     pub fn peek_u16(&self, bit_count: u8) -> Result<u16> {
205         self.relative_reader().read_u16(bit_count)
206     }
207 
208     /// Read at most 32 bits into a u32.
209     #[inline]
read_u32(&mut self, bit_count: u8) -> Result<u32>210     pub fn read_u32(&mut self, bit_count: u8) -> Result<u32> {
211         let value = self.read_value(bit_count, 32)?;
212         Ok((value & 0xffffffff) as u32)
213     }
214 
215     /// Read at most 32 bits into a u32, but without moving the cursor forward.
216     #[inline]
peek_u32(&self, bit_count: u8) -> Result<u32>217     pub fn peek_u32(&self, bit_count: u8) -> Result<u32> {
218         self.relative_reader().read_u32(bit_count)
219     }
220 
221     /// Read at most 64 bits into a u64.
222     #[inline]
read_u64(&mut self, bit_count: u8) -> Result<u64>223     pub fn read_u64(&mut self, bit_count: u8) -> Result<u64> {
224         let value = self.read_value(bit_count, 64)?;
225         Ok(value)
226     }
227 
228     /// Read at most 64 bits into a u64, but without moving the cursor forward.
229     #[inline]
peek_u64(&self, bit_count: u8) -> Result<u64>230     pub fn peek_u64(&self, bit_count: u8) -> Result<u64> {
231         self.relative_reader().read_u64(bit_count)
232     }
233 
234     /// Read at most 8 bits into a i8.
235     /// Assumes the bits are stored in two's complement format.
236     #[inline]
read_i8(&mut self, bit_count: u8) -> Result<i8>237     pub fn read_i8(&mut self, bit_count: u8) -> Result<i8> {
238         let value = self.read_signed_value(bit_count, 8)?;
239         Ok((value & 0xff) as i8)
240     }
241 
242     /// Read at most 16 bits into a i16.
243     /// Assumes the bits are stored in two's complement format.
244     #[inline]
read_i16(&mut self, bit_count: u8) -> Result<i16>245     pub fn read_i16(&mut self, bit_count: u8) -> Result<i16> {
246         let value = self.read_signed_value(bit_count, 16)?;
247         Ok((value & 0xffff) as i16)
248     }
249 
250     /// Read at most 32 bits into a i32.
251     /// Assumes the bits are stored in two's complement format.
252     #[inline]
read_i32(&mut self, bit_count: u8) -> Result<i32>253     pub fn read_i32(&mut self, bit_count: u8) -> Result<i32> {
254         let value = self.read_signed_value(bit_count, 32)?;
255         Ok((value & 0xffffffff) as i32)
256     }
257 
258     /// Read at most 64 bits into a i64.
259     /// Assumes the bits are stored in two's complement format.
260     #[inline]
read_i64(&mut self, bit_count: u8) -> Result<i64>261     pub fn read_i64(&mut self, bit_count: u8) -> Result<i64> {
262         let value = self.read_signed_value(bit_count, 64)?;
263         Ok(value)
264     }
265 
266     /// Read a single bit as a boolean value.
267     /// Interprets 1 as true and 0 as false.
268     #[inline]
read_bool(&mut self) -> Result<bool>269     pub fn read_bool(&mut self) -> Result<bool> {
270         match self.read_value(1, 1)? {
271             0 => Ok(false),
272             _ => Ok(true),
273         }
274     }
275 
276     /// Read a single bit as a boolean value, but without moving the cursor forward.
277     /// Interprets 1 as true and 0 as false.
278     #[inline]
peek_bool(&self) -> Result<bool>279     pub fn peek_bool(&self) -> Result<bool> {
280         self.relative_reader().read_bool()
281     }
282 
283     /// Skip arbitrary number of bits. However, you can skip at most to the end of the byte slice.
skip(&mut self, bit_count: u64) -> Result<()>284     pub fn skip(&mut self, bit_count: u64) -> Result<()> {
285         let end_position = self.position + bit_count;
286         if end_position > (self.relative_offset + self.length) {
287             return Err(BitReaderError::NotEnoughData {
288                 position: self.position(),
289                 length: self.length,
290                 requested: bit_count,
291             });
292         }
293         self.position = end_position;
294         Ok(())
295     }
296 
297     /// Returns the position of the cursor, or how many bits have been read so far.
298     #[inline]
position(&self) -> u64299     pub fn position(&self) -> u64 {
300         self.position - self.relative_offset
301     }
302 
303     /// Returns the number of bits not yet read from the underlying slice.
304     #[inline]
remaining(&self) -> u64305     pub fn remaining(&self) -> u64 {
306         self.length - self.position()
307     }
308 
309     /// Helper to make sure the "bit cursor" is exactly at the beginning of a byte, or at specific
310     /// multi-byte alignment position.
311     ///
312     /// For example `reader.is_aligned(1)` returns true if exactly n bytes, or n * 8 bits, has been
313     /// read. Similarly, `reader.is_aligned(4)` returns true if exactly n * 32 bits, or n 4-byte
314     /// sequences has been read.
315     ///
316     /// This function can be used to validate the data is being read properly, for example by
317     /// adding invocations wrapped into `debug_assert!()` to places where it is known the data
318     /// should be n-byte aligned.
319     #[inline]
is_aligned(&self, alignment_bytes: u32) -> bool320     pub fn is_aligned(&self, alignment_bytes: u32) -> bool {
321         self.position % (alignment_bytes as u64 * 8) == 0
322     }
323 
324     /// Helper to move the "bit cursor" to exactly the beginning of a byte, or to a specific
325     /// multi-byte alignment position.
326     ///
327     /// That is, `reader.align(n)` moves the cursor to the next position that
328     /// is a multiple of n * 8 bits, if it's not correctly aligned already.
align(&mut self, alignment_bytes: u32) -> Result<()>329     pub fn align(&mut self, alignment_bytes: u32) -> Result<()> {
330         let alignment_bits = alignment_bytes as u64 * 8;
331         let cur_alignment = self.position % alignment_bits;
332         let bits_to_skip = (alignment_bits - cur_alignment) % alignment_bits;
333         self.skip(bits_to_skip)
334     }
335 
336     #[inline]
read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64>337     fn read_signed_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<i64> {
338         if bit_count == 0 {
339             return Ok(0);
340         }
341         let unsigned = self.read_value(bit_count, maximum_count)?;
342         // Fill the bits above the requested bits with all ones or all zeros,
343         // depending on the sign bit.
344         let sign_bit = unsigned >> (bit_count - 1) & 1;
345         let high_bits = if sign_bit == 1 { -1 } else { 0 };
346         if bit_count == 64 {
347             // Avoid left-shift-with-overflow exception
348             return Ok(unsigned as i64);
349         }
350         Ok(high_bits << bit_count | unsigned as i64)
351     }
352 
353     #[cold]
too_many_bits_for_type_error(&self, bit_count: u8, maximum_count: u8) -> Result<u64>354     fn too_many_bits_for_type_error(&self, bit_count: u8, maximum_count: u8) -> Result<u64> {
355         Err(BitReaderError::TooManyBitsForType {
356             position: self.position,
357             requested: bit_count,
358             allowed: maximum_count,
359         })
360     }
361 
362     #[inline]
363     // Inlining allows the size checks to be eliminated for constant values
read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64>364     fn read_value(&mut self, bit_count: u8, maximum_count: u8) -> Result<u64> {
365         if bit_count == 0 {
366             return Ok(0);
367         }
368         if bit_count > maximum_count {
369             return self.too_many_bits_for_type_error(bit_count, maximum_count);
370         }
371         self.read_bits(bit_count)
372     }
373 
read_bits(&mut self, bit_count: u8) -> Result<u64>374     fn read_bits(&mut self, bit_count: u8) -> Result<u64> {
375         let start_position = self.position;
376         let end_position = self.position + bit_count as u64;
377         if end_position > (self.relative_offset + self.length) {
378             return Err(BitReaderError::NotEnoughData {
379                 position: self.position(),
380                 length: self.length,
381                 requested: bit_count as u64,
382             });
383         }
384 
385         let mut value: u64 = 0;
386 
387         for i in start_position..end_position {
388             let byte_index = (i / 8) as usize;
389             // This will never fail, but prevents Rust from generating panic code
390             let byte = if let Some(byte) = self.bytes.get(byte_index).copied() {
391                 byte
392             } else {
393                 break;
394             };
395             let shift = 7 - (i % 8);
396             let bit = (byte >> shift) as u64 & 1;
397             value = (value << 1) | bit;
398         }
399 
400         self.position = end_position;
401         Ok(value)
402     }
403 }
404 
405 /// Result type for those BitReader operations that can fail.
406 pub type Result<T> = result::Result<T, BitReaderError>;
407 
408 /// Error enumeration of BitReader errors.
409 #[derive(Debug,PartialEq,Copy,Clone)]
410 pub enum BitReaderError {
411     /// Requested more bits than there are left in the byte slice at the current position.
412     NotEnoughData {
413         /// Current posititon in bits relative to the beginning of the reader.
414         position: u64,
415 
416         /// Total readable length in bits of the underlaying slice.
417         length: u64,
418 
419         /// Bits requested to be read.
420         requested: u64,
421     },
422     /// Requested more bits than the returned variable can hold, for example more than 8 bits when
423     /// reading into a u8.
424     TooManyBitsForType {
425         position: u64,
426         requested: u8,
427         allowed: u8,
428     }
429 }
430 
431 #[cfg(feature = "std")]
432 impl Error for BitReaderError {
description(&self) -> &str433     fn description(&self) -> &str {
434         match *self {
435             BitReaderError::NotEnoughData {..} => "Requested more bits than the byte slice has left",
436             BitReaderError::TooManyBitsForType {..} => "Requested more bits than the requested integer type can hold",
437         }
438     }
439 }
440 
441 impl fmt::Display for BitReaderError {
fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result442     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
443         //self.description().fmt(fmt)
444         match *self {
445             BitReaderError::NotEnoughData { position, length, requested } => write!(fmt, "BitReader: Requested {} bits with only {}/{} bits left (position {})", requested, length - position, length, position),
446             BitReaderError::TooManyBitsForType { position, requested, allowed } => write!(fmt, "BitReader: Requested {} bits while the type can only hold {} (position {})", requested, allowed, position),
447         }
448     }
449 }
450 
451 /// Helper trait to allow reading bits into a variable without explicitly mentioning its type.
452 ///
453 /// If you can't or want, for some reason, to use BitReader's read methods (`read_u8` etc.) but
454 /// want to rely on type inference instead, you can use the ReadInto trait. The trait is
455 /// implemented for all basic integer types (8/16/32/64 bits, signed/unsigned)
456 /// and the boolean type.
457 ///
458 /// ```
459 /// use bitreader::{BitReader,ReadInto};
460 ///
461 /// let slice_of_u8 = &[0b1110_0000];
462 /// let mut reader = BitReader::new(slice_of_u8);
463 ///
464 /// struct Foo {
465 ///     bar: u8,
466 ///     valid: bool,
467 /// }
468 ///
469 /// // No type mentioned here, instead the type of bits is inferred from the type of Foo::bar,
470 /// // and consequently the correct "overload" is used.
471 /// let bits = ReadInto::read(&mut reader, 2).unwrap();
472 /// let valid = ReadInto::read(&mut reader, 1).unwrap();
473 ///
474 /// let foo = Foo { bar: bits, valid: valid };
475 /// assert_eq!(foo.bar, 3);
476 /// assert!(foo.valid);
477 /// ```
478 pub trait ReadInto
479     where Self: Sized
480 {
read(reader: &mut BitReader, bits: u8) -> Result<Self>481     fn read(reader: &mut BitReader, bits: u8) -> Result<Self>;
482 }
483 
484 // There's eight almost identical implementations, let's make this easier.
485 macro_rules! impl_read_into {
486     ($T:ty, $method:ident) => (
487         impl ReadInto for $T {
488             fn read(reader: &mut BitReader, bits: u8) -> Result<Self> {
489                 reader.$method(bits)
490             }
491         }
492     )
493 }
494 
495 impl_read_into!(u8, read_u8);
496 impl_read_into!(u16, read_u16);
497 impl_read_into!(u32, read_u32);
498 impl_read_into!(u64, read_u64);
499 
500 impl_read_into!(i8, read_i8);
501 impl_read_into!(i16, read_i16);
502 impl_read_into!(i32, read_i32);
503 impl_read_into!(i64, read_i64);
504 
505 // We can't cast to bool, so this requires a separate method.
506 impl ReadInto for bool {
read(reader: &mut BitReader, bits: u8) -> Result<Self>507     fn read(reader: &mut BitReader, bits: u8) -> Result<Self> {
508         match reader.read_u8(bits)? {
509             0 => Ok(false),
510             _ => Ok(true),
511         }
512     }
513 }
514