• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /// A type that provides a human readable debug impl for arbitrary bytes.
2 ///
3 /// This generally works best when the bytes are presumed to be mostly UTF-8,
4 /// but will work for anything.
5 ///
6 /// N.B. This is copied nearly verbatim from regex-automata. Sigh.
7 pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
8 
9 impl<'a> core::fmt::Debug for Bytes<'a> {
fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result10     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
11         write!(f, "\"")?;
12         // This is a sad re-implementation of a similar impl found in bstr.
13         let mut bytes = self.0;
14         while let Some(result) = utf8_decode(bytes) {
15             let ch = match result {
16                 Ok(ch) => ch,
17                 Err(byte) => {
18                     write!(f, r"\x{:02x}", byte)?;
19                     bytes = &bytes[1..];
20                     continue;
21                 }
22             };
23             bytes = &bytes[ch.len_utf8()..];
24             match ch {
25                 '\0' => write!(f, "\\0")?,
26                 // ASCII control characters except \0, \n, \r, \t
27                 '\x01'..='\x08'
28                 | '\x0b'
29                 | '\x0c'
30                 | '\x0e'..='\x19'
31                 | '\x7f' => {
32                     write!(f, "\\x{:02x}", u32::from(ch))?;
33                 }
34                 '\n' | '\r' | '\t' | _ => {
35                     write!(f, "{}", ch.escape_debug())?;
36                 }
37             }
38         }
39         write!(f, "\"")?;
40         Ok(())
41     }
42 }
43 
/// Tries to decode one UTF-8 encoded codepoint from the front of `bytes`.
///
/// The result is one of:
///
/// * `None` if and only if `bytes` is empty.
/// * `Some(Ok(ch))` when `bytes` begins with a valid encoding of `ch`.
/// * `Some(Err(b))` when no valid encoding starts at the beginning of
///   `bytes`, where `b` is the first byte of the slice.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let first = *bytes.first()?;
    // The leading byte alone determines how long the encoding claims to be.
    let expected = match first {
        // ASCII encodes itself and needs no further validation.
        0x00..=0x7F => return Some(Ok(char::from(first))),
        // A continuation byte can never begin a codepoint.
        0x80..=0xBF => return Some(Err(first)),
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        // No encoding has a leading byte this large.
        0xF8..=0xFF => return Some(Err(first)),
    };
    // A truncated sequence and an ill-formed one (overlong, surrogate, bad
    // continuation byte, out of range) both end up reporting the first byte.
    let decoded = bytes
        .get(..expected)
        .and_then(|prefix| core::str::from_utf8(prefix).ok())
        .and_then(|text| text.chars().next());
    Some(decoded.ok_or(first))
}
81