1 use super::*; 2 3 use ciborium_io::Read; 4 5 use core::marker::PhantomData; 6 7 /// A parser for incoming segments 8 pub trait Parser: Default { 9 /// The type of item that is parsed 10 type Item: ?Sized; 11 12 /// The parsing error that may occur 13 type Error; 14 15 /// The main parsing function 16 /// 17 /// This function processes the incoming bytes and returns the item. 18 /// 19 /// One important detail that **MUST NOT** be overlooked is that the 20 /// parser may save data from a previous parsing attempt. The number of 21 /// bytes saved is indicated by the `Parser::saved()` function. The saved 22 /// bytes will be copied into the beginning of the `bytes` array before 23 /// processing. Therefore, two requirements should be met. 24 /// 25 /// First, the incoming byte slice should be larger than the saved bytes. 26 /// 27 /// Second, the incoming byte slice should contain new bytes only after 28 /// the saved byte prefix. 29 /// 30 /// If both criteria are met, this allows the parser to prepend its saved 31 /// bytes without any additional allocation. parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>32 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>; 33 34 /// Indicates the number of saved bytes in the parser saved(&self) -> usize35 fn saved(&self) -> usize { 36 0 37 } 38 } 39 40 /// A bytes parser 41 /// 42 /// No actual processing is performed and the input bytes are directly 43 /// returned. This implies that this parser never saves any bytes internally. 44 #[derive(Default)] 45 pub struct Bytes(()); 46 47 impl Parser for Bytes { 48 type Item = [u8]; 49 type Error = core::convert::Infallible; 50 parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error>51 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> { 52 Ok(bytes) 53 } 54 } 55 56 /// A text parser 57 /// 58 /// This parser converts the input bytes to a `str`. This parser preserves 59 /// trailing invalid UTF-8 sequences in the case that chunking fell in the 60 /// middle of a valid UTF-8 character. 61 #[derive(Default)] 62 pub struct Text { 63 stored: usize, 64 buffer: [u8; 3], 65 } 66 67 impl Parser for Text { 68 type Item = str; 69 type Error = core::str::Utf8Error; 70 parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error>71 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> { 72 // If we cannot advance, return nothing. 73 if bytes.len() <= self.stored { 74 return Ok(""); 75 } 76 77 // Copy previously invalid data into place. 78 bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]); 79 80 Ok(match core::str::from_utf8(bytes) { 81 Ok(s) => s, 82 Err(e) => { 83 let valid_len = e.valid_up_to(); 84 let invalid_len = bytes.len() - valid_len; 85 86 // If the size of the invalid UTF-8 is large enough to hold 87 // all valid UTF-8 characters, we have a syntax error. 88 if invalid_len > self.buffer.len() { 89 return Err(e); 90 } 91 92 // Otherwise, store the invalid bytes for the next read cycle. 93 self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]); 94 self.stored = invalid_len; 95 96 // Decode the valid part of the string. 97 core::str::from_utf8(&bytes[..valid_len]).unwrap() 98 } 99 }) 100 } 101 saved(&self) -> usize102 fn saved(&self) -> usize { 103 self.stored 104 } 105 } 106 107 /// A CBOR segment 108 /// 109 /// This type represents a single bytes or text segment on the wire. It can be 110 /// read out in parsed chunks based on the size of the input scratch buffer. 111 pub struct Segment<'r, R: Read, P: Parser> { 112 reader: &'r mut Decoder<R>, 113 unread: usize, 114 offset: usize, 115 parser: P, 116 } 117 118 impl<'r, R: Read, P: Parser> Segment<'r, R, P> { 119 /// Gets the number of unprocessed bytes 120 #[inline] left(&self) -> usize121 pub fn left(&self) -> usize { 122 self.unread + self.parser.saved() 123 } 124 125 /// Gets the next parsed chunk within the segment 126 /// 127 /// Returns `Ok(None)` when all chunks have been read. 128 #[inline] pull<'a>( &mut self, buffer: &'a mut [u8], ) -> Result<Option<&'a P::Item>, Error<R::Error>>129 pub fn pull<'a>( 130 &mut self, 131 buffer: &'a mut [u8], 132 ) -> Result<Option<&'a P::Item>, Error<R::Error>> { 133 use core::cmp::min; 134 135 let prev = self.parser.saved(); 136 match self.unread { 137 0 if prev == 0 => return Ok(None), 138 0 => return Err(Error::Syntax(self.offset)), 139 _ => (), 140 } 141 142 // Determine how many bytes to read. 143 let size = min(buffer.len(), prev + self.unread); 144 let full = &mut buffer[..size]; 145 let next = &mut full[min(size, prev)..]; 146 147 // Read additional bytes. 148 self.reader.read_exact(next)?; 149 self.unread -= next.len(); 150 151 self.parser 152 .parse(full) 153 .or(Err(Error::Syntax(self.offset))) 154 .map(Some) 155 } 156 } 157 158 /// A sequence of CBOR segments 159 /// 160 /// CBOR allows for bytes or text items to be segmented. This type represents 161 /// the state of that segmented input stream. 162 pub struct Segments<'r, R: Read, P: Parser> { 163 reader: &'r mut Decoder<R>, 164 finish: bool, 165 nested: usize, 166 parser: PhantomData<P>, 167 unwrap: fn(Header) -> Result<Option<usize>, ()>, 168 } 169 170 impl<'r, R: Read, P: Parser> Segments<'r, R, P> { 171 #[inline] new( decoder: &'r mut Decoder<R>, unwrap: fn(Header) -> Result<Option<usize>, ()>, ) -> Self172 pub(crate) fn new( 173 decoder: &'r mut Decoder<R>, 174 unwrap: fn(Header) -> Result<Option<usize>, ()>, 175 ) -> Self { 176 Self { 177 reader: decoder, 178 finish: false, 179 nested: 0, 180 parser: PhantomData, 181 unwrap, 182 } 183 } 184 185 /// Gets the next segment in the stream 186 /// 187 /// Returns `Ok(None)` at the conclusion of the stream. 188 #[inline] pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>>189 pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> { 190 while !self.finish { 191 let offset = self.reader.offset(); 192 match self.reader.pull()? { 193 Header::Break if self.nested == 1 => return Ok(None), 194 Header::Break if self.nested > 1 => self.nested -= 1, 195 header => match (self.unwrap)(header) { 196 Err(..) => return Err(Error::Syntax(offset)), 197 Ok(None) => self.nested += 1, 198 Ok(Some(len)) => { 199 self.finish = self.nested == 0; 200 return Ok(Some(Segment { 201 reader: self.reader, 202 unread: len, 203 offset, 204 parser: P::default(), 205 })); 206 } 207 }, 208 } 209 } 210 211 Ok(None) 212 } 213 } 214