//! Parsers and pull-style readers for (possibly segmented) CBOR byte and text strings.
1 use super::*;
2 
3 use ciborium_io::Read;
4 
5 use core::marker::PhantomData;
6 
/// A parser for incoming segments
///
/// A parser is created through [`Default`] and is then fed the raw bytes of
/// a segment chunk by chunk.
pub trait Parser: Default {
    /// The type of item that is parsed
    type Item: ?Sized;

    /// The parsing error that may occur
    type Error;

    /// The main parsing function
    ///
    /// This function processes the incoming bytes and returns the item,
    /// borrowed from `bytes`.
    ///
    /// One important detail that **MUST NOT** be overlooked is that the
    /// parser may save data from a previous parsing attempt. The number of
    /// bytes saved is indicated by [`Parser::saved`]. The saved bytes will be
    /// copied into the beginning of `bytes` before processing, overwriting
    /// whatever is there. Therefore, two requirements should be met:
    ///
    /// 1. `bytes` should be longer than the number of saved bytes.
    /// 2. New input should be placed in `bytes` only *after* a prefix of
    ///    `saved()` bytes reserved for the parser.
    ///
    /// If both criteria are met, this allows the parser to prepend its saved
    /// bytes without any additional allocation.
    ///
    /// # Errors
    ///
    /// Returns `Self::Error` when the input cannot be parsed.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Indicates the number of saved bytes in the parser
    ///
    /// The default implementation reports `0`, which suits parsers that
    /// never carry data over between calls.
    fn saved(&self) -> usize {
        0
    }
}
39 
40 /// A bytes parser
41 ///
42 /// No actual processing is performed and the input bytes are directly
43 /// returned. This implies that this parser never saves any bytes internally.
44 #[derive(Default)]
45 pub struct Bytes(());
46 
47 impl Parser for Bytes {
48     type Item = [u8];
49     type Error = core::convert::Infallible;
50 
parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error>51     fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
52         Ok(bytes)
53     }
54 }
55 
56 /// A text parser
57 ///
58 /// This parser converts the input bytes to a `str`. This parser preserves
59 /// trailing invalid UTF-8 sequences in the case that chunking fell in the
60 /// middle of a valid UTF-8 character.
61 #[derive(Default)]
62 pub struct Text {
63     stored: usize,
64     buffer: [u8; 3],
65 }
66 
67 impl Parser for Text {
68     type Item = str;
69     type Error = core::str::Utf8Error;
70 
parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error>71     fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
72         // If we cannot advance, return nothing.
73         if bytes.len() <= self.stored {
74             return Ok("");
75         }
76 
77         // Copy previously invalid data into place.
78         bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
79 
80         Ok(match core::str::from_utf8(bytes) {
81             Ok(s) => s,
82             Err(e) => {
83                 let valid_len = e.valid_up_to();
84                 let invalid_len = bytes.len() - valid_len;
85 
86                 // If the size of the invalid UTF-8 is large enough to hold
87                 // all valid UTF-8 characters, we have a syntax error.
88                 if invalid_len > self.buffer.len() {
89                     return Err(e);
90                 }
91 
92                 // Otherwise, store the invalid bytes for the next read cycle.
93                 self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
94                 self.stored = invalid_len;
95 
96                 // Decode the valid part of the string.
97                 core::str::from_utf8(&bytes[..valid_len]).unwrap()
98             }
99         })
100     }
101 
saved(&self) -> usize102     fn saved(&self) -> usize {
103         self.stored
104     }
105 }
106 
107 /// A CBOR segment
108 ///
109 /// This type represents a single bytes or text segment on the wire. It can be
110 /// read out in parsed chunks based on the size of the input scratch buffer.
111 pub struct Segment<'r, R: Read, P: Parser> {
112     reader: &'r mut Decoder<R>,
113     unread: usize,
114     offset: usize,
115     parser: P,
116 }
117 
118 impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
119     /// Gets the number of unprocessed bytes
120     #[inline]
left(&self) -> usize121     pub fn left(&self) -> usize {
122         self.unread + self.parser.saved()
123     }
124 
125     /// Gets the next parsed chunk within the segment
126     ///
127     /// Returns `Ok(None)` when all chunks have been read.
128     #[inline]
pull<'a>( &mut self, buffer: &'a mut [u8], ) -> Result<Option<&'a P::Item>, Error<R::Error>>129     pub fn pull<'a>(
130         &mut self,
131         buffer: &'a mut [u8],
132     ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
133         use core::cmp::min;
134 
135         let prev = self.parser.saved();
136         match self.unread {
137             0 if prev == 0 => return Ok(None),
138             0 => return Err(Error::Syntax(self.offset)),
139             _ => (),
140         }
141 
142         // Determine how many bytes to read.
143         let size = min(buffer.len(), prev + self.unread);
144         let full = &mut buffer[..size];
145         let next = &mut full[min(size, prev)..];
146 
147         // Read additional bytes.
148         self.reader.read_exact(next)?;
149         self.unread -= next.len();
150 
151         self.parser
152             .parse(full)
153             .or(Err(Error::Syntax(self.offset)))
154             .map(Some)
155     }
156 }
157 
158 /// A sequence of CBOR segments
159 ///
160 /// CBOR allows for bytes or text items to be segmented. This type represents
161 /// the state of that segmented input stream.
162 pub struct Segments<'r, R: Read, P: Parser> {
163     reader: &'r mut Decoder<R>,
164     finish: bool,
165     nested: usize,
166     parser: PhantomData<P>,
167     unwrap: fn(Header) -> Result<Option<usize>, ()>,
168 }
169 
170 impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
171     #[inline]
new( decoder: &'r mut Decoder<R>, unwrap: fn(Header) -> Result<Option<usize>, ()>, ) -> Self172     pub(crate) fn new(
173         decoder: &'r mut Decoder<R>,
174         unwrap: fn(Header) -> Result<Option<usize>, ()>,
175     ) -> Self {
176         Self {
177             reader: decoder,
178             finish: false,
179             nested: 0,
180             parser: PhantomData,
181             unwrap,
182         }
183     }
184 
185     /// Gets the next segment in the stream
186     ///
187     /// Returns `Ok(None)` at the conclusion of the stream.
188     #[inline]
pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>>189     pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
190         while !self.finish {
191             let offset = self.reader.offset();
192             match self.reader.pull()? {
193                 Header::Break if self.nested == 1 => return Ok(None),
194                 Header::Break if self.nested > 1 => self.nested -= 1,
195                 header => match (self.unwrap)(header) {
196                     Err(..) => return Err(Error::Syntax(offset)),
197                     Ok(None) => self.nested += 1,
198                     Ok(Some(len)) => {
199                         self.finish = self.nested == 0;
200                         return Ok(Some(Segment {
201                             reader: self.reader,
202                             unread: len,
203                             offset,
204                             parser: P::default(),
205                         }));
206                     }
207                 },
208             }
209         }
210 
211         Ok(None)
212     }
213 }
214