• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::str;
2 
3 use protobuf_support::lexer::int;
4 use protobuf_support::lexer::loc::Loc;
5 use protobuf_support::lexer::parser_language::ParserLanguage;
6 use protobuf_support::lexer::str_lit::StrLitDecodeError;
7 use protobuf_support::lexer::tokenizer::Tokenizer;
8 use protobuf_support::lexer::tokenizer::TokenizerError;
9 
10 use crate::message_dyn::MessageDyn;
11 use crate::message_full::MessageFull;
12 use crate::reflect::EnumDescriptor;
13 use crate::reflect::EnumValueDescriptor;
14 use crate::reflect::MessageDescriptor;
15 use crate::reflect::ReflectValueBox;
16 use crate::reflect::RuntimeFieldType;
17 use crate::reflect::RuntimeType;
18 
19 #[derive(Debug, thiserror::Error)]
20 pub enum ParseErrorWithoutLoc {
21     #[error(transparent)]
22     TokenizerError(#[from] TokenizerError),
23     #[error(transparent)]
24     StrLitDecodeError(#[from] StrLitDecodeError),
25     #[error("Unknown field: `{}`", .0)]
26     UnknownField(String),
27     #[error("Unknown enum value: `{}`", .0)]
28     UnknownEnumValue(String),
29     #[error("Map field specified more than once: `{}`", .0)]
30     MapFieldIsSpecifiedMoreThanOnce(String),
31     #[error("Integer overflow")]
32     IntegerOverflow,
33     #[error("Expecting bool")]
34     ExpectingBool,
35     #[error("Message not initialized")]
36     MessageNotInitialized,
37 }
38 
39 impl From<int::Overflow> for ParseErrorWithoutLoc {
from(_: int::Overflow) -> Self40     fn from(_: int::Overflow) -> Self {
41         ParseErrorWithoutLoc::IntegerOverflow
42     }
43 }
44 
45 /// Text format parse error.
46 #[derive(Debug, thiserror::Error)]
47 #[error("{}: {}", loc, error)]
48 pub struct ParseError {
49     error: ParseErrorWithoutLoc,
50     loc: Loc,
51 }
52 
53 pub type ParseResult<A> = Result<A, ParseErrorWithoutLoc>;
54 pub type ParseWithLocResult<A> = Result<A, ParseError>;
55 
56 #[derive(Clone)]
57 struct Parser<'a> {
58     tokenizer: Tokenizer<'a>,
59 }
60 
61 impl<'a> Parser<'a> {
62     // Text format
63 
next_field_name(&mut self) -> ParseResult<String>64     fn next_field_name(&mut self) -> ParseResult<String> {
65         Ok(self.tokenizer.next_ident()?)
66     }
67 
read_colon(&mut self, desc: &'static str) -> ParseResult<()>68     fn read_colon(&mut self, desc: &'static str) -> ParseResult<()> {
69         Ok(self.tokenizer.next_symbol_expect_eq(':', desc)?)
70     }
71 
read_enum<'e>(&mut self, e: &'e EnumDescriptor) -> ParseResult<EnumValueDescriptor>72     fn read_enum<'e>(&mut self, e: &'e EnumDescriptor) -> ParseResult<EnumValueDescriptor> {
73         self.read_colon("enum")?;
74 
75         // TODO: read integer?
76         let ident = self.tokenizer.next_ident()?;
77         let value = match e.value_by_name(&ident) {
78             Some(value) => value,
79             None => return Err(ParseErrorWithoutLoc::UnknownEnumValue(ident)),
80         };
81         Ok(value)
82     }
83 
read_u64(&mut self) -> ParseResult<u64>84     fn read_u64(&mut self) -> ParseResult<u64> {
85         self.read_colon("u64")?;
86 
87         Ok(self.tokenizer.next_int_lit()?)
88     }
89 
read_u32(&mut self) -> ParseResult<u32>90     fn read_u32(&mut self) -> ParseResult<u32> {
91         self.read_colon("int value")?;
92 
93         let int_lit = self.tokenizer.next_int_lit()?;
94         let value_u32 = int_lit as u32;
95         if value_u32 as u64 != int_lit {
96             return Err(ParseErrorWithoutLoc::IntegerOverflow);
97         }
98         Ok(value_u32)
99     }
100 
read_i64(&mut self) -> ParseResult<i64>101     fn read_i64(&mut self) -> ParseResult<i64> {
102         self.read_colon("int value")?;
103 
104         if self.tokenizer.next_symbol_if_eq('-')? {
105             let int_lit = self.tokenizer.next_int_lit()?;
106             Ok(int::neg(int_lit)?)
107         } else {
108             let int_lit = self.tokenizer.next_int_lit()?;
109             if int_lit > i64::MAX as u64 {
110                 return Err(ParseErrorWithoutLoc::IntegerOverflow);
111             }
112             Ok(int_lit as i64)
113         }
114     }
115 
read_i32(&mut self) -> ParseResult<i32>116     fn read_i32(&mut self) -> ParseResult<i32> {
117         let value = self.read_i64()?;
118         if value < i32::min_value() as i64 || value > i32::max_value() as i64 {
119             return Err(ParseErrorWithoutLoc::IntegerOverflow);
120         }
121         Ok(value as i32)
122     }
123 
read_f64(&mut self) -> ParseResult<f64>124     fn read_f64(&mut self) -> ParseResult<f64> {
125         self.read_colon("float value")?;
126 
127         let minus = self.tokenizer.next_symbol_if_eq('-')?;
128 
129         let value = if let Ok(value) = self.tokenizer.next_int_lit() {
130             value as f64
131         } else {
132             self.tokenizer.next_float_lit()?
133         };
134 
135         Ok(if minus { -value } else { value })
136     }
137 
read_f32(&mut self) -> ParseResult<f32>138     fn read_f32(&mut self) -> ParseResult<f32> {
139         Ok(self.read_f64()? as f32)
140     }
141 
read_bool(&mut self) -> ParseResult<bool>142     fn read_bool(&mut self) -> ParseResult<bool> {
143         self.read_colon("bool value")?;
144 
145         if self.tokenizer.next_ident_if_eq("true")? {
146             Ok(true)
147         } else if self.tokenizer.next_ident_if_eq("false")? {
148             Ok(false)
149         } else {
150             Err(ParseErrorWithoutLoc::ExpectingBool)
151         }
152     }
153 
read_string(&mut self) -> ParseResult<String>154     fn read_string(&mut self) -> ParseResult<String> {
155         self.read_colon("string value")?;
156 
157         Ok(self
158             .tokenizer
159             .next_str_lit()
160             .and_then(|s| s.decode_utf8().map_err(From::from))?)
161     }
162 
read_bytes(&mut self) -> ParseResult<Vec<u8>>163     fn read_bytes(&mut self) -> ParseResult<Vec<u8>> {
164         self.read_colon("bytes value")?;
165 
166         Ok(self
167             .tokenizer
168             .next_str_lit()
169             .and_then(|s| s.decode_bytes().map_err(From::from))?)
170     }
171 
read_message(&mut self, descriptor: &MessageDescriptor) -> ParseResult<Box<dyn MessageDyn>>172     fn read_message(&mut self, descriptor: &MessageDescriptor) -> ParseResult<Box<dyn MessageDyn>> {
173         let mut message = descriptor.new_instance();
174 
175         let symbol = self.tokenizer.next_symbol_expect_eq_oneof(&['{', '<'])?;
176         let terminator = if symbol == '{' { '}' } else { '>' };
177         while !self.tokenizer.lookahead_is_symbol(terminator)? {
178             self.merge_field(&mut *message, descriptor)?;
179         }
180         self.tokenizer
181             .next_symbol_expect_eq(terminator, "message")?;
182         Ok(message)
183     }
184 
read_map_entry( &mut self, k: &RuntimeType, v: &RuntimeType, ) -> ParseResult<(ReflectValueBox, ReflectValueBox)>185     fn read_map_entry(
186         &mut self,
187         k: &RuntimeType,
188         v: &RuntimeType,
189     ) -> ParseResult<(ReflectValueBox, ReflectValueBox)> {
190         let key_field_name: &str = "key";
191         let value_field_name: &str = "value";
192 
193         let mut key = None;
194         let mut value = None;
195         self.tokenizer.next_symbol_expect_eq('{', "map entry")?;
196         while !self.tokenizer.lookahead_is_symbol('}')? {
197             let ident = self.next_field_name()?;
198             let (field, field_type) = if ident == key_field_name {
199                 (&mut key, k)
200             } else if ident == value_field_name {
201                 (&mut value, v)
202             } else {
203                 return Err(ParseErrorWithoutLoc::UnknownField(ident));
204             };
205 
206             if let Some(..) = *field {
207                 return Err(ParseErrorWithoutLoc::MapFieldIsSpecifiedMoreThanOnce(ident));
208             }
209 
210             let field_value = self.read_value_of_type(field_type)?;
211 
212             *field = Some(field_value);
213         }
214         self.tokenizer.next_symbol_expect_eq('}', "map entry")?;
215         let key = match key {
216             Some(key) => key,
217             None => k.default_value_ref().to_box(),
218         };
219         let value = match value {
220             Some(value) => value,
221             None => v.default_value_ref().to_box(),
222         };
223         Ok((key, value))
224     }
225 
read_value_of_type(&mut self, t: &RuntimeType) -> ParseResult<ReflectValueBox>226     fn read_value_of_type(&mut self, t: &RuntimeType) -> ParseResult<ReflectValueBox> {
227         Ok(match t {
228             RuntimeType::Enum(d) => {
229                 let value = self.read_enum(&d)?.value();
230                 ReflectValueBox::Enum(d.clone(), value)
231             }
232             RuntimeType::U32 => ReflectValueBox::U32(self.read_u32()?),
233             RuntimeType::U64 => ReflectValueBox::U64(self.read_u64()?),
234             RuntimeType::I32 => ReflectValueBox::I32(self.read_i32()?),
235             RuntimeType::I64 => ReflectValueBox::I64(self.read_i64()?),
236             RuntimeType::F32 => ReflectValueBox::F32(self.read_f32()?),
237             RuntimeType::F64 => ReflectValueBox::F64(self.read_f64()?),
238             RuntimeType::Bool => ReflectValueBox::Bool(self.read_bool()?),
239             RuntimeType::String => ReflectValueBox::String(self.read_string()?),
240             RuntimeType::VecU8 => ReflectValueBox::Bytes(self.read_bytes()?),
241             RuntimeType::Message(m) => ReflectValueBox::Message(self.read_message(&m)?),
242         })
243     }
244 
merge_field( &mut self, message: &mut dyn MessageDyn, descriptor: &MessageDescriptor, ) -> ParseResult<()>245     fn merge_field(
246         &mut self,
247         message: &mut dyn MessageDyn,
248         descriptor: &MessageDescriptor,
249     ) -> ParseResult<()> {
250         let field_name = self.next_field_name()?;
251 
252         let field = match descriptor.field_by_name(&field_name) {
253             Some(field) => field,
254             None => {
255                 // TODO: shouldn't unknown fields be quietly skipped?
256                 return Err(ParseErrorWithoutLoc::UnknownField(field_name));
257             }
258         };
259 
260         match field.runtime_field_type() {
261             RuntimeFieldType::Singular(t) => {
262                 let value = self.read_value_of_type(&t)?;
263                 field.set_singular_field(message, value);
264             }
265             RuntimeFieldType::Repeated(t) => {
266                 let value = self.read_value_of_type(&t)?;
267                 field.mut_repeated(message).push(value);
268             }
269             RuntimeFieldType::Map(k, v) => {
270                 let (k, v) = self.read_map_entry(&k, &v)?;
271                 field.mut_map(message).insert(k, v);
272             }
273         };
274 
275         Ok(())
276     }
277 
merge_inner(&mut self, message: &mut dyn MessageDyn) -> ParseResult<()>278     fn merge_inner(&mut self, message: &mut dyn MessageDyn) -> ParseResult<()> {
279         loop {
280             if self.tokenizer.syntax_eof()? {
281                 break;
282             }
283             let descriptor = message.descriptor_dyn();
284             self.merge_field(message, &descriptor)?;
285         }
286         Ok(())
287     }
288 
merge(&mut self, message: &mut dyn MessageDyn) -> ParseWithLocResult<()>289     fn merge(&mut self, message: &mut dyn MessageDyn) -> ParseWithLocResult<()> {
290         match self.merge_inner(message) {
291             Ok(()) => Ok(()),
292             Err(error) => Err(ParseError {
293                 error,
294                 loc: self.tokenizer.loc(),
295             }),
296         }
297     }
298 }
299 
300 /// Parse text format message.
301 ///
302 /// This function does not check if message required fields are set.
merge_from_str(message: &mut dyn MessageDyn, input: &str) -> ParseWithLocResult<()>303 pub fn merge_from_str(message: &mut dyn MessageDyn, input: &str) -> ParseWithLocResult<()> {
304     let mut parser = Parser {
305         tokenizer: Tokenizer::new(input, ParserLanguage::TextFormat),
306     };
307     parser.merge(message)
308 }
309 
310 /// Parse text format message.
parse_from_str<M: MessageFull>(input: &str) -> ParseWithLocResult<M>311 pub fn parse_from_str<M: MessageFull>(input: &str) -> ParseWithLocResult<M> {
312     let mut m = M::new();
313     merge_from_str(&mut m, input)?;
314     if let Err(_) = m.check_initialized() {
315         return Err(ParseError {
316             error: ParseErrorWithoutLoc::MessageNotInitialized,
317             loc: Loc::start(),
318         });
319     }
320     Ok(m)
321 }
322