// Parser for the protobuf source language (proto2 and proto3 syntax).
1 use std::str;
2 
3 use protobuf_support::lexer::int;
4 use protobuf_support::lexer::lexer_impl::LexerError;
5 use protobuf_support::lexer::num_lit::NumLit;
6 use protobuf_support::lexer::parser_language::ParserLanguage;
7 use protobuf_support::lexer::str_lit::StrLitDecodeError;
8 use protobuf_support::lexer::token::Token;
9 use protobuf_support::lexer::tokenizer::Tokenizer;
10 use protobuf_support::lexer::tokenizer::TokenizerError;
11 
12 use crate::model::AnyTypeUrl;
13 use crate::model::ProtobufConstantMessageFieldName;
14 use crate::proto_path::ProtoPathBuf;
15 use crate::protobuf_abs_path::ProtobufAbsPath;
16 use crate::protobuf_ident::ProtobufIdent;
17 use crate::protobuf_path::ProtobufPath;
18 use crate::protobuf_rel_path::ProtobufRelPath;
19 use crate::pure::model;
20 use crate::pure::model::EnumValue;
21 use crate::pure::model::Enumeration;
22 use crate::pure::model::Extension;
23 use crate::pure::model::Field;
24 use crate::pure::model::FieldNumberRange;
25 use crate::pure::model::FieldOrOneOf;
26 use crate::pure::model::FieldType;
27 use crate::pure::model::FileDescriptor;
28 use crate::pure::model::Group;
29 use crate::pure::model::ImportVis;
30 use crate::pure::model::Message;
31 use crate::pure::model::Method;
32 use crate::pure::model::OneOf;
33 use crate::pure::model::ProtobufConstant;
34 use crate::pure::model::ProtobufConstantMessage;
35 use crate::pure::model::ProtobufOption;
36 use crate::pure::model::ProtobufOptionName;
37 use crate::pure::model::ProtobufOptionNameExt;
38 use crate::pure::model::ProtobufOptionNamePart;
39 use crate::pure::model::Rule;
40 use crate::pure::model::Service;
41 use crate::pure::model::Syntax;
42 use crate::pure::model::WithLoc;
43 
44 /// Basic information about parsing error.
45 #[derive(Debug, thiserror::Error)]
46 pub(crate) enum ParserError {
47     #[error("{0}")]
48     TokenizerError(#[source] TokenizerError),
49     // TODO
50     #[error("incorrect input")]
51     IncorrectInput,
52     #[error("not UTF-8")]
53     NotUtf8,
54     #[error("expecting a constant")]
55     ExpectConstant,
56     #[error("unknown syntax")]
57     UnknownSyntax,
58     #[error("integer overflow")]
59     IntegerOverflow,
60     #[error("label not allowed")]
61     LabelNotAllowed,
62     #[error("label required")]
63     LabelRequired,
64     #[error("group name should start with upper case")]
65     GroupNameShouldStartWithUpperCase,
66     #[error("map field not allowed")]
67     MapFieldNotAllowed,
68     #[error("string literal decode error: {0}")]
69     StrLitDecodeError(#[source] StrLitDecodeError),
70     #[error("lexer error: {0}")]
71     LexerError(#[source] LexerError),
72     #[error("oneof in group")]
73     OneOfInGroup,
74     #[error("oneof in oneof")]
75     OneOfInOneOf,
76     #[error("oneof in extend")]
77     OneOfInExtend,
78 }
79 
80 impl From<TokenizerError> for ParserError {
from(e: TokenizerError) -> Self81     fn from(e: TokenizerError) -> Self {
82         ParserError::TokenizerError(e)
83     }
84 }
85 
86 impl From<StrLitDecodeError> for ParserError {
from(e: StrLitDecodeError) -> Self87     fn from(e: StrLitDecodeError) -> Self {
88         ParserError::StrLitDecodeError(e)
89     }
90 }
91 
92 impl From<LexerError> for ParserError {
from(e: LexerError) -> Self93     fn from(e: LexerError) -> Self {
94         ParserError::LexerError(e)
95     }
96 }
97 
98 impl From<int::Overflow> for ParserError {
from(_: int::Overflow) -> Self99     fn from(_: int::Overflow) -> Self {
100         ParserError::IntegerOverflow
101     }
102 }
103 
104 #[derive(Debug, thiserror::Error)]
105 #[error("at {line}:{col}: {error}")]
106 pub struct ParserErrorWithLocation {
107     #[source]
108     pub error: anyhow::Error,
109     /// 1-based
110     pub line: u32,
111     /// 1-based
112     pub col: u32,
113 }
114 
115 trait ToU8 {
to_u8(&self) -> anyhow::Result<u8>116     fn to_u8(&self) -> anyhow::Result<u8>;
117 }
118 
119 trait ToI32 {
to_i32(&self) -> anyhow::Result<i32>120     fn to_i32(&self) -> anyhow::Result<i32>;
121 }
122 
123 trait ToI64 {
to_i64(&self) -> anyhow::Result<i64>124     fn to_i64(&self) -> anyhow::Result<i64>;
125 }
126 
127 trait ToChar {
to_char(&self) -> anyhow::Result<char>128     fn to_char(&self) -> anyhow::Result<char>;
129 }
130 
131 impl ToI32 for u64 {
to_i32(&self) -> anyhow::Result<i32>132     fn to_i32(&self) -> anyhow::Result<i32> {
133         if *self <= i32::max_value() as u64 {
134             Ok(*self as i32)
135         } else {
136             Err(ParserError::IntegerOverflow.into())
137         }
138     }
139 }
140 
141 impl ToI32 for i64 {
to_i32(&self) -> anyhow::Result<i32>142     fn to_i32(&self) -> anyhow::Result<i32> {
143         if *self <= i32::max_value() as i64 && *self >= i32::min_value() as i64 {
144             Ok(*self as i32)
145         } else {
146             Err(ParserError::IntegerOverflow.into())
147         }
148     }
149 }
150 
151 impl ToI64 for u64 {
to_i64(&self) -> anyhow::Result<i64>152     fn to_i64(&self) -> anyhow::Result<i64> {
153         if *self <= i64::max_value() as u64 {
154             Ok(*self as i64)
155         } else {
156             Err(ParserError::IntegerOverflow.into())
157         }
158     }
159 }
160 
161 impl ToChar for u8 {
to_char(&self) -> anyhow::Result<char>162     fn to_char(&self) -> anyhow::Result<char> {
163         if *self <= 0x7f {
164             Ok(*self as char)
165         } else {
166             Err(ParserError::NotUtf8.into())
167         }
168     }
169 }
170 
171 impl ToU8 for u32 {
to_u8(&self) -> anyhow::Result<u8>172     fn to_u8(&self) -> anyhow::Result<u8> {
173         if *self as u8 as u32 == *self {
174             Ok(*self as u8)
175         } else {
176             Err(ParserError::IntegerOverflow.into())
177         }
178     }
179 }
180 
181 #[derive(Clone)]
182 pub(crate) struct Parser<'a> {
183     pub tokenizer: Tokenizer<'a>,
184     syntax: Syntax,
185 }
186 
187 #[derive(Copy, Clone)]
188 enum MessageBodyParseMode {
189     MessageProto2,
190     MessageProto3,
191     Oneof,
192     ExtendProto2,
193     ExtendProto3,
194 }
195 
196 impl MessageBodyParseMode {
label_allowed(&self, label: Rule) -> bool197     fn label_allowed(&self, label: Rule) -> bool {
198         match label {
199             Rule::Repeated => match *self {
200                 MessageBodyParseMode::MessageProto2
201                 | MessageBodyParseMode::MessageProto3
202                 | MessageBodyParseMode::ExtendProto2
203                 | MessageBodyParseMode::ExtendProto3 => true,
204                 MessageBodyParseMode::Oneof => false,
205             },
206             Rule::Optional => match *self {
207                 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true,
208                 MessageBodyParseMode::MessageProto3 | MessageBodyParseMode::ExtendProto3 => true,
209                 MessageBodyParseMode::Oneof => false,
210             },
211             Rule::Required => match *self {
212                 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true,
213                 MessageBodyParseMode::MessageProto3 | MessageBodyParseMode::ExtendProto3 => false,
214                 MessageBodyParseMode::Oneof => false,
215             },
216         }
217     }
218 
some_label_required(&self) -> bool219     fn some_label_required(&self) -> bool {
220         match *self {
221             MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true,
222             MessageBodyParseMode::MessageProto3
223             | MessageBodyParseMode::ExtendProto3
224             | MessageBodyParseMode::Oneof => false,
225         }
226     }
227 
map_allowed(&self) -> bool228     fn map_allowed(&self) -> bool {
229         match *self {
230             MessageBodyParseMode::MessageProto2
231             | MessageBodyParseMode::MessageProto3
232             | MessageBodyParseMode::ExtendProto2
233             | MessageBodyParseMode::ExtendProto3 => true,
234             MessageBodyParseMode::Oneof => false,
235         }
236     }
237 
is_most_non_fields_allowed(&self) -> bool238     fn is_most_non_fields_allowed(&self) -> bool {
239         match *self {
240             MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::MessageProto3 => true,
241             MessageBodyParseMode::ExtendProto2
242             | MessageBodyParseMode::ExtendProto3
243             | MessageBodyParseMode::Oneof => false,
244         }
245     }
246 
is_option_allowed(&self) -> bool247     fn is_option_allowed(&self) -> bool {
248         match *self {
249             MessageBodyParseMode::MessageProto2
250             | MessageBodyParseMode::MessageProto3
251             | MessageBodyParseMode::Oneof => true,
252             MessageBodyParseMode::ExtendProto2 | MessageBodyParseMode::ExtendProto3 => false,
253         }
254     }
255 
is_extensions_allowed(&self) -> bool256     fn is_extensions_allowed(&self) -> bool {
257         match self {
258             MessageBodyParseMode::MessageProto2 => true,
259             _ => false,
260         }
261     }
262 }
263 
264 #[derive(Default)]
265 pub(crate) struct MessageBody {
266     pub fields: Vec<WithLoc<FieldOrOneOf>>,
267     pub reserved_nums: Vec<FieldNumberRange>,
268     pub reserved_names: Vec<String>,
269     pub messages: Vec<WithLoc<Message>>,
270     pub enums: Vec<WithLoc<Enumeration>>,
271     pub options: Vec<ProtobufOption>,
272     pub extension_ranges: Vec<FieldNumberRange>,
273     pub extensions: Vec<WithLoc<Extension>>,
274 }
275 
276 trait NumLitEx {
to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant>277     fn to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant>;
278 }
279 
280 impl NumLitEx for NumLit {
to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant>281     fn to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant> {
282         Ok(match (*self, sign_is_plus) {
283             (NumLit::U64(u), true) => ProtobufConstant::U64(u),
284             (NumLit::F64(f), true) => ProtobufConstant::F64(f),
285             (NumLit::U64(u), false) => {
286                 ProtobufConstant::I64(int::neg(u).map_err(|_| ParserError::IntegerOverflow)?)
287             }
288             (NumLit::F64(f), false) => ProtobufConstant::F64(-f),
289         })
290     }
291 }
292 
293 impl<'a> Parser<'a> {
new(input: &'a str) -> Parser<'a>294     pub(crate) fn new(input: &'a str) -> Parser<'a> {
295         Parser {
296             tokenizer: Tokenizer::new(input, ParserLanguage::Proto),
297             syntax: Syntax::Proto2,
298         }
299     }
300 
301     // Protobuf grammar
302 
303     // fullIdent = ident { "." ident }
next_full_ident(&mut self) -> anyhow::Result<ProtobufPath>304     fn next_full_ident(&mut self) -> anyhow::Result<ProtobufPath> {
305         let mut full_ident = String::new();
306         // https://github.com/google/protobuf/issues/4563
307         if self.tokenizer.next_symbol_if_eq('.')? {
308             full_ident.push('.');
309         }
310         full_ident.push_str(&self.tokenizer.next_ident()?);
311         while self.tokenizer.next_symbol_if_eq('.')? {
312             full_ident.push('.');
313             full_ident.push_str(&self.tokenizer.next_ident()?);
314         }
315         Ok(ProtobufPath::new(full_ident))
316     }
317 
318     // fullIdent = ident { "." ident }
next_full_ident_rel(&mut self) -> anyhow::Result<ProtobufRelPath>319     fn next_full_ident_rel(&mut self) -> anyhow::Result<ProtobufRelPath> {
320         let mut full_ident = String::new();
321         full_ident.push_str(&self.tokenizer.next_ident()?);
322         while self.tokenizer.next_symbol_if_eq('.')? {
323             full_ident.push('.');
324             full_ident.push_str(&self.tokenizer.next_ident()?);
325         }
326         Ok(ProtobufRelPath::new(full_ident))
327     }
328 
329     // emptyStatement = ";"
next_empty_statement_opt(&mut self) -> anyhow::Result<Option<()>>330     fn next_empty_statement_opt(&mut self) -> anyhow::Result<Option<()>> {
331         if self.tokenizer.next_symbol_if_eq(';')? {
332             Ok(Some(()))
333         } else {
334             Ok(None)
335         }
336     }
337 
338     // messageName = ident
339     // enumName = ident
340     // messageType = [ "." ] { ident "." } messageName
341     // enumType = [ "." ] { ident "." } enumName
next_message_or_enum_type(&mut self) -> anyhow::Result<ProtobufPath>342     fn next_message_or_enum_type(&mut self) -> anyhow::Result<ProtobufPath> {
343         self.next_full_ident()
344     }
345 
346     // groupName = capitalLetter { letter | decimalDigit | "_" }
next_group_name(&mut self) -> anyhow::Result<String>347     fn next_group_name(&mut self) -> anyhow::Result<String> {
348         // lexer cannot distinguish between group name and other ident
349         let mut clone = self.clone();
350         let ident = clone.tokenizer.next_ident()?;
351         if !ident.chars().next().unwrap().is_ascii_uppercase() {
352             return Err(ParserError::GroupNameShouldStartWithUpperCase.into());
353         }
354         *self = clone;
355         Ok(ident)
356     }
357 
358     // Boolean
359 
360     // boolLit = "true" | "false"
next_bool_lit_opt(&mut self) -> anyhow::Result<Option<bool>>361     fn next_bool_lit_opt(&mut self) -> anyhow::Result<Option<bool>> {
362         Ok(if self.tokenizer.next_ident_if_eq("true")? {
363             Some(true)
364         } else if self.tokenizer.next_ident_if_eq("false")? {
365             Some(false)
366         } else {
367             None
368         })
369     }
370 
371     // Constant
372 
next_num_lit(&mut self) -> anyhow::Result<NumLit>373     fn next_num_lit(&mut self) -> anyhow::Result<NumLit> {
374         self.tokenizer
375             .next_token_check_map(|token| Ok(token.to_num_lit()?))
376     }
377 
next_message_constant_field_name( &mut self, ) -> anyhow::Result<ProtobufConstantMessageFieldName>378     fn next_message_constant_field_name(
379         &mut self,
380     ) -> anyhow::Result<ProtobufConstantMessageFieldName> {
381         if self.tokenizer.next_symbol_if_eq('[')? {
382             let n = self.next_full_ident()?;
383             if self.tokenizer.next_symbol_if_eq('/')? {
384                 let prefix = format!("{}", n);
385                 let full_type_name = self.next_full_ident()?;
386                 self.tokenizer
387                     .next_symbol_expect_eq(']', "message constant")?;
388                 Ok(ProtobufConstantMessageFieldName::AnyTypeUrl(AnyTypeUrl {
389                     prefix,
390                     full_type_name,
391                 }))
392             } else {
393                 self.tokenizer
394                     .next_symbol_expect_eq(']', "message constant")?;
395                 Ok(ProtobufConstantMessageFieldName::Extension(n))
396             }
397         } else {
398             let n = self.tokenizer.next_ident()?;
399             Ok(ProtobufConstantMessageFieldName::Regular(n))
400         }
401     }
402 
next_message_constant(&mut self) -> anyhow::Result<ProtobufConstantMessage>403     fn next_message_constant(&mut self) -> anyhow::Result<ProtobufConstantMessage> {
404         let mut r = ProtobufConstantMessage::default();
405         self.tokenizer
406             .next_symbol_expect_eq('{', "message constant")?;
407         while !self.tokenizer.lookahead_is_symbol('}')? {
408             let n = self.next_message_constant_field_name()?;
409             let v = self.next_field_value()?;
410             r.fields.insert(n, v);
411         }
412         self.tokenizer
413             .next_symbol_expect_eq('}', "message constant")?;
414         Ok(r)
415     }
416 
417     // constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) |
418     //            strLit | boolLit
next_constant(&mut self) -> anyhow::Result<ProtobufConstant>419     fn next_constant(&mut self) -> anyhow::Result<ProtobufConstant> {
420         // https://github.com/google/protobuf/blob/a21f225824e994ebd35e8447382ea4e0cd165b3c/src/google/protobuf/unittest_custom_options.proto#L350
421         if self.tokenizer.lookahead_is_symbol('{')? {
422             return Ok(ProtobufConstant::Message(self.next_message_constant()?));
423         }
424 
425         if let Some(b) = self.next_bool_lit_opt()? {
426             return Ok(ProtobufConstant::Bool(b));
427         }
428 
429         if let &Token::Symbol(c) = self.tokenizer.lookahead_some()? {
430             if c == '+' || c == '-' {
431                 self.tokenizer.advance()?;
432                 let sign = c == '+';
433                 return Ok(self.next_num_lit()?.to_option_value(sign)?);
434             }
435         }
436 
437         if let Some(r) = self.tokenizer.next_token_if_map(|token| match token {
438             &Token::StrLit(ref s) => Some(ProtobufConstant::String(s.clone())),
439             _ => None,
440         })? {
441             return Ok(r);
442         }
443 
444         match self.tokenizer.lookahead_some()? {
445             &Token::IntLit(..) | &Token::FloatLit(..) => {
446                 return self.next_num_lit()?.to_option_value(true);
447             }
448             &Token::Ident(..) => {
449                 return Ok(ProtobufConstant::Ident(self.next_full_ident()?));
450             }
451             _ => {}
452         }
453 
454         Err(ParserError::ExpectConstant.into())
455     }
456 
next_field_value(&mut self) -> anyhow::Result<ProtobufConstant>457     fn next_field_value(&mut self) -> anyhow::Result<ProtobufConstant> {
458         if self.tokenizer.next_symbol_if_eq(':')? {
459             // Colon is optional when reading message constant.
460             self.next_constant()
461         } else {
462             Ok(ProtobufConstant::Message(self.next_message_constant()?))
463         }
464     }
465 
next_int_lit(&mut self) -> anyhow::Result<u64>466     fn next_int_lit(&mut self) -> anyhow::Result<u64> {
467         self.tokenizer.next_token_check_map(|token| match token {
468             &Token::IntLit(i) => Ok(i),
469             _ => Err(ParserError::IncorrectInput.into()),
470         })
471     }
472 
473     // Syntax
474 
475     // syntax = "syntax" "=" quote "proto2" quote ";"
476     // syntax = "syntax" "=" quote "proto3" quote ";"
next_syntax(&mut self) -> anyhow::Result<Option<Syntax>>477     fn next_syntax(&mut self) -> anyhow::Result<Option<Syntax>> {
478         if self.tokenizer.next_ident_if_eq("syntax")? {
479             self.tokenizer.next_symbol_expect_eq('=', "syntax")?;
480             let syntax_str = self.tokenizer.next_str_lit()?.decode_utf8()?;
481             let syntax = if syntax_str == "proto2" {
482                 Syntax::Proto2
483             } else if syntax_str == "proto3" {
484                 Syntax::Proto3
485             } else {
486                 return Err(ParserError::UnknownSyntax.into());
487             };
488             self.tokenizer.next_symbol_expect_eq(';', "syntax")?;
489             Ok(Some(syntax))
490         } else {
491             Ok(None)
492         }
493     }
494 
495     // Import Statement
496 
497     // import = "import" [ "weak" | "public" ] strLit ";"
next_import_opt(&mut self) -> anyhow::Result<Option<model::Import>>498     fn next_import_opt(&mut self) -> anyhow::Result<Option<model::Import>> {
499         if self.tokenizer.next_ident_if_eq("import")? {
500             let vis = if self.tokenizer.next_ident_if_eq("weak")? {
501                 ImportVis::Weak
502             } else if self.tokenizer.next_ident_if_eq("public")? {
503                 ImportVis::Public
504             } else {
505                 ImportVis::Default
506             };
507             let path = self.tokenizer.next_str_lit()?.decode_utf8()?;
508             self.tokenizer.next_symbol_expect_eq(';', "import")?;
509             let path = ProtoPathBuf::new(path)?;
510             Ok(Some(model::Import { path, vis }))
511         } else {
512             Ok(None)
513         }
514     }
515 
516     // Package
517 
518     // package = "package" fullIdent ";"
next_package_opt(&mut self) -> anyhow::Result<Option<ProtobufAbsPath>>519     fn next_package_opt(&mut self) -> anyhow::Result<Option<ProtobufAbsPath>> {
520         if self.tokenizer.next_ident_if_eq("package")? {
521             let package = self.next_full_ident_rel()?;
522             self.tokenizer.next_symbol_expect_eq(';', "package")?;
523             Ok(Some(package.into_absolute()))
524         } else {
525             Ok(None)
526         }
527     }
528 
529     // Option
530 
next_ident(&mut self) -> anyhow::Result<ProtobufIdent>531     fn next_ident(&mut self) -> anyhow::Result<ProtobufIdent> {
532         Ok(ProtobufIdent::from(self.tokenizer.next_ident()?))
533     }
534 
next_option_name_component(&mut self) -> anyhow::Result<ProtobufOptionNamePart>535     fn next_option_name_component(&mut self) -> anyhow::Result<ProtobufOptionNamePart> {
536         if self.tokenizer.next_symbol_if_eq('(')? {
537             let comp = self.next_full_ident()?;
538             self.tokenizer
539                 .next_symbol_expect_eq(')', "option name component")?;
540             Ok(ProtobufOptionNamePart::Ext(comp))
541         } else {
542             Ok(ProtobufOptionNamePart::Direct(self.next_ident()?))
543         }
544     }
545 
546     // https://github.com/google/protobuf/issues/4563
547     // optionName = ( ident | "(" fullIdent ")" ) { "." ident }
next_option_name(&mut self) -> anyhow::Result<ProtobufOptionName>548     fn next_option_name(&mut self) -> anyhow::Result<ProtobufOptionName> {
549         let mut components = Vec::new();
550         components.push(self.next_option_name_component()?);
551         while self.tokenizer.next_symbol_if_eq('.')? {
552             components.push(self.next_option_name_component()?);
553         }
554         if components.len() == 1 {
555             if let ProtobufOptionNamePart::Direct(n) = &components[0] {
556                 return Ok(ProtobufOptionName::Builtin(n.clone()));
557             }
558         }
559         Ok(ProtobufOptionName::Ext(ProtobufOptionNameExt(components)))
560     }
561 
562     // option = "option" optionName  "=" constant ";"
next_option_opt(&mut self) -> anyhow::Result<Option<ProtobufOption>>563     fn next_option_opt(&mut self) -> anyhow::Result<Option<ProtobufOption>> {
564         if self.tokenizer.next_ident_if_eq("option")? {
565             let name = self.next_option_name()?;
566             self.tokenizer.next_symbol_expect_eq('=', "option")?;
567             let value = self.next_constant()?;
568             self.tokenizer.next_symbol_expect_eq(';', "option")?;
569             Ok(Some(ProtobufOption { name, value }))
570         } else {
571             Ok(None)
572         }
573     }
574 
575     // Fields
576 
577     // label = "required" | "optional" | "repeated"
next_label(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<Option<Rule>>578     fn next_label(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<Option<Rule>> {
579         for rule in Rule::ALL {
580             let mut clone = self.clone();
581             if clone.tokenizer.next_ident_if_eq(rule.as_str())? {
582                 if !mode.label_allowed(rule) {
583                     return Err(ParserError::LabelNotAllowed.into());
584                 }
585 
586                 *self = clone;
587                 return Ok(Some(rule));
588             }
589         }
590 
591         if mode.some_label_required() {
592             Err(ParserError::LabelRequired.into())
593         } else {
594             Ok(None)
595         }
596     }
597 
next_field_type(&mut self) -> anyhow::Result<FieldType>598     fn next_field_type(&mut self) -> anyhow::Result<FieldType> {
599         let simple = &[
600             ("int32", FieldType::Int32),
601             ("int64", FieldType::Int64),
602             ("uint32", FieldType::Uint32),
603             ("uint64", FieldType::Uint64),
604             ("sint32", FieldType::Sint32),
605             ("sint64", FieldType::Sint64),
606             ("fixed32", FieldType::Fixed32),
607             ("sfixed32", FieldType::Sfixed32),
608             ("fixed64", FieldType::Fixed64),
609             ("sfixed64", FieldType::Sfixed64),
610             ("bool", FieldType::Bool),
611             ("string", FieldType::String),
612             ("bytes", FieldType::Bytes),
613             ("float", FieldType::Float),
614             ("double", FieldType::Double),
615         ];
616         for &(ref n, ref t) in simple {
617             if self.tokenizer.next_ident_if_eq(n)? {
618                 return Ok(t.clone());
619             }
620         }
621 
622         if let Some(t) = self.next_map_field_type_opt()? {
623             return Ok(t);
624         }
625 
626         let message_or_enum = self.next_message_or_enum_type()?;
627         Ok(FieldType::MessageOrEnum(message_or_enum))
628     }
629 
next_field_number(&mut self) -> anyhow::Result<i32>630     fn next_field_number(&mut self) -> anyhow::Result<i32> {
631         // TODO: not all integers are valid field numbers
632         self.tokenizer.next_token_check_map(|token| match token {
633             &Token::IntLit(i) => i.to_i32(),
634             _ => Err(ParserError::IncorrectInput.into()),
635         })
636     }
637 
638     // fieldOption = optionName "=" constant
next_field_option(&mut self) -> anyhow::Result<ProtobufOption>639     fn next_field_option(&mut self) -> anyhow::Result<ProtobufOption> {
640         let name = self.next_option_name()?;
641         self.tokenizer.next_symbol_expect_eq('=', "field option")?;
642         let value = self.next_constant()?;
643         Ok(ProtobufOption { name, value })
644     }
645 
646     // fieldOptions = fieldOption { ","  fieldOption }
next_field_options(&mut self) -> anyhow::Result<Vec<ProtobufOption>>647     fn next_field_options(&mut self) -> anyhow::Result<Vec<ProtobufOption>> {
648         let mut options = Vec::new();
649 
650         options.push(self.next_field_option()?);
651 
652         while self.tokenizer.next_symbol_if_eq(',')? {
653             options.push(self.next_field_option()?);
654         }
655 
656         Ok(options)
657     }
658 
659     // field = label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
660     // group = label "group" groupName "=" fieldNumber messageBody
next_field(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<WithLoc<Field>>661     fn next_field(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<WithLoc<Field>> {
662         let loc = self.tokenizer.lookahead_loc();
663         let rule = if self.clone().tokenizer.next_ident_if_eq("map")? {
664             if !mode.map_allowed() {
665                 return Err(ParserError::MapFieldNotAllowed.into());
666             }
667             None
668         } else {
669             self.next_label(mode)?
670         };
671         if self.tokenizer.next_ident_if_eq("group")? {
672             let name = self.next_group_name()?.to_owned();
673             self.tokenizer.next_symbol_expect_eq('=', "group")?;
674             let number = self.next_field_number()?;
675 
676             let mode = match self.syntax {
677                 Syntax::Proto2 => MessageBodyParseMode::MessageProto2,
678                 Syntax::Proto3 => MessageBodyParseMode::MessageProto3,
679             };
680 
681             let MessageBody { fields, .. } = self.next_message_body(mode)?;
682 
683             let fields = fields
684                 .into_iter()
685                 .map(|fo| match fo.t {
686                     FieldOrOneOf::Field(f) => Ok(f),
687                     FieldOrOneOf::OneOf(_) => Err(ParserError::OneOfInGroup),
688                 })
689                 .collect::<Result<_, ParserError>>()?;
690 
691             let field = Field {
692                 // The field name is a lowercased version of the type name
693                 // (which has been verified to start with an uppercase letter).
694                 // https://git.io/JvxAP
695                 name: name.to_ascii_lowercase(),
696                 rule,
697                 typ: FieldType::Group(Group { name, fields }),
698                 number,
699                 options: Vec::new(),
700             };
701             Ok(WithLoc { t: field, loc })
702         } else {
703             let typ = self.next_field_type()?;
704             let name = self.tokenizer.next_ident()?.to_owned();
705             self.tokenizer.next_symbol_expect_eq('=', "field")?;
706             let number = self.next_field_number()?;
707 
708             let mut options = Vec::new();
709 
710             if self.tokenizer.next_symbol_if_eq('[')? {
711                 for o in self.next_field_options()? {
712                     options.push(o);
713                 }
714                 self.tokenizer.next_symbol_expect_eq(']', "field")?;
715             }
716             self.tokenizer.next_symbol_expect_eq(';', "field")?;
717             let field = Field {
718                 name,
719                 rule,
720                 typ,
721                 number,
722                 options,
723             };
724             Ok(WithLoc { t: field, loc })
725         }
726     }
727 
728     // oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}"
729     // oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
next_oneof_opt(&mut self) -> anyhow::Result<Option<OneOf>>730     fn next_oneof_opt(&mut self) -> anyhow::Result<Option<OneOf>> {
731         if self.tokenizer.next_ident_if_eq("oneof")? {
732             let name = self.tokenizer.next_ident()?.to_owned();
733             let MessageBody {
734                 fields, options, ..
735             } = self.next_message_body(MessageBodyParseMode::Oneof)?;
736             let fields = fields
737                 .into_iter()
738                 .map(|fo| match fo.t {
739                     FieldOrOneOf::Field(f) => Ok(f),
740                     FieldOrOneOf::OneOf(_) => Err(ParserError::OneOfInOneOf),
741                 })
742                 .collect::<Result<_, ParserError>>()?;
743             Ok(Some(OneOf {
744                 name,
745                 fields,
746                 options,
747             }))
748         } else {
749             Ok(None)
750         }
751     }
752 
753     // mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";"
754     // keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" |
755     //           "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
next_map_field_type_opt(&mut self) -> anyhow::Result<Option<FieldType>>756     fn next_map_field_type_opt(&mut self) -> anyhow::Result<Option<FieldType>> {
757         if self.tokenizer.next_ident_if_eq("map")? {
758             self.tokenizer
759                 .next_symbol_expect_eq('<', "map field type")?;
760             // TODO: restrict key types
761             let key = self.next_field_type()?;
762             self.tokenizer
763                 .next_symbol_expect_eq(',', "map field type")?;
764             let value = self.next_field_type()?;
765             self.tokenizer
766                 .next_symbol_expect_eq('>', "map field type")?;
767             Ok(Some(FieldType::Map(Box::new((key, value)))))
768         } else {
769             Ok(None)
770         }
771     }
772 
773     // Extensions and Reserved
774 
775     // Extensions
776 
777     // range =  intLit [ "to" ( intLit | "max" ) ]
next_range(&mut self) -> anyhow::Result<FieldNumberRange>778     fn next_range(&mut self) -> anyhow::Result<FieldNumberRange> {
779         let from = self.next_field_number()?;
780         let to = if self.tokenizer.next_ident_if_eq("to")? {
781             if self.tokenizer.next_ident_if_eq("max")? {
782                 0x20000000 - 1
783             } else {
784                 self.next_field_number()?
785             }
786         } else {
787             from
788         };
789         Ok(FieldNumberRange { from, to })
790     }
791 
792     // ranges = range { "," range }
next_ranges(&mut self) -> anyhow::Result<Vec<FieldNumberRange>>793     fn next_ranges(&mut self) -> anyhow::Result<Vec<FieldNumberRange>> {
794         let mut ranges = Vec::new();
795         ranges.push(self.next_range()?);
796         while self.tokenizer.next_symbol_if_eq(',')? {
797             ranges.push(self.next_range()?);
798         }
799         Ok(ranges)
800     }
801 
802     // extensions = "extensions" ranges ";"
next_extensions_opt(&mut self) -> anyhow::Result<Option<Vec<FieldNumberRange>>>803     fn next_extensions_opt(&mut self) -> anyhow::Result<Option<Vec<FieldNumberRange>>> {
804         if self.tokenizer.next_ident_if_eq("extensions")? {
805             Ok(Some(self.next_ranges()?))
806         } else {
807             Ok(None)
808         }
809     }
810 
811     // Reserved
812 
813     // Grammar is incorrect: https://github.com/google/protobuf/issues/4558
814     // reserved = "reserved" ( ranges | fieldNames ) ";"
815     // fieldNames = fieldName { "," fieldName }
next_reserved_opt( &mut self, ) -> anyhow::Result<Option<(Vec<FieldNumberRange>, Vec<String>)>>816     fn next_reserved_opt(
817         &mut self,
818     ) -> anyhow::Result<Option<(Vec<FieldNumberRange>, Vec<String>)>> {
819         if self.tokenizer.next_ident_if_eq("reserved")? {
820             let (ranges, names) = if let &Token::StrLit(..) = self.tokenizer.lookahead_some()? {
821                 let mut names = Vec::new();
822                 names.push(self.tokenizer.next_str_lit()?.decode_utf8()?);
823                 while self.tokenizer.next_symbol_if_eq(',')? {
824                     names.push(self.tokenizer.next_str_lit()?.decode_utf8()?);
825                 }
826                 (Vec::new(), names)
827             } else {
828                 (self.next_ranges()?, Vec::new())
829             };
830 
831             self.tokenizer.next_symbol_expect_eq(';', "reserved")?;
832 
833             Ok(Some((ranges, names)))
834         } else {
835             Ok(None)
836         }
837     }
838 
839     // Top Level definitions
840 
841     // Enum definition
842 
843     // enumValueOption = optionName "=" constant
next_enum_value_option(&mut self) -> anyhow::Result<ProtobufOption>844     fn next_enum_value_option(&mut self) -> anyhow::Result<ProtobufOption> {
845         let name = self.next_option_name()?;
846         self.tokenizer
847             .next_symbol_expect_eq('=', "enum value option")?;
848         let value = self.next_constant()?;
849         Ok(ProtobufOption { name, value })
850     }
851 
852     // https://github.com/google/protobuf/issues/4561
next_enum_value(&mut self) -> anyhow::Result<i32>853     fn next_enum_value(&mut self) -> anyhow::Result<i32> {
854         let minus = self.tokenizer.next_symbol_if_eq('-')?;
855         let lit = self.next_int_lit()?;
856         Ok(if minus {
857             let unsigned = lit.to_i64()?;
858             match unsigned.checked_neg() {
859                 Some(neg) => neg.to_i32()?,
860                 None => return Err(ParserError::IntegerOverflow.into()),
861             }
862         } else {
863             lit.to_i32()?
864         })
865     }
866 
867     // enumField = ident "=" intLit [ "[" enumValueOption { ","  enumValueOption } "]" ]";"
next_enum_field(&mut self) -> anyhow::Result<EnumValue>868     fn next_enum_field(&mut self) -> anyhow::Result<EnumValue> {
869         let name = self.tokenizer.next_ident()?.to_owned();
870         self.tokenizer.next_symbol_expect_eq('=', "enum field")?;
871         let number = self.next_enum_value()?;
872         let mut options = Vec::new();
873         if self.tokenizer.next_symbol_if_eq('[')? {
874             options.push(self.next_enum_value_option()?);
875             while self.tokenizer.next_symbol_if_eq(',')? {
876                 options.push(self.next_enum_value_option()?);
877             }
878             self.tokenizer.next_symbol_expect_eq(']', "enum field")?;
879         }
880 
881         Ok(EnumValue {
882             name,
883             number,
884             options,
885         })
886     }
887 
888     // enum = "enum" enumName enumBody
889     // enumBody = "{" { option | enumField | emptyStatement } "}"
next_enum_opt(&mut self) -> anyhow::Result<Option<WithLoc<Enumeration>>>890     fn next_enum_opt(&mut self) -> anyhow::Result<Option<WithLoc<Enumeration>>> {
891         let loc = self.tokenizer.lookahead_loc();
892 
893         if self.tokenizer.next_ident_if_eq("enum")? {
894             let name = self.tokenizer.next_ident()?.to_owned();
895 
896             let mut values = Vec::new();
897             let mut options = Vec::new();
898 
899             self.tokenizer.next_symbol_expect_eq('{', "enum")?;
900             while self.tokenizer.lookahead_if_symbol()? != Some('}') {
901                 // emptyStatement
902                 if self.tokenizer.next_symbol_if_eq(';')? {
903                     continue;
904                 }
905 
906                 if let Some(o) = self.next_option_opt()? {
907                     options.push(o);
908                     continue;
909                 }
910 
911                 values.push(self.next_enum_field()?);
912             }
913             self.tokenizer.next_symbol_expect_eq('}', "enum")?;
914             let enumeration = Enumeration {
915                 name,
916                 values,
917                 options,
918             };
919             Ok(Some(WithLoc {
920                 loc,
921                 t: enumeration,
922             }))
923         } else {
924             Ok(None)
925         }
926     }
927 
928     // Message definition
929 
930     // messageBody = "{" { field | enum | message | extend | extensions | group |
931     //               option | oneof | mapField | reserved | emptyStatement } "}"
next_message_body(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<MessageBody>932     fn next_message_body(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<MessageBody> {
933         self.tokenizer.next_symbol_expect_eq('{', "message body")?;
934 
935         let mut r = MessageBody::default();
936 
937         while self.tokenizer.lookahead_if_symbol()? != Some('}') {
938             let loc = self.tokenizer.lookahead_loc();
939 
940             // emptyStatement
941             if self.tokenizer.next_symbol_if_eq(';')? {
942                 continue;
943             }
944 
945             if mode.is_most_non_fields_allowed() {
946                 if let Some((field_nums, field_names)) = self.next_reserved_opt()? {
947                     r.reserved_nums.extend(field_nums);
948                     r.reserved_names.extend(field_names);
949                     continue;
950                 }
951 
952                 if let Some(oneof) = self.next_oneof_opt()? {
953                     let one_of = FieldOrOneOf::OneOf(oneof);
954                     r.fields.push(WithLoc { t: one_of, loc });
955                     continue;
956                 }
957 
958                 if let Some(extensions) = self.next_extend_opt()? {
959                     r.extensions.extend(extensions);
960                     continue;
961                 }
962 
963                 if let Some(nested_message) = self.next_message_opt()? {
964                     r.messages.push(nested_message);
965                     continue;
966                 }
967 
968                 if let Some(nested_enum) = self.next_enum_opt()? {
969                     r.enums.push(nested_enum);
970                     continue;
971                 }
972             } else {
973                 self.tokenizer.next_ident_if_eq_error("reserved")?;
974                 self.tokenizer.next_ident_if_eq_error("oneof")?;
975                 self.tokenizer.next_ident_if_eq_error("extend")?;
976                 self.tokenizer.next_ident_if_eq_error("message")?;
977                 self.tokenizer.next_ident_if_eq_error("enum")?;
978             }
979 
980             if mode.is_extensions_allowed() {
981                 if let Some(extension_ranges) = self.next_extensions_opt()? {
982                     r.extension_ranges.extend(extension_ranges);
983                     continue;
984                 }
985             } else {
986                 self.tokenizer.next_ident_if_eq_error("extensions")?;
987             }
988 
989             if mode.is_option_allowed() {
990                 if let Some(option) = self.next_option_opt()? {
991                     r.options.push(option);
992                     continue;
993                 }
994             } else {
995                 self.tokenizer.next_ident_if_eq_error("option")?;
996             }
997 
998             let field = FieldOrOneOf::Field(self.next_field(mode)?);
999             r.fields.push(WithLoc { t: field, loc });
1000         }
1001 
1002         self.tokenizer.next_symbol_expect_eq('}', "message body")?;
1003 
1004         Ok(r)
1005     }
1006 
1007     // message = "message" messageName messageBody
next_message_opt(&mut self) -> anyhow::Result<Option<WithLoc<Message>>>1008     fn next_message_opt(&mut self) -> anyhow::Result<Option<WithLoc<Message>>> {
1009         let loc = self.tokenizer.lookahead_loc();
1010 
1011         if self.tokenizer.next_ident_if_eq("message")? {
1012             let name = self.tokenizer.next_ident()?.to_owned();
1013 
1014             let mode = match self.syntax {
1015                 Syntax::Proto2 => MessageBodyParseMode::MessageProto2,
1016                 Syntax::Proto3 => MessageBodyParseMode::MessageProto3,
1017             };
1018 
1019             let MessageBody {
1020                 fields,
1021                 reserved_nums,
1022                 reserved_names,
1023                 messages,
1024                 enums,
1025                 options,
1026                 extensions,
1027                 extension_ranges,
1028             } = self.next_message_body(mode)?;
1029 
1030             let message = Message {
1031                 name,
1032                 fields,
1033                 reserved_nums,
1034                 reserved_names,
1035                 messages,
1036                 enums,
1037                 options,
1038                 extensions,
1039                 extension_ranges,
1040             };
1041             Ok(Some(WithLoc { t: message, loc }))
1042         } else {
1043             Ok(None)
1044         }
1045     }
1046 
1047     // Extend
1048 
1049     // extend = "extend" messageType "{" {field | group | emptyStatement} "}"
next_extend_opt(&mut self) -> anyhow::Result<Option<Vec<WithLoc<Extension>>>>1050     fn next_extend_opt(&mut self) -> anyhow::Result<Option<Vec<WithLoc<Extension>>>> {
1051         let mut clone = self.clone();
1052         if clone.tokenizer.next_ident_if_eq("extend")? {
1053             // According to spec `extend` is only for `proto2`, but it is used in `proto3`
1054             // https://github.com/google/protobuf/issues/4610
1055 
1056             *self = clone;
1057 
1058             let extendee = self.next_message_or_enum_type()?;
1059 
1060             let mode = match self.syntax {
1061                 Syntax::Proto2 => MessageBodyParseMode::ExtendProto2,
1062                 Syntax::Proto3 => MessageBodyParseMode::ExtendProto3,
1063             };
1064 
1065             let MessageBody { fields, .. } = self.next_message_body(mode)?;
1066 
1067             // TODO: is oneof allowed in extend?
1068             let fields: Vec<WithLoc<Field>> = fields
1069                 .into_iter()
1070                 .map(|fo| match fo.t {
1071                     FieldOrOneOf::Field(f) => Ok(f),
1072                     FieldOrOneOf::OneOf(_) => Err(ParserError::OneOfInExtend),
1073                 })
1074                 .collect::<Result<_, ParserError>>()?;
1075 
1076             let extensions = fields
1077                 .into_iter()
1078                 .map(|field| {
1079                     let extendee = extendee.clone();
1080                     let loc = field.loc;
1081                     let extension = Extension { extendee, field };
1082                     WithLoc { t: extension, loc }
1083                 })
1084                 .collect();
1085 
1086             Ok(Some(extensions))
1087         } else {
1088             Ok(None)
1089         }
1090     }
1091 
1092     // Service definition
1093 
next_options_or_colon(&mut self) -> anyhow::Result<Vec<ProtobufOption>>1094     fn next_options_or_colon(&mut self) -> anyhow::Result<Vec<ProtobufOption>> {
1095         let mut options = Vec::new();
1096         if self.tokenizer.next_symbol_if_eq('{')? {
1097             while self.tokenizer.lookahead_if_symbol()? != Some('}') {
1098                 if let Some(option) = self.next_option_opt()? {
1099                     options.push(option);
1100                     continue;
1101                 }
1102 
1103                 if let Some(()) = self.next_empty_statement_opt()? {
1104                     continue;
1105                 }
1106 
1107                 return Err(ParserError::IncorrectInput.into());
1108             }
1109             self.tokenizer.next_symbol_expect_eq('}', "option")?;
1110         } else {
1111             self.tokenizer.next_symbol_expect_eq(';', "option")?;
1112         }
1113 
1114         Ok(options)
1115     }
1116 
1117     // stream = "stream" streamName "(" messageType "," messageType ")"
1118     //        (( "{" { option | emptyStatement } "}") | ";" )
next_stream_opt(&mut self) -> anyhow::Result<Option<Method>>1119     fn next_stream_opt(&mut self) -> anyhow::Result<Option<Method>> {
1120         assert_eq!(Syntax::Proto2, self.syntax);
1121         if self.tokenizer.next_ident_if_eq("stream")? {
1122             let name = self.tokenizer.next_ident()?;
1123             self.tokenizer.next_symbol_expect_eq('(', "stream")?;
1124             let input_type = self.next_message_or_enum_type()?;
1125             self.tokenizer.next_symbol_expect_eq(',', "stream")?;
1126             let output_type = self.next_message_or_enum_type()?;
1127             self.tokenizer.next_symbol_expect_eq(')', "stream")?;
1128             let options = self.next_options_or_colon()?;
1129             Ok(Some(Method {
1130                 name,
1131                 input_type,
1132                 output_type,
1133                 client_streaming: true,
1134                 server_streaming: true,
1135                 options,
1136             }))
1137         } else {
1138             Ok(None)
1139         }
1140     }
1141 
1142     // rpc = "rpc" rpcName "(" [ "stream" ] messageType ")"
1143     //     "returns" "(" [ "stream" ] messageType ")"
1144     //     (( "{" { option | emptyStatement } "}" ) | ";" )
next_rpc_opt(&mut self) -> anyhow::Result<Option<Method>>1145     fn next_rpc_opt(&mut self) -> anyhow::Result<Option<Method>> {
1146         if self.tokenizer.next_ident_if_eq("rpc")? {
1147             let name = self.tokenizer.next_ident()?;
1148             self.tokenizer.next_symbol_expect_eq('(', "rpc")?;
1149             let client_streaming = self.tokenizer.next_ident_if_eq("stream")?;
1150             let input_type = self.next_message_or_enum_type()?;
1151             self.tokenizer.next_symbol_expect_eq(')', "rpc")?;
1152             self.tokenizer.next_ident_expect_eq("returns")?;
1153             self.tokenizer.next_symbol_expect_eq('(', "rpc")?;
1154             let server_streaming = self.tokenizer.next_ident_if_eq("stream")?;
1155             let output_type = self.next_message_or_enum_type()?;
1156             self.tokenizer.next_symbol_expect_eq(')', "rpc")?;
1157             let options = self.next_options_or_colon()?;
1158             Ok(Some(Method {
1159                 name,
1160                 input_type,
1161                 output_type,
1162                 client_streaming,
1163                 server_streaming,
1164                 options,
1165             }))
1166         } else {
1167             Ok(None)
1168         }
1169     }
1170 
1171     // proto2:
1172     // service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}"
1173     //
1174     // proto3:
1175     // service = "service" serviceName "{" { option | rpc | emptyStatement } "}"
next_service_opt(&mut self) -> anyhow::Result<Option<WithLoc<Service>>>1176     fn next_service_opt(&mut self) -> anyhow::Result<Option<WithLoc<Service>>> {
1177         let loc = self.tokenizer.lookahead_loc();
1178 
1179         if self.tokenizer.next_ident_if_eq("service")? {
1180             let name = self.tokenizer.next_ident()?;
1181             let mut methods = Vec::new();
1182             let mut options = Vec::new();
1183             self.tokenizer.next_symbol_expect_eq('{', "service")?;
1184             while self.tokenizer.lookahead_if_symbol()? != Some('}') {
1185                 if let Some(method) = self.next_rpc_opt()? {
1186                     methods.push(method);
1187                     continue;
1188                 }
1189 
1190                 if self.syntax == Syntax::Proto2 {
1191                     if let Some(method) = self.next_stream_opt()? {
1192                         methods.push(method);
1193                         continue;
1194                     }
1195                 }
1196 
1197                 if let Some(o) = self.next_option_opt()? {
1198                     options.push(o);
1199                     continue;
1200                 }
1201 
1202                 if let Some(()) = self.next_empty_statement_opt()? {
1203                     continue;
1204                 }
1205 
1206                 return Err(ParserError::IncorrectInput.into());
1207             }
1208             self.tokenizer.next_symbol_expect_eq('}', "service")?;
1209             Ok(Some(WithLoc {
1210                 loc,
1211                 t: Service {
1212                     name,
1213                     methods,
1214                     options,
1215                 },
1216             }))
1217         } else {
1218             Ok(None)
1219         }
1220     }
1221 
1222     // Proto file
1223 
1224     // proto = syntax { import | package | option | topLevelDef | emptyStatement }
1225     // topLevelDef = message | enum | extend | service
next_proto(&mut self) -> anyhow::Result<FileDescriptor>1226     pub fn next_proto(&mut self) -> anyhow::Result<FileDescriptor> {
1227         let syntax = self.next_syntax()?.unwrap_or(Syntax::Proto2);
1228         self.syntax = syntax;
1229 
1230         let mut imports = Vec::new();
1231         let mut package = ProtobufAbsPath::root();
1232         let mut messages = Vec::new();
1233         let mut enums = Vec::new();
1234         let mut extensions = Vec::new();
1235         let mut options = Vec::new();
1236         let mut services = Vec::new();
1237 
1238         while !self.tokenizer.syntax_eof()? {
1239             if let Some(import) = self.next_import_opt()? {
1240                 imports.push(import);
1241                 continue;
1242             }
1243 
1244             if let Some(next_package) = self.next_package_opt()? {
1245                 package = next_package;
1246                 continue;
1247             }
1248 
1249             if let Some(option) = self.next_option_opt()? {
1250                 options.push(option);
1251                 continue;
1252             }
1253 
1254             if let Some(message) = self.next_message_opt()? {
1255                 messages.push(message);
1256                 continue;
1257             }
1258 
1259             if let Some(enumeration) = self.next_enum_opt()? {
1260                 enums.push(enumeration);
1261                 continue;
1262             }
1263 
1264             if let Some(more_extensions) = self.next_extend_opt()? {
1265                 extensions.extend(more_extensions);
1266                 continue;
1267             }
1268 
1269             if let Some(service) = self.next_service_opt()? {
1270                 services.push(service);
1271                 continue;
1272             }
1273 
1274             if self.tokenizer.next_symbol_if_eq(';')? {
1275                 continue;
1276             }
1277 
1278             return Err(ParserError::IncorrectInput.into());
1279         }
1280 
1281         Ok(FileDescriptor {
1282             imports,
1283             package,
1284             syntax,
1285             messages,
1286             enums,
1287             extensions,
1288             services,
1289             options,
1290         })
1291     }
1292 }
1293 
1294 #[cfg(test)]
1295 mod test {
1296     use super::*;
1297 
parse<P, R>(input: &str, parse_what: P) -> R where P: FnOnce(&mut Parser) -> anyhow::Result<R>,1298     fn parse<P, R>(input: &str, parse_what: P) -> R
1299     where
1300         P: FnOnce(&mut Parser) -> anyhow::Result<R>,
1301     {
1302         let mut parser = Parser::new(input);
1303         let r =
1304             parse_what(&mut parser).expect(&format!("parse failed at {}", parser.tokenizer.loc()));
1305         let eof = parser
1306             .tokenizer
1307             .syntax_eof()
1308             .expect(&format!("check eof failed at {}", parser.tokenizer.loc()));
1309         assert!(eof, "{}", parser.tokenizer.loc());
1310         r
1311     }
1312 
parse_opt<P, R>(input: &str, parse_what: P) -> R where P: FnOnce(&mut Parser) -> anyhow::Result<Option<R>>,1313     fn parse_opt<P, R>(input: &str, parse_what: P) -> R
1314     where
1315         P: FnOnce(&mut Parser) -> anyhow::Result<Option<R>>,
1316     {
1317         let mut parser = Parser::new(input);
1318         let o =
1319             parse_what(&mut parser).expect(&format!("parse failed at {}", parser.tokenizer.loc()));
1320         let r = o.expect(&format!(
1321             "parser returned none at {}",
1322             parser.tokenizer.loc()
1323         ));
1324         assert!(parser.tokenizer.syntax_eof().unwrap());
1325         r
1326     }
1327 
1328     #[test]
test_syntax()1329     fn test_syntax() {
1330         let msg = r#"  syntax = "proto3";  "#;
1331         let mess = parse_opt(msg, |p| p.next_syntax());
1332         assert_eq!(Syntax::Proto3, mess);
1333     }
1334 
1335     #[test]
test_field_default_value_int()1336     fn test_field_default_value_int() {
1337         let msg = r#"  optional int64 f = 4 [default = 12];  "#;
1338         let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
1339         assert_eq!("f", mess.t.name);
1340         assert_eq!(
1341             ProtobufOptionName::simple("default"),
1342             mess.t.options[0].name
1343         );
1344         assert_eq!("12", mess.t.options[0].value.format());
1345     }
1346 
1347     #[test]
test_field_default_value_float()1348     fn test_field_default_value_float() {
1349         let msg = r#"  optional float f = 2 [default = 10.0];  "#;
1350         let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
1351         assert_eq!("f", mess.t.name);
1352         assert_eq!(
1353             ProtobufOptionName::simple("default"),
1354             mess.t.options[0].name
1355         );
1356         assert_eq!("10", mess.t.options[0].value.format());
1357     }
1358 
1359     #[test]
test_message()1360     fn test_message() {
1361         let msg = r#"message ReferenceData
1362     {
1363         repeated ScenarioInfo  scenarioSet = 1;
1364         repeated CalculatedObjectInfo calculatedObjectSet = 2;
1365         repeated RiskFactorList riskFactorListSet = 3;
1366         repeated RiskMaturityInfo riskMaturitySet = 4;
1367         repeated IndicatorInfo indicatorSet = 5;
1368         repeated RiskStrikeInfo riskStrikeSet = 6;
1369         repeated FreeProjectionList freeProjectionListSet = 7;
1370         repeated ValidationProperty ValidationSet = 8;
1371         repeated CalcProperties calcPropertiesSet = 9;
1372         repeated MaturityInfo maturitySet = 10;
1373     }"#;
1374 
1375         let mess = parse_opt(msg, |p| p.next_message_opt());
1376         assert_eq!(10, mess.t.fields.len());
1377     }
1378 
1379     #[test]
test_enum()1380     fn test_enum() {
1381         let msg = r#"enum PairingStatus {
1382                 DEALPAIRED        = 0;
1383                 INVENTORYORPHAN   = 1;
1384                 CALCULATEDORPHAN  = 2;
1385                 CANCELED          = 3;
1386     }"#;
1387 
1388         let enumeration = parse_opt(msg, |p| p.next_enum_opt());
1389         assert_eq!(4, enumeration.values.len());
1390     }
1391 
1392     #[test]
test_ignore()1393     fn test_ignore() {
1394         let msg = r#"option optimize_for = SPEED;"#;
1395 
1396         parse_opt(msg, |p| p.next_option_opt());
1397     }
1398 
1399     #[test]
test_import()1400     fn test_import() {
1401         let msg = r#"syntax = "proto3";
1402 
1403     import "test_import_nested_imported_pb.proto";
1404 
1405     message ContainsImportedNested {
1406         ContainerForNested.NestedMessage m = 1;
1407         ContainerForNested.NestedEnum e = 2;
1408     }
1409     "#;
1410         let desc = parse(msg, |p| p.next_proto());
1411 
1412         assert_eq!(
1413             vec!["test_import_nested_imported_pb.proto"],
1414             desc.imports
1415                 .into_iter()
1416                 .map(|i| i.path.to_str().to_owned())
1417                 .collect::<Vec<_>>()
1418         );
1419     }
1420 
1421     #[test]
test_nested_message()1422     fn test_nested_message() {
1423         let msg = r#"message A
1424     {
1425         message B {
1426             repeated int32 a = 1;
1427             optional string b = 2;
1428         }
1429         optional string b = 1;
1430     }"#;
1431 
1432         let mess = parse_opt(msg, |p| p.next_message_opt());
1433         assert_eq!(1, mess.t.messages.len());
1434     }
1435 
1436     #[test]
test_map()1437     fn test_map() {
1438         let msg = r#"message A
1439     {
1440         optional map<string, int32> b = 1;
1441     }"#;
1442 
1443         let mess = parse_opt(msg, |p| p.next_message_opt());
1444         assert_eq!(1, mess.t.fields.len());
1445         match mess.t.regular_fields_for_test()[0].typ {
1446             FieldType::Map(ref f) => match &**f {
1447                 &(FieldType::String, FieldType::Int32) => (),
1448                 ref f => panic!("Expecting Map<String, Int32> found {:?}", f),
1449             },
1450             ref f => panic!("Expecting map, got {:?}", f),
1451         }
1452     }
1453 
1454     #[test]
test_oneof()1455     fn test_oneof() {
1456         let msg = r#"message A
1457     {
1458         optional int32 a1 = 1;
1459         oneof a_oneof {
1460             string a2 = 2;
1461             int32 a3 = 3;
1462             bytes a4 = 4;
1463         }
1464         repeated bool a5 = 5;
1465     }"#;
1466 
1467         let mess = parse_opt(msg, |p| p.next_message_opt());
1468         assert_eq!(1, mess.t.oneofs().len());
1469         assert_eq!(3, mess.t.oneofs()[0].fields.len());
1470     }
1471 
1472     #[test]
test_reserved()1473     fn test_reserved() {
1474         let msg = r#"message Sample {
1475        reserved 4, 15, 17 to 20, 30;
1476        reserved "foo", "bar";
1477        optional uint64 age =1;
1478        required bytes name =2;
1479     }"#;
1480 
1481         let mess = parse_opt(msg, |p| p.next_message_opt());
1482         assert_eq!(
1483             vec![
1484                 FieldNumberRange { from: 4, to: 4 },
1485                 FieldNumberRange { from: 15, to: 15 },
1486                 FieldNumberRange { from: 17, to: 20 },
1487                 FieldNumberRange { from: 30, to: 30 }
1488             ],
1489             mess.t.reserved_nums
1490         );
1491         assert_eq!(
1492             vec!["foo".to_string(), "bar".to_string()],
1493             mess.t.reserved_names
1494         );
1495         assert_eq!(2, mess.t.fields.len());
1496     }
1497 
1498     #[test]
test_default_value_int()1499     fn test_default_value_int() {
1500         let msg = r#"message Sample {
1501             optional int32 x = 1 [default = 17];
1502         }"#;
1503 
1504         let mess = parse_opt(msg, |p| p.next_message_opt());
1505         assert_eq!(
1506             ProtobufOptionName::simple("default"),
1507             mess.t.regular_fields_for_test()[0].options[0].name
1508         );
1509         assert_eq!(
1510             "17",
1511             mess.t.regular_fields_for_test()[0].options[0]
1512                 .value
1513                 .format()
1514         );
1515     }
1516 
1517     #[test]
test_default_value_string()1518     fn test_default_value_string() {
1519         let msg = r#"message Sample {
1520             optional string x = 1 [default = "ab\nc d\"g\'h\0\"z"];
1521         }"#;
1522 
1523         let mess = parse_opt(msg, |p| p.next_message_opt());
1524         assert_eq!(
1525             r#""ab\nc d\"g\'h\0\"z""#,
1526             mess.t.regular_fields_for_test()[0].options[0]
1527                 .value
1528                 .format()
1529         );
1530     }
1531 
1532     #[test]
test_default_value_bytes()1533     fn test_default_value_bytes() {
1534         let msg = r#"message Sample {
1535             optional bytes x = 1 [default = "ab\nc d\xfeE\"g\'h\0\"z"];
1536         }"#;
1537 
1538         let mess = parse_opt(msg, |p| p.next_message_opt());
1539         assert_eq!(
1540             r#""ab\nc d\xfeE\"g\'h\0\"z""#,
1541             mess.t.regular_fields_for_test()[0].options[0]
1542                 .value
1543                 .format()
1544         );
1545     }
1546 
1547     #[test]
test_group()1548     fn test_group() {
1549         let msg = r#"message MessageWithGroup {
1550             optional string aaa = 1;
1551 
1552             repeated group Identifier = 18 {
1553                 optional int32 iii = 19;
1554                 optional string sss = 20;
1555             }
1556 
1557             required int bbb = 3;
1558         }"#;
1559         let mess = parse_opt(msg, |p| p.next_message_opt());
1560 
1561         assert_eq!("identifier", mess.t.regular_fields_for_test()[1].name);
1562         if let FieldType::Group(Group { fields, .. }) = &mess.t.regular_fields_for_test()[1].typ {
1563             assert_eq!(2, fields.len());
1564         } else {
1565             panic!("expecting group");
1566         }
1567 
1568         assert_eq!("bbb", mess.t.regular_fields_for_test()[2].name);
1569     }
1570 
1571     #[test]
test_incorrect_file_descriptor()1572     fn test_incorrect_file_descriptor() {
1573         let msg = r#"
1574             message Foo {}
1575 
1576             dfgdg
1577         "#;
1578 
1579         let err = FileDescriptor::parse(msg).err().expect("err");
1580         assert_eq!(4, err.line);
1581     }
1582 }
1583