1 use std::str; 2 3 use protobuf_support::lexer::int; 4 use protobuf_support::lexer::lexer_impl::LexerError; 5 use protobuf_support::lexer::num_lit::NumLit; 6 use protobuf_support::lexer::parser_language::ParserLanguage; 7 use protobuf_support::lexer::str_lit::StrLitDecodeError; 8 use protobuf_support::lexer::token::Token; 9 use protobuf_support::lexer::tokenizer::Tokenizer; 10 use protobuf_support::lexer::tokenizer::TokenizerError; 11 12 use crate::model::AnyTypeUrl; 13 use crate::model::ProtobufConstantMessageFieldName; 14 use crate::proto_path::ProtoPathBuf; 15 use crate::protobuf_abs_path::ProtobufAbsPath; 16 use crate::protobuf_ident::ProtobufIdent; 17 use crate::protobuf_path::ProtobufPath; 18 use crate::protobuf_rel_path::ProtobufRelPath; 19 use crate::pure::model; 20 use crate::pure::model::EnumValue; 21 use crate::pure::model::Enumeration; 22 use crate::pure::model::Extension; 23 use crate::pure::model::Field; 24 use crate::pure::model::FieldNumberRange; 25 use crate::pure::model::FieldOrOneOf; 26 use crate::pure::model::FieldType; 27 use crate::pure::model::FileDescriptor; 28 use crate::pure::model::Group; 29 use crate::pure::model::ImportVis; 30 use crate::pure::model::Message; 31 use crate::pure::model::Method; 32 use crate::pure::model::OneOf; 33 use crate::pure::model::ProtobufConstant; 34 use crate::pure::model::ProtobufConstantMessage; 35 use crate::pure::model::ProtobufOption; 36 use crate::pure::model::ProtobufOptionName; 37 use crate::pure::model::ProtobufOptionNameExt; 38 use crate::pure::model::ProtobufOptionNamePart; 39 use crate::pure::model::Rule; 40 use crate::pure::model::Service; 41 use crate::pure::model::Syntax; 42 use crate::pure::model::WithLoc; 43 44 /// Basic information about parsing error. 
45 #[derive(Debug, thiserror::Error)] 46 pub(crate) enum ParserError { 47 #[error("{0}")] 48 TokenizerError(#[source] TokenizerError), 49 // TODO 50 #[error("incorrect input")] 51 IncorrectInput, 52 #[error("not UTF-8")] 53 NotUtf8, 54 #[error("expecting a constant")] 55 ExpectConstant, 56 #[error("unknown syntax")] 57 UnknownSyntax, 58 #[error("integer overflow")] 59 IntegerOverflow, 60 #[error("label not allowed")] 61 LabelNotAllowed, 62 #[error("label required")] 63 LabelRequired, 64 #[error("group name should start with upper case")] 65 GroupNameShouldStartWithUpperCase, 66 #[error("map field not allowed")] 67 MapFieldNotAllowed, 68 #[error("string literal decode error: {0}")] 69 StrLitDecodeError(#[source] StrLitDecodeError), 70 #[error("lexer error: {0}")] 71 LexerError(#[source] LexerError), 72 #[error("oneof in group")] 73 OneOfInGroup, 74 #[error("oneof in oneof")] 75 OneOfInOneOf, 76 #[error("oneof in extend")] 77 OneOfInExtend, 78 } 79 80 impl From<TokenizerError> for ParserError { from(e: TokenizerError) -> Self81 fn from(e: TokenizerError) -> Self { 82 ParserError::TokenizerError(e) 83 } 84 } 85 86 impl From<StrLitDecodeError> for ParserError { from(e: StrLitDecodeError) -> Self87 fn from(e: StrLitDecodeError) -> Self { 88 ParserError::StrLitDecodeError(e) 89 } 90 } 91 92 impl From<LexerError> for ParserError { from(e: LexerError) -> Self93 fn from(e: LexerError) -> Self { 94 ParserError::LexerError(e) 95 } 96 } 97 98 impl From<int::Overflow> for ParserError { from(_: int::Overflow) -> Self99 fn from(_: int::Overflow) -> Self { 100 ParserError::IntegerOverflow 101 } 102 } 103 104 #[derive(Debug, thiserror::Error)] 105 #[error("at {line}:{col}: {error}")] 106 pub struct ParserErrorWithLocation { 107 #[source] 108 pub error: anyhow::Error, 109 /// 1-based 110 pub line: u32, 111 /// 1-based 112 pub col: u32, 113 } 114 115 trait ToU8 { to_u8(&self) -> anyhow::Result<u8>116 fn to_u8(&self) -> anyhow::Result<u8>; 117 } 118 119 trait ToI32 { to_i32(&self) -> 
anyhow::Result<i32>120 fn to_i32(&self) -> anyhow::Result<i32>; 121 } 122 123 trait ToI64 { to_i64(&self) -> anyhow::Result<i64>124 fn to_i64(&self) -> anyhow::Result<i64>; 125 } 126 127 trait ToChar { to_char(&self) -> anyhow::Result<char>128 fn to_char(&self) -> anyhow::Result<char>; 129 } 130 131 impl ToI32 for u64 { to_i32(&self) -> anyhow::Result<i32>132 fn to_i32(&self) -> anyhow::Result<i32> { 133 if *self <= i32::max_value() as u64 { 134 Ok(*self as i32) 135 } else { 136 Err(ParserError::IntegerOverflow.into()) 137 } 138 } 139 } 140 141 impl ToI32 for i64 { to_i32(&self) -> anyhow::Result<i32>142 fn to_i32(&self) -> anyhow::Result<i32> { 143 if *self <= i32::max_value() as i64 && *self >= i32::min_value() as i64 { 144 Ok(*self as i32) 145 } else { 146 Err(ParserError::IntegerOverflow.into()) 147 } 148 } 149 } 150 151 impl ToI64 for u64 { to_i64(&self) -> anyhow::Result<i64>152 fn to_i64(&self) -> anyhow::Result<i64> { 153 if *self <= i64::max_value() as u64 { 154 Ok(*self as i64) 155 } else { 156 Err(ParserError::IntegerOverflow.into()) 157 } 158 } 159 } 160 161 impl ToChar for u8 { to_char(&self) -> anyhow::Result<char>162 fn to_char(&self) -> anyhow::Result<char> { 163 if *self <= 0x7f { 164 Ok(*self as char) 165 } else { 166 Err(ParserError::NotUtf8.into()) 167 } 168 } 169 } 170 171 impl ToU8 for u32 { to_u8(&self) -> anyhow::Result<u8>172 fn to_u8(&self) -> anyhow::Result<u8> { 173 if *self as u8 as u32 == *self { 174 Ok(*self as u8) 175 } else { 176 Err(ParserError::IntegerOverflow.into()) 177 } 178 } 179 } 180 181 #[derive(Clone)] 182 pub(crate) struct Parser<'a> { 183 pub tokenizer: Tokenizer<'a>, 184 syntax: Syntax, 185 } 186 187 #[derive(Copy, Clone)] 188 enum MessageBodyParseMode { 189 MessageProto2, 190 MessageProto3, 191 Oneof, 192 ExtendProto2, 193 ExtendProto3, 194 } 195 196 impl MessageBodyParseMode { label_allowed(&self, label: Rule) -> bool197 fn label_allowed(&self, label: Rule) -> bool { 198 match label { 199 Rule::Repeated => match *self 
{ 200 MessageBodyParseMode::MessageProto2 201 | MessageBodyParseMode::MessageProto3 202 | MessageBodyParseMode::ExtendProto2 203 | MessageBodyParseMode::ExtendProto3 => true, 204 MessageBodyParseMode::Oneof => false, 205 }, 206 Rule::Optional => match *self { 207 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true, 208 MessageBodyParseMode::MessageProto3 | MessageBodyParseMode::ExtendProto3 => true, 209 MessageBodyParseMode::Oneof => false, 210 }, 211 Rule::Required => match *self { 212 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true, 213 MessageBodyParseMode::MessageProto3 | MessageBodyParseMode::ExtendProto3 => false, 214 MessageBodyParseMode::Oneof => false, 215 }, 216 } 217 } 218 some_label_required(&self) -> bool219 fn some_label_required(&self) -> bool { 220 match *self { 221 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::ExtendProto2 => true, 222 MessageBodyParseMode::MessageProto3 223 | MessageBodyParseMode::ExtendProto3 224 | MessageBodyParseMode::Oneof => false, 225 } 226 } 227 map_allowed(&self) -> bool228 fn map_allowed(&self) -> bool { 229 match *self { 230 MessageBodyParseMode::MessageProto2 231 | MessageBodyParseMode::MessageProto3 232 | MessageBodyParseMode::ExtendProto2 233 | MessageBodyParseMode::ExtendProto3 => true, 234 MessageBodyParseMode::Oneof => false, 235 } 236 } 237 is_most_non_fields_allowed(&self) -> bool238 fn is_most_non_fields_allowed(&self) -> bool { 239 match *self { 240 MessageBodyParseMode::MessageProto2 | MessageBodyParseMode::MessageProto3 => true, 241 MessageBodyParseMode::ExtendProto2 242 | MessageBodyParseMode::ExtendProto3 243 | MessageBodyParseMode::Oneof => false, 244 } 245 } 246 is_option_allowed(&self) -> bool247 fn is_option_allowed(&self) -> bool { 248 match *self { 249 MessageBodyParseMode::MessageProto2 250 | MessageBodyParseMode::MessageProto3 251 | MessageBodyParseMode::Oneof => true, 252 MessageBodyParseMode::ExtendProto2 | 
MessageBodyParseMode::ExtendProto3 => false, 253 } 254 } 255 is_extensions_allowed(&self) -> bool256 fn is_extensions_allowed(&self) -> bool { 257 match self { 258 MessageBodyParseMode::MessageProto2 => true, 259 _ => false, 260 } 261 } 262 } 263 264 #[derive(Default)] 265 pub(crate) struct MessageBody { 266 pub fields: Vec<WithLoc<FieldOrOneOf>>, 267 pub reserved_nums: Vec<FieldNumberRange>, 268 pub reserved_names: Vec<String>, 269 pub messages: Vec<WithLoc<Message>>, 270 pub enums: Vec<WithLoc<Enumeration>>, 271 pub options: Vec<ProtobufOption>, 272 pub extension_ranges: Vec<FieldNumberRange>, 273 pub extensions: Vec<WithLoc<Extension>>, 274 } 275 276 trait NumLitEx { to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant>277 fn to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant>; 278 } 279 280 impl NumLitEx for NumLit { to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant>281 fn to_option_value(&self, sign_is_plus: bool) -> anyhow::Result<ProtobufConstant> { 282 Ok(match (*self, sign_is_plus) { 283 (NumLit::U64(u), true) => ProtobufConstant::U64(u), 284 (NumLit::F64(f), true) => ProtobufConstant::F64(f), 285 (NumLit::U64(u), false) => { 286 ProtobufConstant::I64(int::neg(u).map_err(|_| ParserError::IntegerOverflow)?) 287 } 288 (NumLit::F64(f), false) => ProtobufConstant::F64(-f), 289 }) 290 } 291 } 292 293 impl<'a> Parser<'a> { new(input: &'a str) -> Parser<'a>294 pub(crate) fn new(input: &'a str) -> Parser<'a> { 295 Parser { 296 tokenizer: Tokenizer::new(input, ParserLanguage::Proto), 297 syntax: Syntax::Proto2, 298 } 299 } 300 301 // Protobuf grammar 302 303 // fullIdent = ident { "." ident } next_full_ident(&mut self) -> anyhow::Result<ProtobufPath>304 fn next_full_ident(&mut self) -> anyhow::Result<ProtobufPath> { 305 let mut full_ident = String::new(); 306 // https://github.com/google/protobuf/issues/4563 307 if self.tokenizer.next_symbol_if_eq('.')? 
{ 308 full_ident.push('.'); 309 } 310 full_ident.push_str(&self.tokenizer.next_ident()?); 311 while self.tokenizer.next_symbol_if_eq('.')? { 312 full_ident.push('.'); 313 full_ident.push_str(&self.tokenizer.next_ident()?); 314 } 315 Ok(ProtobufPath::new(full_ident)) 316 } 317 318 // fullIdent = ident { "." ident } next_full_ident_rel(&mut self) -> anyhow::Result<ProtobufRelPath>319 fn next_full_ident_rel(&mut self) -> anyhow::Result<ProtobufRelPath> { 320 let mut full_ident = String::new(); 321 full_ident.push_str(&self.tokenizer.next_ident()?); 322 while self.tokenizer.next_symbol_if_eq('.')? { 323 full_ident.push('.'); 324 full_ident.push_str(&self.tokenizer.next_ident()?); 325 } 326 Ok(ProtobufRelPath::new(full_ident)) 327 } 328 329 // emptyStatement = ";" next_empty_statement_opt(&mut self) -> anyhow::Result<Option<()>>330 fn next_empty_statement_opt(&mut self) -> anyhow::Result<Option<()>> { 331 if self.tokenizer.next_symbol_if_eq(';')? { 332 Ok(Some(())) 333 } else { 334 Ok(None) 335 } 336 } 337 338 // messageName = ident 339 // enumName = ident 340 // messageType = [ "." ] { ident "." } messageName 341 // enumType = [ "." ] { ident "." 
} enumName next_message_or_enum_type(&mut self) -> anyhow::Result<ProtobufPath>342 fn next_message_or_enum_type(&mut self) -> anyhow::Result<ProtobufPath> { 343 self.next_full_ident() 344 } 345 346 // groupName = capitalLetter { letter | decimalDigit | "_" } next_group_name(&mut self) -> anyhow::Result<String>347 fn next_group_name(&mut self) -> anyhow::Result<String> { 348 // lexer cannot distinguish between group name and other ident 349 let mut clone = self.clone(); 350 let ident = clone.tokenizer.next_ident()?; 351 if !ident.chars().next().unwrap().is_ascii_uppercase() { 352 return Err(ParserError::GroupNameShouldStartWithUpperCase.into()); 353 } 354 *self = clone; 355 Ok(ident) 356 } 357 358 // Boolean 359 360 // boolLit = "true" | "false" next_bool_lit_opt(&mut self) -> anyhow::Result<Option<bool>>361 fn next_bool_lit_opt(&mut self) -> anyhow::Result<Option<bool>> { 362 Ok(if self.tokenizer.next_ident_if_eq("true")? { 363 Some(true) 364 } else if self.tokenizer.next_ident_if_eq("false")? { 365 Some(false) 366 } else { 367 None 368 }) 369 } 370 371 // Constant 372 next_num_lit(&mut self) -> anyhow::Result<NumLit>373 fn next_num_lit(&mut self) -> anyhow::Result<NumLit> { 374 self.tokenizer 375 .next_token_check_map(|token| Ok(token.to_num_lit()?)) 376 } 377 next_message_constant_field_name( &mut self, ) -> anyhow::Result<ProtobufConstantMessageFieldName>378 fn next_message_constant_field_name( 379 &mut self, 380 ) -> anyhow::Result<ProtobufConstantMessageFieldName> { 381 if self.tokenizer.next_symbol_if_eq('[')? { 382 let n = self.next_full_ident()?; 383 if self.tokenizer.next_symbol_if_eq('/')? 
{ 384 let prefix = format!("{}", n); 385 let full_type_name = self.next_full_ident()?; 386 self.tokenizer 387 .next_symbol_expect_eq(']', "message constant")?; 388 Ok(ProtobufConstantMessageFieldName::AnyTypeUrl(AnyTypeUrl { 389 prefix, 390 full_type_name, 391 })) 392 } else { 393 self.tokenizer 394 .next_symbol_expect_eq(']', "message constant")?; 395 Ok(ProtobufConstantMessageFieldName::Extension(n)) 396 } 397 } else { 398 let n = self.tokenizer.next_ident()?; 399 Ok(ProtobufConstantMessageFieldName::Regular(n)) 400 } 401 } 402 next_message_constant(&mut self) -> anyhow::Result<ProtobufConstantMessage>403 fn next_message_constant(&mut self) -> anyhow::Result<ProtobufConstantMessage> { 404 let mut r = ProtobufConstantMessage::default(); 405 self.tokenizer 406 .next_symbol_expect_eq('{', "message constant")?; 407 while !self.tokenizer.lookahead_is_symbol('}')? { 408 let n = self.next_message_constant_field_name()?; 409 let v = self.next_field_value()?; 410 r.fields.insert(n, v); 411 } 412 self.tokenizer 413 .next_symbol_expect_eq('}', "message constant")?; 414 Ok(r) 415 } 416 417 // constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | 418 // strLit | boolLit next_constant(&mut self) -> anyhow::Result<ProtobufConstant>419 fn next_constant(&mut self) -> anyhow::Result<ProtobufConstant> { 420 // https://github.com/google/protobuf/blob/a21f225824e994ebd35e8447382ea4e0cd165b3c/src/google/protobuf/unittest_custom_options.proto#L350 421 if self.tokenizer.lookahead_is_symbol('{')? { 422 return Ok(ProtobufConstant::Message(self.next_message_constant()?)); 423 } 424 425 if let Some(b) = self.next_bool_lit_opt()? { 426 return Ok(ProtobufConstant::Bool(b)); 427 } 428 429 if let &Token::Symbol(c) = self.tokenizer.lookahead_some()? 
{ 430 if c == '+' || c == '-' { 431 self.tokenizer.advance()?; 432 let sign = c == '+'; 433 return Ok(self.next_num_lit()?.to_option_value(sign)?); 434 } 435 } 436 437 if let Some(r) = self.tokenizer.next_token_if_map(|token| match token { 438 &Token::StrLit(ref s) => Some(ProtobufConstant::String(s.clone())), 439 _ => None, 440 })? { 441 return Ok(r); 442 } 443 444 match self.tokenizer.lookahead_some()? { 445 &Token::IntLit(..) | &Token::FloatLit(..) => { 446 return self.next_num_lit()?.to_option_value(true); 447 } 448 &Token::Ident(..) => { 449 return Ok(ProtobufConstant::Ident(self.next_full_ident()?)); 450 } 451 _ => {} 452 } 453 454 Err(ParserError::ExpectConstant.into()) 455 } 456 next_field_value(&mut self) -> anyhow::Result<ProtobufConstant>457 fn next_field_value(&mut self) -> anyhow::Result<ProtobufConstant> { 458 if self.tokenizer.next_symbol_if_eq(':')? { 459 // Colon is optional when reading message constant. 460 self.next_constant() 461 } else { 462 Ok(ProtobufConstant::Message(self.next_message_constant()?)) 463 } 464 } 465 next_int_lit(&mut self) -> anyhow::Result<u64>466 fn next_int_lit(&mut self) -> anyhow::Result<u64> { 467 self.tokenizer.next_token_check_map(|token| match token { 468 &Token::IntLit(i) => Ok(i), 469 _ => Err(ParserError::IncorrectInput.into()), 470 }) 471 } 472 473 // Syntax 474 475 // syntax = "syntax" "=" quote "proto2" quote ";" 476 // syntax = "syntax" "=" quote "proto3" quote ";" next_syntax(&mut self) -> anyhow::Result<Option<Syntax>>477 fn next_syntax(&mut self) -> anyhow::Result<Option<Syntax>> { 478 if self.tokenizer.next_ident_if_eq("syntax")? 
{ 479 self.tokenizer.next_symbol_expect_eq('=', "syntax")?; 480 let syntax_str = self.tokenizer.next_str_lit()?.decode_utf8()?; 481 let syntax = if syntax_str == "proto2" { 482 Syntax::Proto2 483 } else if syntax_str == "proto3" { 484 Syntax::Proto3 485 } else { 486 return Err(ParserError::UnknownSyntax.into()); 487 }; 488 self.tokenizer.next_symbol_expect_eq(';', "syntax")?; 489 Ok(Some(syntax)) 490 } else { 491 Ok(None) 492 } 493 } 494 495 // Import Statement 496 497 // import = "import" [ "weak" | "public" ] strLit ";" next_import_opt(&mut self) -> anyhow::Result<Option<model::Import>>498 fn next_import_opt(&mut self) -> anyhow::Result<Option<model::Import>> { 499 if self.tokenizer.next_ident_if_eq("import")? { 500 let vis = if self.tokenizer.next_ident_if_eq("weak")? { 501 ImportVis::Weak 502 } else if self.tokenizer.next_ident_if_eq("public")? { 503 ImportVis::Public 504 } else { 505 ImportVis::Default 506 }; 507 let path = self.tokenizer.next_str_lit()?.decode_utf8()?; 508 self.tokenizer.next_symbol_expect_eq(';', "import")?; 509 let path = ProtoPathBuf::new(path)?; 510 Ok(Some(model::Import { path, vis })) 511 } else { 512 Ok(None) 513 } 514 } 515 516 // Package 517 518 // package = "package" fullIdent ";" next_package_opt(&mut self) -> anyhow::Result<Option<ProtobufAbsPath>>519 fn next_package_opt(&mut self) -> anyhow::Result<Option<ProtobufAbsPath>> { 520 if self.tokenizer.next_ident_if_eq("package")? 
{ 521 let package = self.next_full_ident_rel()?; 522 self.tokenizer.next_symbol_expect_eq(';', "package")?; 523 Ok(Some(package.into_absolute())) 524 } else { 525 Ok(None) 526 } 527 } 528 529 // Option 530 next_ident(&mut self) -> anyhow::Result<ProtobufIdent>531 fn next_ident(&mut self) -> anyhow::Result<ProtobufIdent> { 532 Ok(ProtobufIdent::from(self.tokenizer.next_ident()?)) 533 } 534 next_option_name_component(&mut self) -> anyhow::Result<ProtobufOptionNamePart>535 fn next_option_name_component(&mut self) -> anyhow::Result<ProtobufOptionNamePart> { 536 if self.tokenizer.next_symbol_if_eq('(')? { 537 let comp = self.next_full_ident()?; 538 self.tokenizer 539 .next_symbol_expect_eq(')', "option name component")?; 540 Ok(ProtobufOptionNamePart::Ext(comp)) 541 } else { 542 Ok(ProtobufOptionNamePart::Direct(self.next_ident()?)) 543 } 544 } 545 546 // https://github.com/google/protobuf/issues/4563 547 // optionName = ( ident | "(" fullIdent ")" ) { "." ident } next_option_name(&mut self) -> anyhow::Result<ProtobufOptionName>548 fn next_option_name(&mut self) -> anyhow::Result<ProtobufOptionName> { 549 let mut components = Vec::new(); 550 components.push(self.next_option_name_component()?); 551 while self.tokenizer.next_symbol_if_eq('.')? { 552 components.push(self.next_option_name_component()?); 553 } 554 if components.len() == 1 { 555 if let ProtobufOptionNamePart::Direct(n) = &components[0] { 556 return Ok(ProtobufOptionName::Builtin(n.clone())); 557 } 558 } 559 Ok(ProtobufOptionName::Ext(ProtobufOptionNameExt(components))) 560 } 561 562 // option = "option" optionName "=" constant ";" next_option_opt(&mut self) -> anyhow::Result<Option<ProtobufOption>>563 fn next_option_opt(&mut self) -> anyhow::Result<Option<ProtobufOption>> { 564 if self.tokenizer.next_ident_if_eq("option")? 
{ 565 let name = self.next_option_name()?; 566 self.tokenizer.next_symbol_expect_eq('=', "option")?; 567 let value = self.next_constant()?; 568 self.tokenizer.next_symbol_expect_eq(';', "option")?; 569 Ok(Some(ProtobufOption { name, value })) 570 } else { 571 Ok(None) 572 } 573 } 574 575 // Fields 576 577 // label = "required" | "optional" | "repeated" next_label(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<Option<Rule>>578 fn next_label(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<Option<Rule>> { 579 for rule in Rule::ALL { 580 let mut clone = self.clone(); 581 if clone.tokenizer.next_ident_if_eq(rule.as_str())? { 582 if !mode.label_allowed(rule) { 583 return Err(ParserError::LabelNotAllowed.into()); 584 } 585 586 *self = clone; 587 return Ok(Some(rule)); 588 } 589 } 590 591 if mode.some_label_required() { 592 Err(ParserError::LabelRequired.into()) 593 } else { 594 Ok(None) 595 } 596 } 597 next_field_type(&mut self) -> anyhow::Result<FieldType>598 fn next_field_type(&mut self) -> anyhow::Result<FieldType> { 599 let simple = &[ 600 ("int32", FieldType::Int32), 601 ("int64", FieldType::Int64), 602 ("uint32", FieldType::Uint32), 603 ("uint64", FieldType::Uint64), 604 ("sint32", FieldType::Sint32), 605 ("sint64", FieldType::Sint64), 606 ("fixed32", FieldType::Fixed32), 607 ("sfixed32", FieldType::Sfixed32), 608 ("fixed64", FieldType::Fixed64), 609 ("sfixed64", FieldType::Sfixed64), 610 ("bool", FieldType::Bool), 611 ("string", FieldType::String), 612 ("bytes", FieldType::Bytes), 613 ("float", FieldType::Float), 614 ("double", FieldType::Double), 615 ]; 616 for &(ref n, ref t) in simple { 617 if self.tokenizer.next_ident_if_eq(n)? { 618 return Ok(t.clone()); 619 } 620 } 621 622 if let Some(t) = self.next_map_field_type_opt()? 
{ 623 return Ok(t); 624 } 625 626 let message_or_enum = self.next_message_or_enum_type()?; 627 Ok(FieldType::MessageOrEnum(message_or_enum)) 628 } 629 next_field_number(&mut self) -> anyhow::Result<i32>630 fn next_field_number(&mut self) -> anyhow::Result<i32> { 631 // TODO: not all integers are valid field numbers 632 self.tokenizer.next_token_check_map(|token| match token { 633 &Token::IntLit(i) => i.to_i32(), 634 _ => Err(ParserError::IncorrectInput.into()), 635 }) 636 } 637 638 // fieldOption = optionName "=" constant next_field_option(&mut self) -> anyhow::Result<ProtobufOption>639 fn next_field_option(&mut self) -> anyhow::Result<ProtobufOption> { 640 let name = self.next_option_name()?; 641 self.tokenizer.next_symbol_expect_eq('=', "field option")?; 642 let value = self.next_constant()?; 643 Ok(ProtobufOption { name, value }) 644 } 645 646 // fieldOptions = fieldOption { "," fieldOption } next_field_options(&mut self) -> anyhow::Result<Vec<ProtobufOption>>647 fn next_field_options(&mut self) -> anyhow::Result<Vec<ProtobufOption>> { 648 let mut options = Vec::new(); 649 650 options.push(self.next_field_option()?); 651 652 while self.tokenizer.next_symbol_if_eq(',')? { 653 options.push(self.next_field_option()?); 654 } 655 656 Ok(options) 657 } 658 659 // field = label type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" 660 // group = label "group" groupName "=" fieldNumber messageBody next_field(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<WithLoc<Field>>661 fn next_field(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<WithLoc<Field>> { 662 let loc = self.tokenizer.lookahead_loc(); 663 let rule = if self.clone().tokenizer.next_ident_if_eq("map")? { 664 if !mode.map_allowed() { 665 return Err(ParserError::MapFieldNotAllowed.into()); 666 } 667 None 668 } else { 669 self.next_label(mode)? 670 }; 671 if self.tokenizer.next_ident_if_eq("group")? 
{ 672 let name = self.next_group_name()?.to_owned(); 673 self.tokenizer.next_symbol_expect_eq('=', "group")?; 674 let number = self.next_field_number()?; 675 676 let mode = match self.syntax { 677 Syntax::Proto2 => MessageBodyParseMode::MessageProto2, 678 Syntax::Proto3 => MessageBodyParseMode::MessageProto3, 679 }; 680 681 let MessageBody { fields, .. } = self.next_message_body(mode)?; 682 683 let fields = fields 684 .into_iter() 685 .map(|fo| match fo.t { 686 FieldOrOneOf::Field(f) => Ok(f), 687 FieldOrOneOf::OneOf(_) => Err(ParserError::OneOfInGroup), 688 }) 689 .collect::<Result<_, ParserError>>()?; 690 691 let field = Field { 692 // The field name is a lowercased version of the type name 693 // (which has been verified to start with an uppercase letter). 694 // https://git.io/JvxAP 695 name: name.to_ascii_lowercase(), 696 rule, 697 typ: FieldType::Group(Group { name, fields }), 698 number, 699 options: Vec::new(), 700 }; 701 Ok(WithLoc { t: field, loc }) 702 } else { 703 let typ = self.next_field_type()?; 704 let name = self.tokenizer.next_ident()?.to_owned(); 705 self.tokenizer.next_symbol_expect_eq('=', "field")?; 706 let number = self.next_field_number()?; 707 708 let mut options = Vec::new(); 709 710 if self.tokenizer.next_symbol_if_eq('[')? { 711 for o in self.next_field_options()? { 712 options.push(o); 713 } 714 self.tokenizer.next_symbol_expect_eq(']', "field")?; 715 } 716 self.tokenizer.next_symbol_expect_eq(';', "field")?; 717 let field = Field { 718 name, 719 rule, 720 typ, 721 number, 722 options, 723 }; 724 Ok(WithLoc { t: field, loc }) 725 } 726 } 727 728 // oneof = "oneof" oneofName "{" { oneofField | emptyStatement } "}" 729 // oneofField = type fieldName "=" fieldNumber [ "[" fieldOptions "]" ] ";" next_oneof_opt(&mut self) -> anyhow::Result<Option<OneOf>>730 fn next_oneof_opt(&mut self) -> anyhow::Result<Option<OneOf>> { 731 if self.tokenizer.next_ident_if_eq("oneof")? 
{ 732 let name = self.tokenizer.next_ident()?.to_owned(); 733 let MessageBody { 734 fields, options, .. 735 } = self.next_message_body(MessageBodyParseMode::Oneof)?; 736 let fields = fields 737 .into_iter() 738 .map(|fo| match fo.t { 739 FieldOrOneOf::Field(f) => Ok(f), 740 FieldOrOneOf::OneOf(_) => Err(ParserError::OneOfInOneOf), 741 }) 742 .collect::<Result<_, ParserError>>()?; 743 Ok(Some(OneOf { 744 name, 745 fields, 746 options, 747 })) 748 } else { 749 Ok(None) 750 } 751 } 752 753 // mapField = "map" "<" keyType "," type ">" mapName "=" fieldNumber [ "[" fieldOptions "]" ] ";" 754 // keyType = "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | 755 // "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" next_map_field_type_opt(&mut self) -> anyhow::Result<Option<FieldType>>756 fn next_map_field_type_opt(&mut self) -> anyhow::Result<Option<FieldType>> { 757 if self.tokenizer.next_ident_if_eq("map")? { 758 self.tokenizer 759 .next_symbol_expect_eq('<', "map field type")?; 760 // TODO: restrict key types 761 let key = self.next_field_type()?; 762 self.tokenizer 763 .next_symbol_expect_eq(',', "map field type")?; 764 let value = self.next_field_type()?; 765 self.tokenizer 766 .next_symbol_expect_eq('>', "map field type")?; 767 Ok(Some(FieldType::Map(Box::new((key, value))))) 768 } else { 769 Ok(None) 770 } 771 } 772 773 // Extensions and Reserved 774 775 // Extensions 776 777 // range = intLit [ "to" ( intLit | "max" ) ] next_range(&mut self) -> anyhow::Result<FieldNumberRange>778 fn next_range(&mut self) -> anyhow::Result<FieldNumberRange> { 779 let from = self.next_field_number()?; 780 let to = if self.tokenizer.next_ident_if_eq("to")? { 781 if self.tokenizer.next_ident_if_eq("max")? { 782 0x20000000 - 1 783 } else { 784 self.next_field_number()? 
785 } 786 } else { 787 from 788 }; 789 Ok(FieldNumberRange { from, to }) 790 } 791 792 // ranges = range { "," range } next_ranges(&mut self) -> anyhow::Result<Vec<FieldNumberRange>>793 fn next_ranges(&mut self) -> anyhow::Result<Vec<FieldNumberRange>> { 794 let mut ranges = Vec::new(); 795 ranges.push(self.next_range()?); 796 while self.tokenizer.next_symbol_if_eq(',')? { 797 ranges.push(self.next_range()?); 798 } 799 Ok(ranges) 800 } 801 802 // extensions = "extensions" ranges ";" next_extensions_opt(&mut self) -> anyhow::Result<Option<Vec<FieldNumberRange>>>803 fn next_extensions_opt(&mut self) -> anyhow::Result<Option<Vec<FieldNumberRange>>> { 804 if self.tokenizer.next_ident_if_eq("extensions")? { 805 Ok(Some(self.next_ranges()?)) 806 } else { 807 Ok(None) 808 } 809 } 810 811 // Reserved 812 813 // Grammar is incorrect: https://github.com/google/protobuf/issues/4558 814 // reserved = "reserved" ( ranges | fieldNames ) ";" 815 // fieldNames = fieldName { "," fieldName } next_reserved_opt( &mut self, ) -> anyhow::Result<Option<(Vec<FieldNumberRange>, Vec<String>)>>816 fn next_reserved_opt( 817 &mut self, 818 ) -> anyhow::Result<Option<(Vec<FieldNumberRange>, Vec<String>)>> { 819 if self.tokenizer.next_ident_if_eq("reserved")? { 820 let (ranges, names) = if let &Token::StrLit(..) = self.tokenizer.lookahead_some()? { 821 let mut names = Vec::new(); 822 names.push(self.tokenizer.next_str_lit()?.decode_utf8()?); 823 while self.tokenizer.next_symbol_if_eq(',')? 
{ 824 names.push(self.tokenizer.next_str_lit()?.decode_utf8()?); 825 } 826 (Vec::new(), names) 827 } else { 828 (self.next_ranges()?, Vec::new()) 829 }; 830 831 self.tokenizer.next_symbol_expect_eq(';', "reserved")?; 832 833 Ok(Some((ranges, names))) 834 } else { 835 Ok(None) 836 } 837 } 838 839 // Top Level definitions 840 841 // Enum definition 842 843 // enumValueOption = optionName "=" constant next_enum_value_option(&mut self) -> anyhow::Result<ProtobufOption>844 fn next_enum_value_option(&mut self) -> anyhow::Result<ProtobufOption> { 845 let name = self.next_option_name()?; 846 self.tokenizer 847 .next_symbol_expect_eq('=', "enum value option")?; 848 let value = self.next_constant()?; 849 Ok(ProtobufOption { name, value }) 850 } 851 852 // https://github.com/google/protobuf/issues/4561 next_enum_value(&mut self) -> anyhow::Result<i32>853 fn next_enum_value(&mut self) -> anyhow::Result<i32> { 854 let minus = self.tokenizer.next_symbol_if_eq('-')?; 855 let lit = self.next_int_lit()?; 856 Ok(if minus { 857 let unsigned = lit.to_i64()?; 858 match unsigned.checked_neg() { 859 Some(neg) => neg.to_i32()?, 860 None => return Err(ParserError::IntegerOverflow.into()), 861 } 862 } else { 863 lit.to_i32()? 864 }) 865 } 866 867 // enumField = ident "=" intLit [ "[" enumValueOption { "," enumValueOption } "]" ]";" next_enum_field(&mut self) -> anyhow::Result<EnumValue>868 fn next_enum_field(&mut self) -> anyhow::Result<EnumValue> { 869 let name = self.tokenizer.next_ident()?.to_owned(); 870 self.tokenizer.next_symbol_expect_eq('=', "enum field")?; 871 let number = self.next_enum_value()?; 872 let mut options = Vec::new(); 873 if self.tokenizer.next_symbol_if_eq('[')? { 874 options.push(self.next_enum_value_option()?); 875 while self.tokenizer.next_symbol_if_eq(',')? 
{
                options.push(self.next_enum_value_option()?);
            }
            self.tokenizer.next_symbol_expect_eq(']', "enum field")?;
        }

        Ok(EnumValue {
            name,
            number,
            options,
        })
    }

    /// Parses an enum definition if the next identifier is `enum`.
    ///
    /// ```text
    /// enum = "enum" enumName enumBody
    /// enumBody = "{" { option | enumField | emptyStatement } "}"
    /// ```
    ///
    /// Returns `Ok(None)` when the next identifier is not `enum`. The returned
    /// location is the lookahead location taken before the keyword is read.
    fn next_enum_opt(&mut self) -> anyhow::Result<Option<WithLoc<Enumeration>>> {
        let loc = self.tokenizer.lookahead_loc();

        if !self.tokenizer.next_ident_if_eq("enum")? {
            return Ok(None);
        }

        let name = self.tokenizer.next_ident()?.to_owned();

        let mut values = Vec::new();
        let mut options = Vec::new();

        self.tokenizer.next_symbol_expect_eq('{', "enum")?;
        while self.tokenizer.lookahead_if_symbol()? != Some('}') {
            // emptyStatement
            if self.tokenizer.next_symbol_if_eq(';')? {
                continue;
            }

            if let Some(o) = self.next_option_opt()? {
                options.push(o);
                continue;
            }

            // Anything that is neither `;` nor an option must be an enum field.
            values.push(self.next_enum_field()?);
        }
        self.tokenizer.next_symbol_expect_eq('}', "enum")?;

        let enumeration = Enumeration {
            name,
            values,
            options,
        };
        Ok(Some(WithLoc {
            loc,
            t: enumeration,
        }))
    }

    // Message definition

    /// Parses a brace-delimited message body.
    ///
    /// ```text
    /// messageBody = "{" { field | enum | message | extend | extensions | group |
    ///               option | oneof | mapField | reserved | emptyStatement } "}"
    /// ```
    ///
    /// `mode` selects which statement kinds are accepted. For a kind that the
    /// mode does not allow, the corresponding keyword is passed to
    /// `next_ident_if_eq_error` (which, judging by its name, fails when the
    /// next identifier is that keyword) instead of being parsed. Whatever is
    /// left after all statement kinds have been tried is parsed as a field.
    fn next_message_body(&mut self, mode: MessageBodyParseMode) -> anyhow::Result<MessageBody> {
        self.tokenizer.next_symbol_expect_eq('{', "message body")?;

        let mut r = MessageBody::default();

        while self.tokenizer.lookahead_if_symbol()? != Some('}') {
            // Location of the statement about to be parsed; attached to fields and oneofs.
            let loc = self.tokenizer.lookahead_loc();

            // emptyStatement
            if self.tokenizer.next_symbol_if_eq(';')? {
                continue;
            }

            if mode.is_most_non_fields_allowed() {
                if let Some((field_nums, field_names)) = self.next_reserved_opt()? {
                    r.reserved_nums.extend(field_nums);
                    r.reserved_names.extend(field_names);
                    continue;
                }

                if let Some(oneof) = self.next_oneof_opt()? {
                    let one_of = FieldOrOneOf::OneOf(oneof);
                    r.fields.push(WithLoc { t: one_of, loc });
                    continue;
                }

                if let Some(extensions) = self.next_extend_opt()? {
                    r.extensions.extend(extensions);
                    continue;
                }

                if let Some(nested_message) = self.next_message_opt()? {
                    r.messages.push(nested_message);
                    continue;
                }

                if let Some(nested_enum) = self.next_enum_opt()? {
                    r.enums.push(nested_enum);
                    continue;
                }
            } else {
                self.tokenizer.next_ident_if_eq_error("reserved")?;
                self.tokenizer.next_ident_if_eq_error("oneof")?;
                self.tokenizer.next_ident_if_eq_error("extend")?;
                self.tokenizer.next_ident_if_eq_error("message")?;
                self.tokenizer.next_ident_if_eq_error("enum")?;
            }

            if mode.is_extensions_allowed() {
                if let Some(extension_ranges) = self.next_extensions_opt()? {
                    r.extension_ranges.extend(extension_ranges);
                    continue;
                }
            } else {
                self.tokenizer.next_ident_if_eq_error("extensions")?;
            }

            if mode.is_option_allowed() {
                if let Some(option) = self.next_option_opt()? {
                    r.options.push(option);
                    continue;
                }
            } else {
                self.tokenizer.next_ident_if_eq_error("option")?;
            }

            let field = FieldOrOneOf::Field(self.next_field(mode)?);
            r.fields.push(WithLoc { t: field, loc });
        }

        self.tokenizer.next_symbol_expect_eq('}', "message body")?;

        Ok(r)
    }

    /// Parses a message definition if the next identifier is `message`.
    ///
    /// ```text
    /// message = "message" messageName messageBody
    /// ```
    ///
    /// The body is parsed in `MessageProto2` or `MessageProto3` mode according
    /// to the parser's current syntax. Returns `Ok(None)` when the next
    /// identifier is not `message`.
    fn next_message_opt(&mut self) -> anyhow::Result<Option<WithLoc<Message>>> {
        let loc = self.tokenizer.lookahead_loc();

        if !self.tokenizer.next_ident_if_eq("message")? {
            return Ok(None);
        }

        let name = self.tokenizer.next_ident()?.to_owned();

        let mode = match self.syntax {
            Syntax::Proto2 => MessageBodyParseMode::MessageProto2,
            Syntax::Proto3 => MessageBodyParseMode::MessageProto3,
        };

        let MessageBody {
            fields,
            reserved_nums,
            reserved_names,
            messages,
            enums,
            options,
            extensions,
            extension_ranges,
        } = self.next_message_body(mode)?;

        let message = Message {
            name,
            fields,
            reserved_nums,
            reserved_names,
            messages,
            enums,
            options,
            extensions,
            extension_ranges,
        };
        Ok(Some(WithLoc { t: message, loc }))
    }

    // Extend

    /// Parses an `extend` block if the next identifier is `extend`.
    ///
    /// ```text
    /// extend = "extend" messageType "{" {field | group | emptyStatement} "}"
    /// ```
    ///
    /// Every field of the block becomes one `Extension` carrying a clone of
    /// the extendee path and the field's own location. Returns `Ok(None)` when
    /// the next identifier is not `extend`.
    ///
    /// # Errors
    ///
    /// Fails with `ParserError::OneOfInExtend` if the parsed body contains a
    /// oneof, in addition to any error from the underlying parsing steps.
    fn next_extend_opt(&mut self) -> anyhow::Result<Option<Vec<WithLoc<Extension>>>> {
        // The keyword is probed on a clone of the parser; the clone is committed
        // back into `self` only when `extend` was actually read.
        let mut clone = self.clone();
        if !clone.tokenizer.next_ident_if_eq("extend")? {
            return Ok(None);
        }

        // According to spec `extend` is only for `proto2`, but it is used in `proto3`
        // https://github.com/google/protobuf/issues/4610

        *self = clone;

        let extendee = self.next_message_or_enum_type()?;

        let mode = match self.syntax {
            Syntax::Proto2 => MessageBodyParseMode::ExtendProto2,
            Syntax::Proto3 => MessageBodyParseMode::ExtendProto3,
        };

        let MessageBody { fields, .. } = self.next_message_body(mode)?;

        // TODO: is oneof allowed in extend?
        // Single pass: reject oneofs and wrap each plain field as an extension.
        let extensions = fields
            .into_iter()
            .map(|fo| match fo.t {
                FieldOrOneOf::Field(field) => {
                    let extendee = extendee.clone();
                    let loc = field.loc;
                    let extension = Extension { extendee, field };
                    Ok(WithLoc { t: extension, loc })
                }
                FieldOrOneOf::OneOf(_) => Err(ParserError::OneOfInExtend),
            })
            .collect::<Result<Vec<_>, ParserError>>()?;

        Ok(Some(extensions))
    }

    // Service definition

    /// Parses the tail of a method declaration: either a braced list of
    /// options and empty statements, or a terminating `;`.
    ///
    /// ```text
    /// (( "{" { option | emptyStatement } "}" ) | ";" )
    /// ```
    ///
    /// Note that, despite the method name, the terminator accepted here is a
    /// semicolon.
    ///
    /// # Errors
    ///
    /// Fails with `ParserError::IncorrectInput` if the braces contain anything
    /// other than options and empty statements.
    fn next_options_or_colon(&mut self) -> anyhow::Result<Vec<ProtobufOption>> {
        let mut options = Vec::new();
        if self.tokenizer.next_symbol_if_eq('{')? {
            while self.tokenizer.lookahead_if_symbol()? != Some('}') {
                if let Some(option) = self.next_option_opt()? {
                    options.push(option);
                    continue;
                }

                if self.next_empty_statement_opt()?.is_some() {
                    continue;
                }

                return Err(ParserError::IncorrectInput.into());
            }
            self.tokenizer.next_symbol_expect_eq('}', "option")?;
        } else {
            self.tokenizer.next_symbol_expect_eq(';', "option")?;
        }

        Ok(options)
    }

    /// Parses a `stream` method declaration if the next identifier is `stream`.
    ///
    /// ```text
    /// stream = "stream" streamName "(" messageType "," messageType ")"
    ///          (( "{" { option | emptyStatement } "}") | ";" )
    /// ```
    ///
    /// The resulting `Method` has both `client_streaming` and
    /// `server_streaming` set to `true`. Returns `Ok(None)` when the next
    /// identifier is not `stream`.
    ///
    /// # Panics
    ///
    /// Panics if the parser's syntax is not `Syntax::Proto2`; the caller in
    /// `next_service_opt` checks the syntax before calling.
    fn next_stream_opt(&mut self) -> anyhow::Result<Option<Method>> {
        assert_eq!(Syntax::Proto2, self.syntax);
        if !self.tokenizer.next_ident_if_eq("stream")? {
            return Ok(None);
        }

        let name = self.tokenizer.next_ident()?;
        self.tokenizer.next_symbol_expect_eq('(', "stream")?;
        let input_type = self.next_message_or_enum_type()?;
        self.tokenizer.next_symbol_expect_eq(',', "stream")?;
        let output_type = self.next_message_or_enum_type()?;
        self.tokenizer.next_symbol_expect_eq(')', "stream")?;
        let options = self.next_options_or_colon()?;
        Ok(Some(Method {
            name,
            input_type,
            output_type,
            client_streaming: true,
            server_streaming: true,
            options,
        }))
    }

    /// Parses an `rpc` method declaration if the next identifier is `rpc`.
    ///
    /// ```text
    /// rpc = "rpc" rpcName "(" [ "stream" ] messageType ")"
    ///       "returns" "(" [ "stream" ] messageType ")"
    ///       (( "{" { option | emptyStatement } "}" ) | ";" )
    /// ```
    ///
    /// `client_streaming` / `server_streaming` record whether the optional
    /// `stream` keyword preceded the input / output type. Returns `Ok(None)`
    /// when the next identifier is not `rpc`.
    fn next_rpc_opt(&mut self) -> anyhow::Result<Option<Method>> {
        if !self.tokenizer.next_ident_if_eq("rpc")? {
            return Ok(None);
        }

        let name = self.tokenizer.next_ident()?;
        self.tokenizer.next_symbol_expect_eq('(', "rpc")?;
        let client_streaming = self.tokenizer.next_ident_if_eq("stream")?;
        let input_type = self.next_message_or_enum_type()?;
        self.tokenizer.next_symbol_expect_eq(')', "rpc")?;
        self.tokenizer.next_ident_expect_eq("returns")?;
        self.tokenizer.next_symbol_expect_eq('(', "rpc")?;
        let server_streaming = self.tokenizer.next_ident_if_eq("stream")?;
        let output_type = self.next_message_or_enum_type()?;
        self.tokenizer.next_symbol_expect_eq(')', "rpc")?;
        let options = self.next_options_or_colon()?;
        Ok(Some(Method {
            name,
            input_type,
            output_type,
            client_streaming,
            server_streaming,
            options,
        }))
    }

    /// Parses a service definition if the next identifier is `service`.
    ///
    /// ```text
    /// proto2:
    /// service = "service" serviceName "{" { option | rpc | stream | emptyStatement } "}"
    ///
    /// proto3:
    /// service = "service" serviceName "{" { option | rpc | emptyStatement } "}"
    /// ```
    ///
    /// Returns `Ok(None)` when the next identifier is not `service`.
    ///
    /// # Errors
    ///
    /// Fails with `ParserError::IncorrectInput` if the body contains a
    /// statement that is none of the kinds listed above.
    fn next_service_opt(&mut self) -> anyhow::Result<Option<WithLoc<Service>>> {
        let loc = self.tokenizer.lookahead_loc();

        if !self.tokenizer.next_ident_if_eq("service")? {
            return Ok(None);
        }

        let name = self.tokenizer.next_ident()?;
        let mut methods = Vec::new();
        let mut options = Vec::new();
        self.tokenizer.next_symbol_expect_eq('{', "service")?;
        while self.tokenizer.lookahead_if_symbol()? != Some('}') {
            if let Some(method) = self.next_rpc_opt()? {
                methods.push(method);
                continue;
            }

            // `stream` declarations are only attempted for proto2 files.
            if self.syntax == Syntax::Proto2 {
                if let Some(method) = self.next_stream_opt()? {
                    methods.push(method);
                    continue;
                }
            }

            if let Some(o) = self.next_option_opt()? {
                options.push(o);
                continue;
            }

            if self.next_empty_statement_opt()?.is_some() {
                continue;
            }

            return Err(ParserError::IncorrectInput.into());
        }
        self.tokenizer.next_symbol_expect_eq('}', "service")?;
        Ok(Some(WithLoc {
            loc,
            t: Service {
                name,
                methods,
                options,
            },
        }))
    }

    // Proto file

    /// Parses a whole `.proto` file into a `FileDescriptor`.
    ///
    /// ```text
    /// proto = syntax { import | package | option | topLevelDef | emptyStatement }
    /// topLevelDef = message | enum | extend | service
    /// ```
    ///
    /// When the file has no `syntax` statement, `Syntax::Proto2` is used. The
    /// detected syntax is stored in `self.syntax` before the rest of the file
    /// is parsed. If several `package` statements occur, the last one wins.
    ///
    /// # Errors
    ///
    /// Fails with `ParserError::IncorrectInput` on a top-level statement that
    /// matches none of the productions above.
    pub fn next_proto(&mut self) -> anyhow::Result<FileDescriptor> {
        let syntax = self.next_syntax()?.unwrap_or(Syntax::Proto2);
        self.syntax = syntax;

        let mut imports = Vec::new();
        let mut package = ProtobufAbsPath::root();
        let mut messages = Vec::new();
        let mut enums = Vec::new();
        let mut extensions = Vec::new();
        let mut options = Vec::new();
        let mut services = Vec::new();

        while !self.tokenizer.syntax_eof()? {
            if let Some(import) = self.next_import_opt()? {
                imports.push(import);
                continue;
            }

            if let Some(next_package) = self.next_package_opt()? {
                package = next_package;
                continue;
            }

            if let Some(option) = self.next_option_opt()? {
                options.push(option);
                continue;
            }

            if let Some(message) = self.next_message_opt()? {
                messages.push(message);
                continue;
            }

            if let Some(enumeration) = self.next_enum_opt()? {
                enums.push(enumeration);
                continue;
            }

            if let Some(more_extensions) = self.next_extend_opt()? {
                extensions.extend(more_extensions);
                continue;
            }

            if let Some(service) = self.next_service_opt()? {
                services.push(service);
                continue;
            }

            // emptyStatement
            if self.tokenizer.next_symbol_if_eq(';')? {
                continue;
            }

            return Err(ParserError::IncorrectInput.into());
        }

        Ok(FileDescriptor {
            imports,
            package,
            syntax,
            messages,
            enums,
            extensions,
            services,
            options,
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Runs `parse_what` on a fresh parser over `input`, panicking on a parse
    /// error, and asserts that the whole input was consumed.
    fn parse<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Parser) -> anyhow::Result<R>,
    {
        let mut parser = Parser::new(input);
        // The panic message is built lazily, only on failure.
        let r = parse_what(&mut parser)
            .unwrap_or_else(|e| panic!("parse failed at {}: {:?}", parser.tokenizer.loc(), e));
        let eof = parser.tokenizer.syntax_eof().unwrap_or_else(|e| {
            panic!("check eof failed at {}: {:?}", parser.tokenizer.loc(), e)
        });
        assert!(eof, "{}", parser.tokenizer.loc());
        r
    }

    /// Like `parse`, for `next_*_opt` style methods: additionally panics when
    /// the parser returns `None`.
    fn parse_opt<P, R>(input: &str, parse_what: P) -> R
    where
        P: FnOnce(&mut Parser) -> anyhow::Result<Option<R>>,
    {
        let mut parser = Parser::new(input);
        let o = parse_what(&mut parser)
            .unwrap_or_else(|e| panic!("parse failed at {}: {:?}", parser.tokenizer.loc(), e));
        let r = o.unwrap_or_else(|| panic!("parser returned none at {}", parser.tokenizer.loc()));
        assert!(parser.tokenizer.syntax_eof().unwrap());
        r
    }

    #[test]
    fn test_syntax() {
        let msg = r#" syntax = "proto3"; "#;
        let mess = parse_opt(msg, |p| p.next_syntax());
        assert_eq!(Syntax::Proto3, mess);
    }

    #[test]
    fn test_field_default_value_int() {
        let msg = r#" optional int64 f = 4 [default = 12]; "#;
        let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
        assert_eq!("f", mess.t.name);
        assert_eq!(
            ProtobufOptionName::simple("default"),
            mess.t.options[0].name
        );
        assert_eq!("12", mess.t.options[0].value.format());
    }

    #[test]
    fn test_field_default_value_float() {
        let msg = r#" optional float f = 2 [default = 10.0]; "#;
        let mess = parse(msg, |p| p.next_field(MessageBodyParseMode::MessageProto2));
        assert_eq!("f", mess.t.name);
        assert_eq!(
            ProtobufOptionName::simple("default"),
            mess.t.options[0].name
        );
        assert_eq!("10", mess.t.options[0].value.format());
    }

    #[test]
    fn test_message() {
        let msg = r#"message ReferenceData
    {
        repeated ScenarioInfo scenarioSet = 1;
        repeated CalculatedObjectInfo calculatedObjectSet = 2;
        repeated RiskFactorList riskFactorListSet = 3;
        repeated RiskMaturityInfo riskMaturitySet = 4;
        repeated IndicatorInfo indicatorSet = 5;
        repeated RiskStrikeInfo riskStrikeSet = 6;
        repeated FreeProjectionList freeProjectionListSet = 7;
        repeated ValidationProperty ValidationSet = 8;
        repeated CalcProperties calcPropertiesSet = 9;
        repeated MaturityInfo maturitySet = 10;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(10, mess.t.fields.len());
    }

    #[test]
    fn test_enum() {
        let msg = r#"enum PairingStatus {
                DEALPAIRED = 0;
                INVENTORYORPHAN = 1;
                CALCULATEDORPHAN = 2;
                CANCELED = 3;
    }"#;

        let enumeration = parse_opt(msg, |p| p.next_enum_opt());
        assert_eq!(4, enumeration.values.len());
    }

    #[test]
    fn test_ignore() {
        let msg = r#"option optimize_for = SPEED;"#;

        parse_opt(msg, |p| p.next_option_opt());
    }

    #[test]
    fn test_import() {
        let msg = r#"syntax = "proto3";

    import "test_import_nested_imported_pb.proto";

    message ContainsImportedNested {
        ContainerForNested.NestedMessage m = 1;
        ContainerForNested.NestedEnum e = 2;
    }
    "#;
        let desc = parse(msg, |p| p.next_proto());

        assert_eq!(
            vec!["test_import_nested_imported_pb.proto"],
            desc.imports
                .into_iter()
                .map(|i| i.path.to_str().to_owned())
                .collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_nested_message() {
        let msg = r#"message A
    {
        message B {
            repeated int32 a = 1;
            optional string b = 2;
        }
        optional string b = 1;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.t.messages.len());
    }

    #[test]
    fn test_map() {
        let msg = r#"message A
    {
        optional map<string, int32> b = 1;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.t.fields.len());
        match mess.t.regular_fields_for_test()[0].typ {
            FieldType::Map(ref f) => match &**f {
                &(FieldType::String, FieldType::Int32) => (),
                ref f => panic!("Expecting Map<String, Int32> found {:?}", f),
            },
            ref f => panic!("Expecting map, got {:?}", f),
        }
    }

    #[test]
    fn test_oneof() {
        let msg = r#"message A
    {
        optional int32 a1 = 1;
        oneof a_oneof {
            string a2 = 2;
            int32 a3 = 3;
            bytes a4 = 4;
        }
        repeated bool a5 = 5;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(1, mess.t.oneofs().len());
        assert_eq!(3, mess.t.oneofs()[0].fields.len());
    }

    #[test]
    fn test_reserved() {
        let msg = r#"message Sample {
       reserved 4, 15, 17 to 20, 30;
       reserved "foo", "bar";
       optional uint64 age =1;
       required bytes name =2;
    }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(
            vec![
                FieldNumberRange { from: 4, to: 4 },
                FieldNumberRange { from: 15, to: 15 },
                FieldNumberRange { from: 17, to: 20 },
                FieldNumberRange { from: 30, to: 30 }
            ],
            mess.t.reserved_nums
        );
        assert_eq!(
            vec!["foo".to_string(), "bar".to_string()],
            mess.t.reserved_names
        );
        assert_eq!(2, mess.t.fields.len());
    }

    #[test]
    fn test_default_value_int() {
        let msg = r#"message Sample {
            optional int32 x = 1 [default = 17];
        }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(
            ProtobufOptionName::simple("default"),
            mess.t.regular_fields_for_test()[0].options[0].name
        );
        assert_eq!(
            "17",
            mess.t.regular_fields_for_test()[0].options[0]
                .value
                .format()
        );
    }

    #[test]
    fn test_default_value_string() {
        let msg = r#"message Sample {
            optional string x = 1 [default = "ab\nc d\"g\'h\0\"z"];
        }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(
            r#""ab\nc d\"g\'h\0\"z""#,
            mess.t.regular_fields_for_test()[0].options[0]
                .value
                .format()
        );
    }

    #[test]
    fn test_default_value_bytes() {
        let msg = r#"message Sample {
            optional bytes x = 1 [default = "ab\nc d\xfeE\"g\'h\0\"z"];
        }"#;

        let mess = parse_opt(msg, |p| p.next_message_opt());
        assert_eq!(
            r#""ab\nc d\xfeE\"g\'h\0\"z""#,
            mess.t.regular_fields_for_test()[0].options[0]
                .value
                .format()
        );
    }

    #[test]
    fn test_group() {
        let msg = r#"message MessageWithGroup {
            optional string aaa = 1;

            repeated group Identifier = 18 {
                optional int32 iii = 19;
                optional string sss = 20;
            }

            required int bbb = 3;
        }"#;
        let mess = parse_opt(msg, |p| p.next_message_opt());

        assert_eq!("identifier", mess.t.regular_fields_for_test()[1].name);
        if let FieldType::Group(Group { fields, .. }) = &mess.t.regular_fields_for_test()[1].typ {
            assert_eq!(2, fields.len());
        } else {
            panic!("expecting group");
        }

        assert_eq!("bbb", mess.t.regular_fields_for_test()[2].name);
    }

    #[test]
    fn test_incorrect_file_descriptor() {
        // The bogus identifier must stay on the 4th line of the input.
        let msg = r#"
            message Foo {}

            dfgdg
        "#;

        let err = FileDescriptor::parse(msg).err().expect("err");
        assert_eq!(4, err.line);
    }
}