//! This crate contains parser combinators, roughly based on the Haskell libraries
//! [parsec](http://hackage.haskell.org/package/parsec) and
//! [attoparsec](https://hackage.haskell.org/package/attoparsec).
//!
//! A parser in this library can be described as a function which takes some input and if it
//! is successful, returns a value together with the remaining input.
//! A parser combinator is a function which takes one or more parsers and returns a new parser.
//! For instance the [`many`] parser can be used to convert a parser for single digits into one that
//! parses multiple digits. By modeling parsers in this way it becomes easy to compose complex
//! parsers in an almost declarative way.
//!
//! # Overview
//!
//! `combine` limits itself to creating [LL(1) parsers](https://en.wikipedia.org/wiki/LL_parser)
//! (it is possible to opt-in to LL(k) parsing using the [`attempt`] combinator) which makes the
//! parsers easy to reason about in both function and performance while sacrificing
//! some generality. Besides making it easier for you to reason about the parsers you construct,
//! `combine` also uses the knowledge of being an LL parser to automatically construct good
//! error messages.
//!
//! ```rust
//! extern crate combine;
//! use combine::{Parser, EasyParser};
//! use combine::stream::position;
//! use combine::parser::char::{digit, letter};
//! const MSG: &'static str = r#"Parse error at line: 1, column: 1
//! Unexpected `|`
//! Expected digit or letter
//! "#;
//!
//! fn main() {
//!     // Wrapping a `&str` with `position::Stream` provides automatic line and column tracking.
//!     // If `position::Stream` was not used the positions would instead only be pointers into
//!     // the `&str`
//!     if let Err(err) = digit().or(letter()).easy_parse(position::Stream::new("|")) {
//!         assert_eq!(MSG, format!("{}", err));
//!     }
//! }
//! ```
//!
//! This library is currently split into a few core modules:
//!
//! * [`parser`][mod parser] is where you will find all the parsers that combine provides. It
//!   contains the core [`Parser`] trait as well as several submodules such as `sequence` or
//!   `choice` which each contain several parsers aimed at a specific niche.
//!
//! * [`stream`] contains the second most important trait next to [`Parser`]. Streams represent the
//!   data source which is being parsed such as `&[u8]`, `&str` or iterators.
//!
//! * [`easy`] contains combine's default "easy" error and stream handling. If you use the
//!   `easy_parse` method to start your parsing these are the types that are used.
//!
//! * [`error`] contains the types and traits that make up combine's error handling. Unless you
//!   need to customize the errors your parsers return you should not need to use this module much.
//!
//!
//! # Examples
//!
//! ```
//! extern crate combine;
//! use combine::parser::char::{spaces, digit, char};
//! use combine::{many1, sep_by, Parser, EasyParser};
//! use combine::stream::easy;
//!
//! fn main() {
//!     //Parse spaces first and use the with method to only keep the result of the next parser
//!     let integer = spaces()
//!         //parse a string of digits into an i32
//!         .with(many1(digit()).map(|string: String| string.parse::<i32>().unwrap()));
//!
//!     //Parse integers separated by commas, skipping whitespace
//!     let mut integer_list = sep_by(integer, spaces().skip(char(',')));
//!
//!     //Call parse with the input to execute the parser
//!     let input = "1234, 45,78";
//!     let result: Result<(Vec<i32>, &str), easy::ParseError<&str>> =
//!         integer_list.easy_parse(input);
//!     match result {
//!         Ok((value, _remaining_input)) => println!("{:?}", value),
//!         Err(err) => println!("{}", err)
//!     }
//! }
//! ```
//!
//! If we need a parser that is mutually recursive or if we want to export a reusable parser the
//! [`parser!`] macro can be used. In effect it makes it possible to return a parser without naming
//! the type of the parser (which can be very large due to combine's trait based approach). While
//! it is possible to avoid naming the type without the macro, those solutions require either
//! allocation (`Box<dyn Parser<Input, Output = O, PartialState = P>>`) or `impl Trait`, which is
//! only available on Rust 1.26 and later and cannot be used recursively on its own. The
//! macro thus threads the needle and makes it possible to have non-allocating, anonymous parsers
//! on older stable compilers as well.
//!
//! ```
//! #[macro_use]
//! extern crate combine;
//! use combine::parser::char::{char, letter, spaces};
//! use combine::{between, choice, many1, parser, sep_by, Parser, EasyParser};
//! use combine::error::{ParseError, StdParseResult};
//! use combine::stream::{Stream, Positioned};
//! use combine::stream::position;
//!
//! #[derive(Debug, PartialEq)]
//! pub enum Expr {
//!     Id(String),
//!     Array(Vec<Expr>),
//!     Pair(Box<Expr>, Box<Expr>)
//! }
//!
//! // `impl Parser` can be used to create reusable parsers with zero overhead
//! fn expr_<Input>() -> impl Parser< Input, Output = Expr>
//!     where Input: Stream<Token = char>,
//!           // Necessary due to rust-lang/rust#24159
//!           Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,
//! {
//!     let word = many1(letter());
//!
//!     // A parser which skips past whitespace.
//!     // Since we aren't interested in knowing that our expression parser
//!     // could have accepted additional whitespace between the tokens we also silence the error.
//!     let skip_spaces = || spaces().silent();
//!
//!     //Creates a parser which parses a char and skips any trailing whitespace
//!     let lex_char = |c| char(c).skip(skip_spaces());
//!
//!     let comma_list = sep_by(expr(), lex_char(','));
//!     let array = between(lex_char('['), lex_char(']'), comma_list);
//!
//!     //We can use tuples to run several parsers in sequence
//!     //The resulting type is a tuple containing each parser's output
//!     let pair = (lex_char('('),
//!                 expr(),
//!                 lex_char(','),
//!                 expr(),
//!                 lex_char(')'))
//!                    .map(|t| Expr::Pair(Box::new(t.1), Box::new(t.3)));
//!
//!     choice((
//!         word.map(Expr::Id),
//!         array.map(Expr::Array),
//!         pair,
//!     ))
//!         .skip(skip_spaces())
//! }
//!
//! // As this expression parser needs to be able to call itself recursively `impl Parser` can't
//! // be used on its own as that would cause an infinitely large type. We can avoid this by using
//! // the `parser!` macro which erases the inner type and the size of that type entirely which
//! // lets it be used recursively.
//! //
//! // (This macro does not use `impl Trait` which means it can be used in rust < 1.26 as well to
//! // emulate `impl Parser`)
//! parser!{
//!     fn expr[Input]()(Input) -> Expr
//!     where [Input: Stream<Token = char>]
//!     {
//!         expr_()
//!     }
//! }
//!
//! fn main() {
//!     let result = expr()
//!         .parse("[[], (hello, world), [rust]]");
//!     let expr = Expr::Array(vec![
//!           Expr::Array(Vec::new())
//!         , Expr::Pair(Box::new(Expr::Id("hello".to_string())),
//!                      Box::new(Expr::Id("world".to_string())))
//!         , Expr::Array(vec![Expr::Id("rust".to_string())])
//!     ]);
//!     assert_eq!(result, Ok((expr, "")));
//! }
//! ```
//!
//! [`combinator`]: combinator/index.html
//! [mod parser]: parser/index.html
//! [`easy`]: easy/index.html
//! [`error`]: error/index.html
//! [`char`]: parser/char/index.html
//! [`byte`]: parser/byte/index.html
//! [`range`]: parser/range/index.html
//! [`many`]: parser/repeat/fn.many.html
[`attempt`]: parser/combinator/fn.attempt.html 181 //! [`satisfy`]: parser/token/fn.satisfy.html 182 //! [`or`]: parser/trait.Parser.html#method.or 183 //! [`Stream`]: stream/trait.Stream.html 184 //! [`RangeStream`]: stream/trait.RangeStream.html 185 //! [`Parser`]: parser/trait.Parser.html 186 //! [fn parser]: parser/function/fn.parser.html 187 //! [`parser!`]: macro.parser.html 188 // inline is only used on trivial functions returning parsers 189 #![allow( 190 clippy::inline_always, 191 clippy::type_complexity, 192 clippy::too_many_arguments, 193 clippy::match_like_matches_macro 194 )] 195 #![cfg_attr(not(feature = "std"), no_std)] 196 #![cfg_attr(docsrs, feature(doc_cfg))] 197 198 #[cfg(feature = "alloc")] 199 extern crate alloc; 200 201 #[doc(inline)] 202 pub use crate::error::{ParseError, ParseResult, StdParseResult}; 203 204 #[cfg(feature = "std")] 205 #[doc(inline)] 206 pub use crate::parser::EasyParser; 207 208 #[doc(inline)] 209 pub use crate::parser::Parser; 210 211 #[doc(inline)] 212 pub use crate::stream::{Positioned, RangeStream, RangeStreamOnce, Stream, StreamOnce}; 213 214 #[doc(inline)] 215 pub use crate::parser::{ 216 choice::optional, 217 combinator::{attempt, look_ahead, not_followed_by}, 218 error::{unexpected, unexpected_any}, 219 function::parser, 220 repeat::{ 221 chainl1, chainr1, count, count_min_max, many, many1, sep_by, sep_by1, sep_end_by, 222 sep_end_by1, skip_count, skip_count_min_max, skip_many, skip_many1, 223 }, 224 sequence::between, 225 token::{ 226 any, eof, none_of, one_of, position, produce, satisfy, satisfy_map, token, tokens, value, 227 }, 228 }; 229 230 #[doc(inline)] 231 pub use crate::parser::choice::choice; 232 233 #[doc(inline)] 234 pub use crate::parser::combinator::from_str; 235 236 #[doc(inline)] 237 pub use crate::parser::token::tokens_cmp; 238 239 /// Declares a named parser which can easily be reused. 
240 /// 241 /// The expression which creates the parser should have no side effects as it may be called 242 /// multiple times even during a single parse attempt. 243 /// 244 /// NOTE: If you are using rust nightly you can use `impl Trait` instead. See the [json parser][] for 245 /// an example. 246 /// 247 /// [json parser]:https://github.com/Marwes/combine/blob/master/benches/json.rs 248 /// 249 /// ``` 250 /// #[macro_use] 251 /// extern crate combine; 252 /// use combine::parser::char::digit; 253 /// use combine::{any, choice, from_str, many1, Parser, EasyParser, Stream}; 254 /// use combine::error::ParseError; 255 /// 256 /// parser!{ 257 /// /// `[Input]` represents a normal type parameters and lifetime declaration for the function 258 /// /// It gets expanded to `<Input>` 259 /// fn integer[Input]()(Input) -> i32 260 /// where [ 261 /// Input: Stream<Token = char>, 262 /// Input::Error: ParseError<char, Input::Range, Input::Position>, 263 /// <Input::Error as ParseError<Input::Token, Input::Range, Input::Position>>::StreamError: 264 /// From<::std::num::ParseIntError>, 265 /// ] 266 /// { 267 /// // The body must be a block body ( `{ <block body> }`) which ends with an expression 268 /// // which evaluates to a parser 269 /// from_str(many1::<String, _, _>(digit())) 270 /// } 271 /// } 272 /// 273 /// #[derive(Debug, PartialEq)] 274 /// pub enum IntOrString { 275 /// Int(i32), 276 /// String(String), 277 /// } 278 /// // prefix with `pub` to declare a public parser 279 /// parser!{ 280 /// // Documentation comments works as well 281 /// 282 /// /// Parses an integer or a string (any characters) 283 /// pub fn integer_or_string[Input]()(Input) -> IntOrString 284 /// where [ 285 /// Input: Stream<Token = char>, 286 /// Input::Error: ParseError<char, Input::Range, Input::Position>, 287 /// <Input::Error as ParseError<Input::Token, Input::Range, Input::Position>>::StreamError: 288 /// From<::std::num::ParseIntError>, 289 /// ] 290 /// { 291 /// choice!( 292 /// 
integer().map(IntOrString::Int), 293 /// many1(any()).map(IntOrString::String) 294 /// ) 295 /// } 296 /// } 297 /// 298 /// parser!{ 299 /// // Give the created type a unique name 300 /// #[derive(Clone)] 301 /// pub struct Twice; 302 /// pub fn twice[Input, F, P](f: F)(Input) -> (P::Output, P::Output) 303 /// where [P: Parser<Input>, 304 /// F: FnMut() -> P] 305 /// { 306 /// (f(), f()) 307 /// } 308 /// } 309 /// 310 /// fn main() { 311 /// assert_eq!(integer().easy_parse("123"), Ok((123, ""))); 312 /// assert!(integer().easy_parse("!").is_err()); 313 /// 314 /// assert_eq!( 315 /// integer_or_string().easy_parse("123"), 316 /// Ok((IntOrString::Int(123), "")) 317 /// ); 318 /// assert_eq!( 319 /// integer_or_string().easy_parse("abc"), 320 /// Ok((IntOrString::String("abc".to_string()), "")) 321 /// ); 322 /// assert_eq!(twice(|| digit()).parse("123"), Ok((('1', '2'), "3"))); 323 /// } 324 /// ``` 325 #[macro_export] 326 macro_rules! parser { 327 ( 328 type PartialState = $partial_state: ty; 329 $(#[$attr:meta])* 330 $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),*) 331 ($input_type: ty) -> $output_type: ty 332 where [$($where_clause: tt)*] 333 $parser: block 334 ) => { 335 $crate::combine_parser_impl!{ 336 #[allow(non_camel_case_types)] 337 #[doc(hidden)] 338 $fn_vis struct $name; 339 (type PartialState = ($partial_state);) 340 $(#[$attr])* 341 $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type 342 where [$($where_clause)*] 343 $parser 344 } 345 }; 346 ( 347 $(#[$derive:meta])* 348 $struct_vis: vis struct $type_name: ident; 349 type PartialState = $partial_state: ty; 350 $(#[$attr:meta])* 351 $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),* ) 352 ($input_type: ty) -> $output_type: ty 353 where [$($where_clause: tt)*] 354 $parser: block 355 ) => { 356 $crate::combine_parser_impl!{ 357 $(#[$derive])* 358 $struct_vis struct $type_name; 359 (type 
PartialState = ($partial_state);) 360 $(#[$attr])* 361 $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type 362 where [$($where_clause)*] 363 $parser 364 } 365 }; 366 ( 367 $(#[$attr:meta])* 368 $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),*) 369 ($input_type: ty) -> $output_type: ty 370 where [$($where_clause: tt)*] 371 $parser: block 372 ) => { 373 $crate::combine_parser_impl!{ 374 #[allow(non_camel_case_types)] 375 #[doc(hidden)] 376 $fn_vis struct $name; 377 (type PartialState = (());) 378 $(#[$attr])* 379 $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type 380 where [$($where_clause)*] 381 $parser 382 } 383 }; 384 ( 385 $(#[$derive:meta])* 386 $struct_vis: vis struct $type_name: ident; 387 $(#[$attr:meta])* 388 $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),* ) 389 ($input_type: ty) -> $output_type: ty 390 where [$($where_clause: tt)*] 391 $parser: block 392 ) => { 393 $crate::combine_parser_impl!{ 394 $(#[$derive])* 395 $struct_vis struct $type_name; 396 (type PartialState = (());) 397 $(#[$attr])* 398 $fn_vis fn $name [$($type_params)*]($($arg : $arg_type),*)($input_type) -> $output_type 399 where [$($where_clause)*] 400 $parser 401 } 402 }; 403 } 404 405 #[doc(hidden)] 406 #[macro_export] 407 macro_rules! combine_parse_partial { 408 ((()) $mode:ident $input:ident $state:ident $parser:block) => {{ 409 let _ = $state; 410 let mut state = Default::default(); 411 let state = &mut state; 412 $parser.parse_mode($mode, $input, state) 413 }}; 414 (($ignored:ty) $mode:ident $input:ident $state:ident $parser:block) => { 415 $parser.parse_mode($mode, $input, $state) 416 }; 417 } 418 419 #[doc(hidden)] 420 #[macro_export] 421 macro_rules! 
combine_parser_impl { 422 ( 423 $(#[$derive:meta])* 424 $struct_vis: vis struct $type_name: ident; 425 (type PartialState = ($($partial_state: tt)*);) 426 $(#[$attr:meta])* 427 $fn_vis: vis fn $name: ident [$($type_params: tt)*]( $($arg: ident : $arg_type: ty),*) 428 ($input_type: ty) -> $output_type: ty 429 where [$($where_clause: tt)*] 430 $parser: block 431 ) => { 432 433 $(#[$derive])* 434 $struct_vis struct $type_name<$($type_params)*> 435 where <$input_type as $crate::stream::StreamOnce>::Error: 436 $crate::error::ParseError< 437 <$input_type as $crate::stream::StreamOnce>::Token, 438 <$input_type as $crate::stream::StreamOnce>::Range, 439 <$input_type as $crate::stream::StreamOnce>::Position 440 >, 441 $input_type: $crate::stream::Stream, 442 $($where_clause)* 443 { 444 $(pub $arg : $arg_type,)* 445 __marker: $crate::lib::marker::PhantomData<fn ($input_type) -> $output_type> 446 } 447 448 // We want this to work on older compilers, at least for a while 449 #[allow(non_shorthand_field_patterns)] 450 impl<$($type_params)*> $crate::Parser<$input_type> for $type_name<$($type_params)*> 451 where <$input_type as $crate::stream::StreamOnce>::Error: 452 $crate::error::ParseError< 453 <$input_type as $crate::stream::StreamOnce>::Token, 454 <$input_type as $crate::stream::StreamOnce>::Range, 455 <$input_type as $crate::stream::StreamOnce>::Position 456 >, 457 $input_type: $crate::stream::Stream, 458 $($where_clause)* 459 { 460 461 type Output = $output_type; 462 type PartialState = $($partial_state)*; 463 464 $crate::parse_mode!($input_type); 465 #[inline] 466 fn parse_mode_impl<M>( 467 &mut self, 468 mode: M, 469 input: &mut $input_type, 470 state: &mut Self::PartialState, 471 ) -> $crate::error::ParseResult<$output_type, <$input_type as $crate::stream::StreamOnce>::Error> 472 where M: $crate::parser::ParseMode 473 { 474 let $type_name { $( $arg: ref mut $arg,)* .. 
} = *self; 475 $crate::combine_parse_partial!(($($partial_state)*) mode input state $parser) 476 } 477 478 #[inline] 479 fn add_error( 480 &mut self, 481 errors: &mut $crate::error::Tracked< 482 <$input_type as $crate::stream::StreamOnce>::Error 483 >) 484 { 485 let $type_name { $( $arg : ref mut $arg,)* .. } = *self; 486 let mut parser = $parser; 487 { 488 let _: &mut dyn $crate::Parser< $input_type, Output = $output_type, PartialState = _> = &mut parser; 489 } 490 parser.add_error(errors) 491 } 492 493 fn add_committed_expected_error( 494 &mut self, 495 errors: &mut $crate::error::Tracked< 496 <$input_type as $crate::stream::StreamOnce>::Error 497 >) 498 { 499 let $type_name { $( $arg : ref mut $arg,)* .. } = *self; 500 let mut parser = $parser; 501 { 502 let _: &mut dyn $crate::Parser< $input_type, Output = $output_type, PartialState = _> = &mut parser; 503 } 504 parser.add_committed_expected_error(errors) 505 } 506 } 507 508 $(#[$attr])* 509 #[inline] 510 $fn_vis fn $name< $($type_params)* >( 511 $($arg : $arg_type),* 512 ) -> $type_name<$($type_params)*> 513 where <$input_type as $crate::stream::StreamOnce>::Error: 514 $crate::error::ParseError< 515 <$input_type as $crate::stream::StreamOnce>::Token, 516 <$input_type as $crate::stream::StreamOnce>::Range, 517 <$input_type as $crate::stream::StreamOnce>::Position 518 >, 519 $input_type: $crate::stream::Stream, 520 $($where_clause)* 521 { 522 $type_name { 523 $($arg,)* 524 __marker: $crate::lib::marker::PhantomData 525 } 526 } 527 }; 528 } 529 530 /// Internal API. May break without a semver bump 531 macro_rules! 
forward_parser { 532 ($input: ty, $method: ident $( $methods: ident)*, $($field: tt)*) => { 533 forward_parser!($input, $method $($field)+); 534 forward_parser!($input, $($methods)*, $($field)+); 535 }; 536 ($input: ty, parse_mode $($field: tt)+) => { 537 #[inline] 538 fn parse_mode_impl<M>( 539 &mut self, 540 mode: M, 541 input: &mut $input, 542 state: &mut Self::PartialState, 543 ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> 544 where 545 M: ParseMode, 546 { 547 self.$($field)+.parse_mode(mode, input, state).map(|(a, _)| a) 548 } 549 }; 550 ($input: ty, parse_lazy $($field: tt)+) => { 551 fn parse_lazy( 552 &mut self, 553 input: &mut $input, 554 ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> { 555 self.$($field)+.parse_lazy(input) 556 } 557 }; 558 ($input: ty, parse_first $($field: tt)+) => { 559 fn parse_first( 560 &mut self, 561 input: &mut $input, 562 state: &mut Self::PartialState, 563 ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> { 564 self.$($field)+.parse_first(input, state) 565 } 566 }; 567 ($input: ty, parse_partial $($field: tt)+) => { 568 fn parse_partial( 569 &mut self, 570 input: &mut $input, 571 state: &mut Self::PartialState, 572 ) -> ParseResult<Self::Output, <$input as $crate::StreamOnce>::Error> { 573 self.$($field)+.parse_partial(input, state) 574 } 575 }; 576 ($input: ty, add_error $($field: tt)+) => { 577 578 fn add_error(&mut self, error: &mut $crate::error::Tracked<<$input as $crate::StreamOnce>::Error>) { 579 self.$($field)+.add_error(error) 580 } 581 }; 582 ($input: ty, add_committed_expected_error $($field: tt)+) => { 583 fn add_committed_expected_error(&mut self, error: &mut $crate::error::Tracked<<$input as $crate::StreamOnce>::Error>) { 584 self.$($field)+.add_committed_expected_error(error) 585 } 586 }; 587 ($input: ty, parser_count $($field: tt)+) => { 588 fn parser_count(&self) -> $crate::ErrorOffset { 589 self.$($field)+.parser_count() 590 } 591 }; 592 ($input: 
ty, $field: tt) => { 593 forward_parser!($input, parse_lazy parse_first parse_partial add_error add_committed_expected_error parser_count, $field); 594 }; 595 ($input: ty, $($field: tt)+) => { 596 }; 597 } 598 599 // Facade over the core types we need 600 // Public but hidden to be accessible in macros 601 #[doc(hidden)] 602 pub mod lib { 603 #[cfg(not(feature = "std"))] 604 pub use core::*; 605 606 #[cfg(feature = "std")] 607 pub use std::*; 608 } 609 610 #[cfg(feature = "std")] 611 #[doc(inline)] 612 pub use crate::stream::easy; 613 614 /// Error types and traits which define what kind of errors combine parsers may emit 615 #[macro_use] 616 pub mod error; 617 #[macro_use] 618 pub mod stream; 619 #[macro_use] 620 pub mod parser; 621 622 #[cfg(feature = "futures-core-03")] 623 pub mod future_ext; 624 625 #[doc(hidden)] 626 #[derive(Clone, PartialOrd, PartialEq, Debug, Copy)] 627 pub struct ErrorOffset(u8); 628 629 #[cfg(test)] 630 mod tests { 631 632 use crate::parser::char::{char, string}; 633 634 use super::*; 635 636 #[test] chainl1_error_consume()637 fn chainl1_error_consume() { 638 fn first<T, U>(t: T, _: U) -> T { 639 t 640 } 641 let mut p = chainl1(string("abc"), char(',').map(|_| first)); 642 assert!(p.parse("abc,ab").is_err()); 643 } 644 645 #[test] choice_strings()646 fn choice_strings() { 647 let mut fruits = [ 648 attempt(string("Apple")), 649 attempt(string("Banana")), 650 attempt(string("Cherry")), 651 attempt(string("Date")), 652 attempt(string("Fig")), 653 attempt(string("Grape")), 654 ]; 655 let mut parser = choice(&mut fruits); 656 assert_eq!(parser.parse("Apple"), Ok(("Apple", ""))); 657 assert_eq!(parser.parse("Banana"), Ok(("Banana", ""))); 658 assert_eq!(parser.parse("Cherry"), Ok(("Cherry", ""))); 659 assert_eq!(parser.parse("DateABC"), Ok(("Date", "ABC"))); 660 assert_eq!(parser.parse("Fig123"), Ok(("Fig", "123"))); 661 assert_eq!(parser.parse("GrapeApple"), Ok(("Grape", "Apple"))); 662 } 663 } 664 665 #[cfg(all(feature = "std", test))] 666 
mod std_tests { 667 668 use crate::{ 669 error::StdParseResult, 670 parser::char::{alpha_num, char, digit, letter, spaces, string}, 671 stream::{ 672 easy, 673 position::{self, SourcePosition}, 674 }, 675 }; 676 677 use super::{easy::Error, error::Commit, stream::IteratorStream, *}; 678 679 #[test] optional_error_consume()680 fn optional_error_consume() { 681 let mut p = optional(string("abc")); 682 let err = p.easy_parse(position::Stream::new("ab")).unwrap_err(); 683 assert_eq!(err.position, SourcePosition { line: 1, column: 1 }); 684 } 685 follow<Input>(input: &mut Input) -> StdParseResult<(), Input> where Input: Stream<Token = char, Error = easy::ParseError<Input>>, Input::Position: Default, Input::Error: std::fmt::Debug, Input::Token: PartialEq, Input::Range: PartialEq,686 fn follow<Input>(input: &mut Input) -> StdParseResult<(), Input> 687 where 688 Input: Stream<Token = char, Error = easy::ParseError<Input>>, 689 Input::Position: Default, 690 Input::Error: std::fmt::Debug, 691 Input::Token: PartialEq, 692 Input::Range: PartialEq, 693 { 694 let before = input.checkpoint(); 695 match input.uncons() { 696 Ok(c) => { 697 if c.is_alphanumeric() { 698 input.reset(before).unwrap(); 699 let e = Error::Unexpected(c.into()); 700 Err(Commit::Peek(easy::Errors::new(input.position(), e).into())) 701 } else { 702 Ok(((), Commit::Peek(()))) 703 } 704 } 705 Err(_) => Ok(((), Commit::Peek(()))), 706 } 707 } 708 integer<Input>(input: &mut Input) -> StdParseResult<i64, Input> where Input: Stream<Token = char>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,709 fn integer<Input>(input: &mut Input) -> StdParseResult<i64, Input> 710 where 711 Input: Stream<Token = char>, 712 Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, 713 { 714 let (s, input) = many1::<String, _, _>(digit()) 715 .expected("integer") 716 .parse_stream(input) 717 .into_result()?; 718 let mut n = 0; 719 for c in s.chars() { 720 n = n * 10 + (c as i64 - '0' as i64); 
721 } 722 Ok((n, input)) 723 } 724 725 #[test] test_integer()726 fn test_integer() { 727 let result = parser(integer).parse("123"); 728 assert_eq!(result, Ok((123i64, ""))); 729 } 730 #[test] list()731 fn list() { 732 let mut p = sep_by(parser(integer), char(',')); 733 let result = p.parse("123,4,56"); 734 assert_eq!(result, Ok((vec![123i64, 4, 56], ""))); 735 } 736 737 #[test] iterator()738 fn iterator() { 739 let result = parser(integer) 740 .parse(position::Stream::new(IteratorStream::new("123".chars()))) 741 .map(|(i, mut input)| (i, input.uncons().is_err())); 742 assert_eq!(result, Ok((123i64, true))); 743 } 744 745 #[test] field()746 fn field() { 747 let word = || many(alpha_num()); 748 let c_decl = (word(), spaces(), char(':'), spaces(), word()) 749 .map(|t| (t.0, t.4)) 750 .parse("x: int"); 751 assert_eq!(c_decl, Ok((("x".to_string(), "int".to_string()), ""))); 752 } 753 754 #[test] source_position()755 fn source_position() { 756 let source = r" 757 123 758 "; 759 let mut parsed_state = position::Stream::with_positioner(source, SourcePosition::new()); 760 let result = (spaces(), parser(integer), spaces()) 761 .map(|t| t.1) 762 .parse_stream(&mut parsed_state) 763 .into_result(); 764 let state = Commit::Commit(position::Stream { 765 positioner: SourcePosition { line: 3, column: 1 }, 766 input: "", 767 }); 768 assert_eq!( 769 result.map(|(x, c)| (x, c.map(|_| parsed_state))), 770 Ok((123i64, state)) 771 ); 772 } 773 774 #[derive(Debug, PartialEq)] 775 pub enum Expr { 776 Id(String), 777 Int(i64), 778 Array(Vec<Expr>), 779 Plus(Box<Expr>, Box<Expr>), 780 Times(Box<Expr>, Box<Expr>), 781 } 782 783 parser! 
{ 784 fn expr[Input]()(Input) -> Expr 785 where 786 [Input: Stream<Token = char>,] 787 { 788 let word = many1(letter()).expected("identifier"); 789 let integer = parser(integer); 790 let array = between(char('['), char(']'), sep_by(expr(), char(','))).expected("["); 791 let paren_expr = between(char('('), char(')'), parser(term)).expected("("); 792 spaces() 793 .silent() 794 .with( 795 word.map(Expr::Id) 796 .or(integer.map(Expr::Int)) 797 .or(array.map(Expr::Array)) 798 .or(paren_expr), 799 ) 800 .skip(spaces().silent()) 801 } 802 } 803 804 #[test] expression_basic()805 fn expression_basic() { 806 let result = sep_by(expr(), char(',')).parse("int, 100, [[], 123]"); 807 let exprs = vec![ 808 Expr::Id("int".to_string()), 809 Expr::Int(100), 810 Expr::Array(vec![Expr::Array(vec![]), Expr::Int(123)]), 811 ]; 812 assert_eq!(result, Ok((exprs, ""))); 813 } 814 815 #[test] expression_error()816 fn expression_error() { 817 let input = r" 818 ,123 819 "; 820 let result = expr().easy_parse(position::Stream::new(input)); 821 let err = easy::Errors { 822 position: SourcePosition { line: 2, column: 1 }, 823 errors: vec![ 824 Error::Unexpected(','.into()), 825 Error::Expected("integer".into()), 826 Error::Expected("identifier".into()), 827 Error::Expected("[".into()), 828 Error::Expected("(".into()), 829 ], 830 }; 831 assert_eq!(result, Err(err)); 832 } 833 term<Input>(input: &mut Input) -> StdParseResult<Expr, Input> where Input: Stream<Token = char>, Input::Error: ParseError<Input::Token, Input::Range, Input::Position>,834 fn term<Input>(input: &mut Input) -> StdParseResult<Expr, Input> 835 where 836 Input: Stream<Token = char>, 837 Input::Error: ParseError<Input::Token, Input::Range, Input::Position>, 838 { 839 fn times(l: Expr, r: Expr) -> Expr { 840 Expr::Times(Box::new(l), Box::new(r)) 841 } 842 fn plus(l: Expr, r: Expr) -> Expr { 843 Expr::Plus(Box::new(l), Box::new(r)) 844 } 845 let mul = char('*').map(|_| times); 846 let add = char('+').map(|_| plus); 847 let factor = 
chainl1(expr(), mul); 848 chainl1(factor, add).parse_stream(input).into() 849 } 850 851 #[test] operators()852 fn operators() { 853 let input = r" 854 1 * 2 + 3 * test 855 "; 856 let (result, _) = parser(term).parse(position::Stream::new(input)).unwrap(); 857 858 let e1 = Expr::Times(Box::new(Expr::Int(1)), Box::new(Expr::Int(2))); 859 let e2 = Expr::Times( 860 Box::new(Expr::Int(3)), 861 Box::new(Expr::Id("test".to_string())), 862 ); 863 assert_eq!(result, Expr::Plus(Box::new(e1), Box::new(e2))); 864 } 865 866 #[test] error_position()867 fn error_position() { 868 let mut p = string("let") 869 .skip(parser(follow)) 870 .map(|x| x.to_string()) 871 .or(many1(digit())); 872 match p.easy_parse(position::Stream::new("le123")) { 873 Ok(_) => panic!(), 874 Err(err) => assert_eq!(err.position, SourcePosition { line: 1, column: 1 }), 875 } 876 match p.easy_parse(position::Stream::new("let1")) { 877 Ok(_) => panic!(), 878 Err(err) => assert_eq!(err.position, SourcePosition { line: 1, column: 4 }), 879 } 880 } 881 882 #[test] sep_by_error_consume()883 fn sep_by_error_consume() { 884 let mut p = sep_by::<Vec<_>, _, _, _>(string("abc"), char(',')); 885 let err = p.easy_parse(position::Stream::new("ab,abc")).unwrap_err(); 886 assert_eq!(err.position, SourcePosition { line: 1, column: 1 }); 887 } 888 889 #[test] inner_error_consume()890 fn inner_error_consume() { 891 let mut p = many::<Vec<_>, _, _>(between(char('['), char(']'), digit())); 892 let result = p.easy_parse(position::Stream::new("[1][2][]")); 893 assert!(result.is_err(), "{:?}", result); 894 let error = result.map(|x| format!("{:?}", x)).unwrap_err(); 895 assert_eq!(error.position, SourcePosition { line: 1, column: 8 }); 896 } 897 898 #[test] infinite_recursion_in_box_parser()899 fn infinite_recursion_in_box_parser() { 900 let _: Result<(Vec<_>, _), _> = (many(Box::new(digit()))).parse("1"); 901 } 902 903 #[test] unsized_parser()904 fn unsized_parser() { 905 let mut parser: Box<dyn Parser<_, Output = char, 
PartialState = _>> = Box::new(digit()); 906 let borrow_parser = &mut *parser; 907 assert_eq!(borrow_parser.parse("1"), Ok(('1', ""))); 908 } 909 910 #[test] std_error()911 fn std_error() { 912 use std::error::Error as StdError; 913 914 use std::fmt; 915 916 #[derive(Debug)] 917 struct Error; 918 impl fmt::Display for Error { 919 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 920 write!(f, "error") 921 } 922 } 923 impl StdError for Error { 924 fn description(&self) -> &str { 925 "error" 926 } 927 } 928 let result: Result<((), _), easy::Errors<char, &str, _>> = 929 EasyParser::easy_parse(&mut string("abc").and_then(|_| Err(Error)), "abc"); 930 assert!(result.is_err()); 931 // Test that ParseError can be coerced to a StdError 932 let _ = result.map_err(|err| { 933 let err: Box<dyn StdError> = Box::new(err); 934 err 935 }); 936 } 937 938 #[test] extract_std_error()939 fn extract_std_error() { 940 // The previous test verified that we could map a ParseError to a StdError by dropping 941 // the internal error details. 942 // This test verifies that we can map a ParseError to a StdError 943 // without dropping the internal error details. Consumers using `error-chain` will 944 // appreciate this. For technical reasons this is pretty janky; see the discussion in 945 // https://github.com/Marwes/combine/issues/86, and excuse the test with significant 946 // boilerplate! 
947 use std::error::Error as StdError; 948 949 use std::fmt; 950 951 #[derive(Clone, PartialEq, Debug)] 952 struct CloneOnly(String); 953 954 #[derive(Debug)] 955 struct DisplayVec<T>(Vec<T>); 956 957 #[derive(Debug)] 958 struct ExtractedError(usize, DisplayVec<Error<CloneOnly, DisplayVec<CloneOnly>>>); 959 960 impl StdError for ExtractedError { 961 fn description(&self) -> &str { 962 "extracted error" 963 } 964 } 965 966 impl fmt::Display for CloneOnly { 967 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 968 write!(f, "{}", self.0) 969 } 970 } 971 972 impl<T: fmt::Debug> fmt::Display for DisplayVec<T> { 973 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 974 write!(f, "[{:?}]", self.0) 975 } 976 } 977 978 impl fmt::Display for ExtractedError { 979 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 980 writeln!(f, "Parse error at {}", self.0)?; 981 Error::fmt_errors(&(self.1).0, f) 982 } 983 } 984 985 let input = &[CloneOnly("x".to_string()), CloneOnly("y".to_string())][..]; 986 let result = token(CloneOnly("z".to_string())) 987 .easy_parse(input) 988 .map_err(|e| e.map_position(|p| p.translate_position(input))) 989 .map_err(|e| { 990 ExtractedError( 991 e.position, 992 DisplayVec( 993 e.errors 994 .into_iter() 995 .map(|e| e.map_range(|r| DisplayVec(r.to_owned()))) 996 .collect(), 997 ), 998 ) 999 }); 1000 1001 assert!(result.is_err()); 1002 // Test that the fresh ExtractedError is Display, so that the internal errors can be 1003 // inspected by consuming code; and that the ExtractedError can be coerced to StdError. 1004 let _ = result.map_err(|err| { 1005 let s = format!("{}", err); 1006 assert!(s.starts_with("Parse error at 0")); 1007 assert!(s.contains("Expected")); 1008 let err: Box<dyn StdError> = Box::new(err); 1009 err 1010 }); 1011 } 1012 } 1013