• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 //! The core functionality of parsing grammar.
11 //! State of parser during the process of rules handling.
12 
13 use alloc::borrow::ToOwned;
14 use alloc::boxed::Box;
15 use alloc::collections::BTreeSet;
16 use alloc::rc::Rc;
17 use alloc::string::String;
18 use alloc::vec;
19 use alloc::vec::Vec;
20 use core::fmt::{Debug, Display, Formatter};
21 use core::num::NonZeroUsize;
22 use core::ops::Range;
23 use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
24 
25 use crate::error::{Error, ErrorVariant};
26 use crate::iterators::pairs::new;
27 use crate::iterators::{pairs, QueueableToken};
28 use crate::position::Position;
29 use crate::span::Span;
30 use crate::stack::Stack;
31 use crate::RuleType;
32 
33 /// The current lookahead status of a [`ParserState`].
34 ///
35 /// [`ParserState`]: struct.ParserState.html
36 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
37 pub enum Lookahead {
38     /// The positive predicate, written as an ampersand &,
39     /// attempts to match its inner expression.
40     /// If the inner expression succeeds, parsing continues,
41     /// but at the same position as the predicate —
42     /// &foo ~ bar is thus a kind of "AND" statement:
43     /// "the input string must match foo AND bar".
44     /// If the inner expression fails,
45     /// the whole expression fails too.
46     Positive,
47     /// The negative predicate, written as an exclamation mark !,
48     /// attempts to match its inner expression.
49     /// If the inner expression fails, the predicate succeeds
50     /// and parsing continues at the same position as the predicate.
51     /// If the inner expression succeeds, the predicate fails —
52     /// !foo ~ bar is thus a kind of "NOT" statement:
53     /// "the input string must match bar but NOT foo".
54     Negative,
55     /// No lookahead (i.e. it will consume input).
56     None,
57 }
58 
59 /// The current atomicity of a [`ParserState`].
60 ///
61 /// [`ParserState`]: struct.ParserState.html
62 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
63 pub enum Atomicity {
64     /// prevents implicit whitespace: inside an atomic rule,
65     /// the tilde ~ means "immediately followed by",
66     /// and repetition operators (asterisk * and plus sign +)
67     /// have no implicit separation. In addition, all other rules
68     /// called from an atomic rule are also treated as atomic.
69     /// (interior matching rules are silent)
70     Atomic,
71     /// The same as atomic, but inner tokens are produced as normal.
72     CompoundAtomic,
73     /// implicit whitespace is enabled
74     NonAtomic,
75 }
76 
77 /// Type alias to simplify specifying the return value of chained closures.
78 pub type ParseResult<S> = Result<S, S>;
79 
80 /// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
81 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
82 pub enum MatchDir {
83     /// from the bottom to the top of the stack
84     BottomToTop,
85     /// from the top to the bottom of the stack
86     TopToBottom,
87 }
88 
89 static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
90 
91 /// Sets the maximum call limit for the parser state
92 /// to prevent stack overflows or excessive execution times
93 /// in some grammars.
94 /// If set, the calls are tracked as a running total
95 /// over all non-terminal rules that can nest closures
96 /// (which are passed to transform the parser state).
97 ///
98 /// # Arguments
99 ///
100 /// * `limit` - The maximum number of calls. If None,
101 ///             the number of calls is unlimited.
set_call_limit(limit: Option<NonZeroUsize>)102 pub fn set_call_limit(limit: Option<NonZeroUsize>) {
103     CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
104 }
105 
106 static ERROR_DETAIL: AtomicBool = AtomicBool::new(false);
107 
108 /// Sets whether information for more error details
109 /// should be collected. This is useful for debugging
110 /// parser errors (as it leads to more comprehensive
111 /// error messages), but it has a higher performance cost.
112 /// (hence, it's off by default)
113 ///
114 /// # Arguments
115 ///
116 /// * `enabled` - Whether to enable the collection for
117 ///               more error details.
set_error_detail(enabled: bool)118 pub fn set_error_detail(enabled: bool) {
119     ERROR_DETAIL.store(enabled, Ordering::Relaxed);
120 }
121 
122 #[derive(Debug)]
123 struct CallLimitTracker {
124     current_call_limit: Option<(usize, usize)>,
125 }
126 
127 impl Default for CallLimitTracker {
default() -> Self128     fn default() -> Self {
129         let limit = CALL_LIMIT.load(Ordering::Relaxed);
130         let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
131         Self { current_call_limit }
132     }
133 }
134 
135 impl CallLimitTracker {
limit_reached(&self) -> bool136     fn limit_reached(&self) -> bool {
137         self.current_call_limit
138             .map_or(false, |(current, limit)| current >= limit)
139     }
140 
increment_depth(&mut self)141     fn increment_depth(&mut self) {
142         if let Some((current, _)) = &mut self.current_call_limit {
143             *current += 1;
144         }
145     }
146 }
147 
148 /// Number of call stacks that may result from a sequence of rules parsing.
149 const CALL_STACK_INITIAL_CAPACITY: usize = 20;
150 /// Max (un)expected number of tokens that we may see on the parsing error position.
151 const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30;
152 /// Max rule children number for which we'll extend calls stacks.
153 ///
154 /// In case rule we're working with has too many children rules that failed in parsing,
155 /// we don't want to store long stacks for all of them. If rule has more than this number
156 /// of failed children, they all will be collapsed in a parent rule.
157 const CALL_STACK_CHILDREN_THRESHOLD: usize = 4;
158 
159 /// Structure tracking errored parsing call (associated with specific `ParserState` function).
160 #[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
161 pub enum ParseAttempt<R> {
162     /// Call of `rule` errored.
163     Rule(R),
164     /// Call of token element (e.g., `match_string` or `match_insensitive`) errored.
165     /// Works as indicator of that leaf node is not a rule. In order to get the token value we
166     /// can address `ParseAttempts` `(un)expected_tokens`.
167     Token,
168 }
169 
170 impl<R> ParseAttempt<R> {
get_rule(&self) -> Option<&R>171     pub fn get_rule(&self) -> Option<&R> {
172         match self {
173             ParseAttempt::Rule(r) => Some(r),
174             ParseAttempt::Token => None,
175         }
176     }
177 }
178 
179 /// Rules call stack.
180 /// Contains sequence of rule calls that resulted in new parsing attempt.
181 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
182 pub struct RulesCallStack<R> {
183     /// Deepest rule caused a parsing error (ParseAttempt::Token transformed into a rule).
184     pub deepest: ParseAttempt<R>,
185     /// Most top rule covering `deepest`.
186     pub parent: Option<R>,
187 }
188 
189 impl<R> RulesCallStack<R> {
new(deepest: ParseAttempt<R>) -> RulesCallStack<R>190     fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> {
191         RulesCallStack {
192             deepest,
193             parent: None,
194         }
195     }
196 }
197 
198 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
199 pub enum ParsingToken {
200     Sensitive { token: String },
201     Insensitive { token: String },
202     Range { start: char, end: char },
203     BuiltInRule,
204 }
205 
206 impl Display for ParsingToken {
fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result207     fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
208         match self {
209             ParsingToken::Sensitive { token } => write!(f, "{token}"),
210             ParsingToken::Insensitive { token } => write!(f, "{}", token.to_uppercase()),
211             ParsingToken::Range { start, end } => write!(f, "{start}..{end}"),
212             ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE"),
213         }
214     }
215 }
216 
217 /// Structure that tracks all the parsing attempts made on the max position.
218 /// We want to give an error hint about parsing rules that succeeded
219 /// at the farthest input position.
220 /// The intuition is such rules will be most likely the query user initially wanted to write.
221 #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
222 pub struct ParseAttempts<R> {
223     /// Indicates whether the parsing attempts are tracked.
224     enabled: bool,
225     /// Vec of rule calls sequences awaiting tokens at the same `max_position`.
226     /// If there are several stacks in vec, it means all those rule stacks are "equal"
227     /// because their attempts occurred on the same position.
228     pub call_stacks: Vec<RulesCallStack<R>>,
229     /// Tokens that could be putted at `max_position`
230     /// in order to get a valid grammar query.
231     expected_tokens: Vec<ParsingToken>,
232     /// Tokens that we've prohibited to be putted at `max_position`
233     /// in order to get a valid grammar query.
234     unexpected_tokens: Vec<ParsingToken>,
235     /// Max position at which we were expecting to see one of `expected_tokens`.
236     pub max_position: usize,
237 }
238 
239 impl<R: RuleType> ParseAttempts<R> {
240     /// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens`
241     /// initialized with capacity.
new() -> Self242     pub fn new() -> Self {
243         Self {
244             call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY),
245             expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
246             unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
247             max_position: 0,
248             enabled: ERROR_DETAIL.load(Ordering::Relaxed),
249         }
250     }
251 
252     /// Get number of currently present call stacks.
call_stacks_number(&self) -> usize253     fn call_stacks_number(&self) -> usize {
254         self.call_stacks.len()
255     }
256 
expected_tokens(&self) -> Vec<ParsingToken>257     pub fn expected_tokens(&self) -> Vec<ParsingToken> {
258         self.expected_tokens
259             .iter()
260             .cloned()
261             .collect::<BTreeSet<_>>()
262             .into_iter()
263             .collect()
264     }
265 
unexpected_tokens(&self) -> Vec<ParsingToken>266     pub fn unexpected_tokens(&self) -> Vec<ParsingToken> {
267         self.unexpected_tokens
268             .iter()
269             .cloned()
270             .collect::<BTreeSet<_>>()
271             .into_iter()
272             .collect()
273     }
274 
275     /// Retrieve call stacks.
call_stacks(&self) -> Vec<RulesCallStack<R>>276     pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> {
277         self.call_stacks
278             .iter()
279             .cloned()
280             .collect::<BTreeSet<_>>()
281             .into_iter()
282             .collect()
283     }
284 
285     /// In case we've tried to parse a rule, which start position is bigger than previous
286     /// `max_position` it means that we've advanced in our parsing and found better candidate.
287     ///
288     /// `start_index` is:
289     /// * Number of call stacks present in state at the moment current `rule` was called. The idea
290     ///   is that we'd like to update only those stacks that originated from the current `rule` and
291     ///   not from those that were called previously.
292     /// * 0 in case we've successfully parsed some token since the moment `rule` was called.
try_add_new_stack_rule(&mut self, rule: R, start_index: usize)293     fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) {
294         let mut non_token_call_stacks = Vec::new();
295         let mut token_call_stack_met = false;
296         for call_stack in self.call_stacks.iter().skip(start_index) {
297             if matches!(call_stack.deepest, ParseAttempt::Token) {
298                 token_call_stack_met = true;
299             } else {
300                 non_token_call_stacks.push(call_stack.clone())
301             }
302         }
303         if token_call_stack_met && non_token_call_stacks.is_empty() {
304             // If `non_token_call_stacks` is not empty we wouldn't like to add a new standalone
305             // `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a
306             // rule) as soon as it doesn't give us any useful additional info.
307             non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token));
308         }
309         self.call_stacks
310             .splice(start_index.., non_token_call_stacks);
311 
312         let children_number_over_threshold =
313             self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD;
314         if children_number_over_threshold {
315             self.call_stacks.truncate(start_index);
316             self.call_stacks
317                 .push(RulesCallStack::new(ParseAttempt::Rule(rule)));
318         } else {
319             for call_stack in self.call_stacks.iter_mut().skip(start_index) {
320                 if matches!(call_stack.deepest, ParseAttempt::Token) {
321                     call_stack.deepest = ParseAttempt::Rule(rule);
322                 } else {
323                     call_stack.parent = Some(rule);
324                 }
325             }
326         }
327     }
328 
329     /// If `expected` flag is set to false, it means we've successfully parsed token being in the
330     /// state of negative lookahead and want to track `token` in the `unexpected_tokens`. Otherwise,
331     /// we want to track it the `expected_tokens`. Let's call chosen vec a `target_vec`.
332     ///
333     /// In case `position` is:
334     /// * Equal to `max_position`, add `token` to `target_vec`,
335     /// * Bigger than `max_position`, set `token` as the only new element of `target_vec`.
336     #[allow(clippy::comparison_chain)]
try_add_new_token( &mut self, token: ParsingToken, start_position: usize, position: usize, negative_lookahead: bool, )337     fn try_add_new_token(
338         &mut self,
339         token: ParsingToken,
340         start_position: usize,
341         position: usize,
342         negative_lookahead: bool,
343     ) {
344         let target_vec_push_token = |attempts: &mut ParseAttempts<R>| {
345             let target_vec = if negative_lookahead {
346                 &mut attempts.unexpected_tokens
347             } else {
348                 &mut attempts.expected_tokens
349             };
350             target_vec.push(token);
351         };
352 
353         if position > self.max_position {
354             if negative_lookahead && start_position > self.max_position {
355                 // We encountered a sequence under negative lookahead.
356                 // We would like to track only first failed token in this sequence (which
357                 // `start_position` should be equal to `self.max_position`).
358                 return;
359             }
360             target_vec_push_token(self);
361 
362             if negative_lookahead {
363                 // In case of successful parsing of token under negative lookahead the only
364                 // thing we'd like to do is to track the token in the `unexpected_tokens`.
365                 return;
366             }
367             self.max_position = position;
368             self.expected_tokens.clear();
369             self.unexpected_tokens.clear();
370             self.call_stacks.clear();
371             self.call_stacks
372                 .push(RulesCallStack::new(ParseAttempt::Token));
373         } else if position == self.max_position {
374             target_vec_push_token(self);
375             self.call_stacks
376                 .push(RulesCallStack::new(ParseAttempt::Token));
377         }
378     }
379 
380     /// Reset state in case we've successfully parsed some token in
381     /// `match_string` or `match_insensetive`.
nullify_expected_tokens(&mut self, new_max_position: usize)382     fn nullify_expected_tokens(&mut self, new_max_position: usize) {
383         self.call_stacks.clear();
384         self.expected_tokens.clear();
385         self.unexpected_tokens.clear();
386         self.max_position = new_max_position;
387     }
388 }
389 
390 impl<R: RuleType> Default for ParseAttempts<R> {
default() -> Self391     fn default() -> Self {
392         Self::new()
393     }
394 }
395 
396 /// The complete state of a [`Parser`].
397 ///
398 /// [`Parser`]: trait.Parser.html
399 #[derive(Debug)]
400 pub struct ParserState<'i, R: RuleType> {
401     /// Current position from which we try to apply some parser function.
402     /// Initially is 0.
403     /// E.g., we are parsing `create user 'Bobby'` query, we parsed "create" via `match_insensitive`
404     /// and switched our `position` from 0 to the length of "create".
405     ///
406     /// E.g., see `match_string` -> `self.position.match_string(string)` which updates `self.pos`.
407     position: Position<'i>,
408     /// Queue representing rules partially (`QueueableToken::Start`) and
409     /// totally (`QueueableToken::End`) parsed. When entering rule we put it in the queue in a state
410     /// of `Start` and after all its sublogic (subrules or strings) are parsed, we change it to
411     /// `End` state.
412     queue: Vec<QueueableToken<'i, R>>,
413     /// Status set in case specific lookahead logic is used in grammar.
414     /// See `Lookahead` for more information.
415     lookahead: Lookahead,
416     /// Rules that we HAVE expected, tried to parse, but failed.
417     pos_attempts: Vec<R>,
418     /// Rules that we have NOT expected, tried to parse, but failed.
419     neg_attempts: Vec<R>,
420     /// Max position in the query from which we've tried to parse some rule but failed.
421     attempt_pos: usize,
422     /// Current atomicity status. For more information see `Atomicity`.
423     atomicity: Atomicity,
424     /// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP
425     /// invocations).
426     stack: Stack<Span<'i>>,
427     /// Used for setting max parser calls limit.
428     call_tracker: CallLimitTracker,
429     /// Together with tracking of `pos_attempts` and `attempt_pos`
430     /// as a pair of (list of rules that we've tried to parse but failed, max parsed position)
431     /// we track those rules (which we've tried to parse at the same max pos) at this helper struct.
432     ///
433     /// Note, that we may try to parse several rules on different positions. We want to track only
434     /// those rules, which attempt position is bigger, because we consider that it's nearer to the
435     /// query that user really wanted to pass.
436     ///
437     /// E.g. we have a query `create user "Bobby"` and two root rules:
438     /// * CreateUser  = { "create" ~ "user"  ~ Name }
439     /// * CreateTable = { "create" ~ "table" ~ Name }
440     /// * Name = { SOME_DEFINITION }
441     ///
442     /// While parsing the query we'll update tracker position to the start of "Bobby", because we'd
443     /// successfully parse "create" + "user" (and not "table").
444     parse_attempts: ParseAttempts<R>,
445 }
446 
447 /// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
448 ///
449 /// # Examples
450 ///
451 /// ```
452 /// # use pest;
453 /// let input = "";
454 /// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
455 /// ```
456 #[allow(clippy::perf)]
state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>> where F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,457 pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
458 where
459     F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
460 {
461     let state = ParserState::new(input);
462 
463     match f(state) {
464         Ok(state) => {
465             let len = state.queue.len();
466             Ok(new(Rc::new(state.queue), input, None, 0, len))
467         }
468         Err(mut state) => {
469             let variant = if state.reached_call_limit() {
470                 ErrorVariant::CustomError {
471                     message: "call limit reached".to_owned(),
472                 }
473             } else {
474                 state.pos_attempts.sort();
475                 state.pos_attempts.dedup();
476                 state.neg_attempts.sort();
477                 state.neg_attempts.dedup();
478                 ErrorVariant::ParsingError {
479                     positives: state.pos_attempts.clone(),
480                     negatives: state.neg_attempts.clone(),
481                 }
482             };
483 
484             if state.parse_attempts.enabled {
485                 Err(Error::new_from_pos_with_parsing_attempts(
486                     variant,
487                     Position::new_internal(input, state.attempt_pos),
488                     state.parse_attempts.clone(),
489                 ))
490             } else {
491                 Err(Error::new_from_pos(
492                     variant,
493                     Position::new_internal(input, state.attempt_pos),
494                 ))
495             }
496         }
497     }
498 }
499 
500 impl<'i, R: RuleType> ParserState<'i, R> {
501     /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
502     /// will be passed from closure to closure based on the needs of the specified `Parser`.
503     ///
504     /// # Examples
505     ///
506     /// ```
507     /// # use pest;
508     /// let input = "";
509     /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
510     /// ```
new(input: &'i str) -> Box<Self>511     pub fn new(input: &'i str) -> Box<Self> {
512         Box::new(ParserState {
513             position: Position::from_start(input),
514             queue: vec![],
515             lookahead: Lookahead::None,
516             pos_attempts: vec![],
517             neg_attempts: vec![],
518             attempt_pos: 0,
519             atomicity: Atomicity::NonAtomic,
520             stack: Stack::new(),
521             call_tracker: Default::default(),
522             parse_attempts: ParseAttempts::new(),
523         })
524     }
525 
526     /// Get all parse attempts after process of parsing is finished.
get_parse_attempts(&self) -> &ParseAttempts<R>527     pub fn get_parse_attempts(&self) -> &ParseAttempts<R> {
528         &self.parse_attempts
529     }
530 
531     /// Returns a reference to the current `Position` of the `ParserState`.
532     ///
533     /// # Examples
534     ///
535     /// ```
536     /// # use pest;
537     /// # #[allow(non_camel_case_types)]
538     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
539     /// enum Rule {
540     ///     ab
541     /// }
542     ///
543     /// let input = "ab";
544     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
545     /// let position = state.position();
546     /// assert_eq!(position.pos(), 0);
547     /// ```
position(&self) -> &Position<'i>548     pub fn position(&self) -> &Position<'i> {
549         &self.position
550     }
551 
552     /// Returns the current atomicity of the `ParserState`.
553     ///
554     /// # Examples
555     ///
556     /// ```
557     /// # use pest;
558     /// # use pest::Atomicity;
559     /// # #[allow(non_camel_case_types)]
560     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
561     /// enum Rule {
562     ///     ab
563     /// }
564     ///
565     /// let input = "ab";
566     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
567     /// let atomicity = state.atomicity();
568     /// assert_eq!(atomicity, Atomicity::NonAtomic);
569     /// ```
atomicity(&self) -> Atomicity570     pub fn atomicity(&self) -> Atomicity {
571         self.atomicity
572     }
573 
574     #[inline]
inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>>575     fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
576         if self.call_tracker.limit_reached() {
577             return Err(self);
578         }
579         self.call_tracker.increment_depth();
580         Ok(self)
581     }
582 
583     #[inline]
reached_call_limit(&self) -> bool584     fn reached_call_limit(&self) -> bool {
585         self.call_tracker.limit_reached()
586     }
587 
588     /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
589     /// meant to match the rule.
590     ///
591     /// # Examples
592     ///
593     /// ```
594     /// # use pest;
595     /// # #[allow(non_camel_case_types)]
596     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
597     /// enum Rule {
598     ///     a
599     /// }
600     ///
601     /// let input = "a";
602     /// let pairs: Vec<_> = pest::state(input, |state| {
603     ///     state.rule(Rule::a, |s| Ok(s))
604     /// }).unwrap().collect();
605     ///
606     /// assert_eq!(pairs.len(), 1);
607     /// ```
608     #[inline]
rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,609     pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
610     where
611         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
612     {
613         self = self.inc_call_check_limit()?;
614         // Position from which this `rule` starts parsing.
615         let actual_pos = self.position.pos();
616         // Remember index of the `self.queue` element that will be associated with this `rule`.
617         let index = self.queue.len();
618 
619         let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
620             (self.pos_attempts.len(), self.neg_attempts.len())
621         } else {
622             // Attempts have not been cleared yet since the attempt_pos is older.
623             (0, 0)
624         };
625 
626         if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
627             // Pair's position will only be known after running the closure.
628             self.queue.push(QueueableToken::Start {
629                 end_token_index: 0,
630                 input_pos: actual_pos,
631             });
632         }
633 
634         // Remember attempts number before `f` call.
635         // In `track` using this variable we can say, how many attempts were added
636         // during children rules traversal.
637         let attempts = self.attempts_at(actual_pos);
638         // Number of call stacks present in `self.parse_attempts` before `f` call.
639         // We need to remember this number only in case there wasn't found any farther attempt.
640         // E.g. we are handling rule, on start position of which may be tested two
641         // children rules. At the moment we'll return from `f` call below,
642         // there will be two more children rules in `self.parse_attempts` that we'll
643         // consider to be the children of current `rule`.
644         let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number();
645         // Max parsing attempt position at the moment of `rule` handling.
646         // It case it's raised during children rules handling, it means
647         // we've made a parsing progress.
648         let remember_max_position = self.parse_attempts.max_position;
649 
650         let result = f(self);
651 
652         let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| {
653             if new_state.parse_attempts.max_position > remember_max_position {
654                 // It means that one of `match_string` or e.g. `match_insensetive` function calls
655                 // have already erased `self.parse_attempts.call_stacks` and that previously
656                 // remembered values are not valid anymore.
657                 remember_call_stacks_number = 0;
658             }
659             if !matches!(new_state.atomicity, Atomicity::Atomic) {
660                 new_state
661                     .parse_attempts
662                     .try_add_new_stack_rule(rule, remember_call_stacks_number);
663             }
664         };
665 
666         match result {
667             Ok(mut new_state) => {
668                 if new_state.lookahead == Lookahead::Negative {
669                     new_state.track(
670                         rule,
671                         actual_pos,
672                         pos_attempts_index,
673                         neg_attempts_index,
674                         attempts,
675                     );
676                 }
677 
678                 if new_state.lookahead == Lookahead::None
679                     && new_state.atomicity != Atomicity::Atomic
680                 {
681                     // Index of `QueueableToken::End` token added below
682                     // that corresponds to previously added `QueueableToken::Start` token.
683                     let new_index = new_state.queue.len();
684                     match new_state.queue[index] {
685                         QueueableToken::Start {
686                             ref mut end_token_index,
687                             ..
688                         } => *end_token_index = new_index,
689                         _ => unreachable!(),
690                     };
691 
692                     let new_pos = new_state.position.pos();
693 
694                     new_state.queue.push(QueueableToken::End {
695                         start_token_index: index,
696                         rule,
697                         tag: None,
698                         input_pos: new_pos,
699                     });
700                 }
701 
702                 // Note, that we need to count positive parsing results too, because we can fail in
703                 // optional rule call inside which may lie the farthest
704                 // parsed token.
705                 if new_state.parse_attempts.enabled {
706                     try_add_rule_to_stack(&mut new_state);
707                 }
708                 Ok(new_state)
709             }
710             Err(mut new_state) => {
711                 if new_state.lookahead != Lookahead::Negative {
712                     new_state.track(
713                         rule,
714                         actual_pos,
715                         pos_attempts_index,
716                         neg_attempts_index,
717                         attempts,
718                     );
719                     if new_state.parse_attempts.enabled {
720                         try_add_rule_to_stack(&mut new_state);
721                     }
722                 }
723 
724                 if new_state.lookahead == Lookahead::None
725                     && new_state.atomicity != Atomicity::Atomic
726                 {
727                     new_state.queue.truncate(index);
728                 }
729 
730                 Err(new_state)
731             }
732         }
733     }
734 
735     /// Tag current node
736     ///
737     /// # Examples
738     ///
739     /// Try to recognize the one specified in a set of characters
740     ///
741     /// ```
742     /// use pest::{state, ParseResult, ParserState, iterators::Pair};
743     /// #[allow(non_camel_case_types)]
744     /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
745     /// enum Rule {
746     ///     character,
747     /// }
748     /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
749     ///     state.sequence(|state| {
750     ///         character(state)
751     ///             .and_then(|state| character(state))
752     ///             .and_then(|state| character(state))
753     ///             .and_then(|state| state.tag_node("c"))
754     ///             .and_then(|state| character(state))
755     ///     })
756     /// }
757     /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
758     ///     state.rule(Rule::character, |state| state.match_range('a'..'z'))
759     /// }
760     ///
761     /// let input = "abcd";
762     /// let pairs = state(input, mark_c).unwrap();
763     /// // find all node tag as `c`
764     /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
765     /// assert_eq!(find[0].as_str(), "c")
766     /// ```
767     #[inline]
tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>>768     pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> {
769         if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
770             *old = Some(tag)
771         }
772         Ok(self)
773     }
774 
775     /// Get number of allowed rules attempts + prohibited rules attempts.
attempts_at(&self, pos: usize) -> usize776     fn attempts_at(&self, pos: usize) -> usize {
777         if self.attempt_pos == pos {
778             self.pos_attempts.len() + self.neg_attempts.len()
779         } else {
780             0
781         }
782     }
783 
track( &mut self, rule: R, pos: usize, pos_attempts_index: usize, neg_attempts_index: usize, prev_attempts: usize, )784     fn track(
785         &mut self,
786         rule: R,
787         pos: usize,
788         pos_attempts_index: usize,
789         neg_attempts_index: usize,
790         prev_attempts: usize,
791     ) {
792         if self.atomicity == Atomicity::Atomic {
793             return;
794         }
795 
796         // If nested rules made no progress, there is no use to report them; it's only useful to
797         // track the current rule, the exception being when only one attempt has been made during
798         // the children rules.
799         let curr_attempts = self.attempts_at(pos);
800         if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
801             return;
802         }
803 
804         if pos == self.attempt_pos {
805             self.pos_attempts.truncate(pos_attempts_index);
806             self.neg_attempts.truncate(neg_attempts_index);
807         }
808 
809         if pos > self.attempt_pos {
810             self.pos_attempts.clear();
811             self.neg_attempts.clear();
812             self.attempt_pos = pos;
813         }
814 
815         let attempts = if self.lookahead != Lookahead::Negative {
816             &mut self.pos_attempts
817         } else {
818             &mut self.neg_attempts
819         };
820 
821         if pos == self.attempt_pos {
822             attempts.push(rule);
823         }
824     }
825 
826     /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
827     /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
828     /// `Box<ParserState>` otherwise.
829     ///
830     /// This method is useful to parse sequences that only match together which usually come in the
831     /// form of chained `Result`s with
832     /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
833     ///
834     ///
835     /// # Examples
836     ///
837     /// ```
838     /// # use pest;
839     /// # #[allow(non_camel_case_types)]
840     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
841     /// enum Rule {
842     ///     a
843     /// }
844     ///
845     /// let input = "a";
846     /// let pairs: Vec<_> = pest::state(input, |state| {
847     ///     state.sequence(|s| {
848     ///         s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
849     ///             s.match_string("b")
850     ///         })
851     ///     }).or_else(|s| {
852     ///         Ok(s)
853     ///     })
854     /// }).unwrap().collect();
855     ///
856     /// assert_eq!(pairs.len(), 0);
857     /// ```
858     #[inline]
sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,859     pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
860     where
861         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
862     {
863         self = self.inc_call_check_limit()?;
864         let token_index = self.queue.len();
865         let initial_pos = self.position;
866 
867         let result = f(self);
868 
869         match result {
870             Ok(new_state) => Ok(new_state),
871             Err(mut new_state) => {
872                 // Restore the initial position and truncate the token queue.
873                 new_state.position = initial_pos;
874                 new_state.queue.truncate(token_index);
875                 Err(new_state)
876             }
877         }
878     }
879 
880     /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
881     /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
882     ///
883     /// # Examples
884     ///
885     /// ```
886     /// # use pest;
887     /// # #[allow(non_camel_case_types)]
888     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
889     /// enum Rule {
890     ///     ab
891     /// }
892     ///
893     /// let input = "aab";
894     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
895     /// let mut result = state.repeat(|s| {
896     ///     s.match_string("a")
897     /// });
898     /// assert!(result.is_ok());
899     /// assert_eq!(result.unwrap().position().pos(), 2);
900     ///
901     /// state = pest::ParserState::new(input);
902     /// result = state.repeat(|s| {
903     ///     s.match_string("b")
904     /// });
905     /// assert!(result.is_ok());
906     /// assert_eq!(result.unwrap().position().pos(), 0);
907     /// ```
908     #[inline]
repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>> where F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,909     pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
910     where
911         F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
912     {
913         self = self.inc_call_check_limit()?;
914         let mut result = f(self);
915 
916         loop {
917             match result {
918                 Ok(state) => result = f(state),
919                 Err(state) => return Ok(state),
920             };
921         }
922     }
923 
924     /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
925     /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
926     ///
927     /// # Examples
928     ///
929     /// ```
930     /// # use pest;
931     /// # #[allow(non_camel_case_types)]
932     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
933     /// enum Rule {
934     ///     ab
935     /// }
936     ///
937     /// let input = "ab";
938     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
939     /// let result = state.optional(|s| {
940     ///     s.match_string("ab")
941     /// });
942     /// assert!(result.is_ok());
943     ///
944     /// state = pest::ParserState::new(input);
945     /// let result = state.optional(|s| {
946     ///     s.match_string("ac")
947     /// });
948     /// assert!(result.is_ok());
949     /// ```
950     #[inline]
optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,951     pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
952     where
953         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
954     {
955         self = self.inc_call_check_limit()?;
956         match f(self) {
957             Ok(state) | Err(state) => Ok(state),
958         }
959     }
960 
961     /// Generic function to handle result of char/string/range parsing
962     /// in order to track (un)expected tokens.
handle_token_parse_result( &mut self, start_position: usize, token: ParsingToken, parse_succeeded: bool, )963     fn handle_token_parse_result(
964         &mut self,
965         start_position: usize,
966         token: ParsingToken,
967         parse_succeeded: bool,
968     ) {
969         // New position after tracked parsed element for case of `parse_succeded` is true.
970         // Position of parsing failure otherwise.
971         let current_pos = self.position.pos();
972 
973         if parse_succeeded {
974             if self.lookahead == Lookahead::Negative {
975                 self.parse_attempts
976                     .try_add_new_token(token, start_position, current_pos, true);
977             } else if current_pos > self.parse_attempts.max_position {
978                 self.parse_attempts.nullify_expected_tokens(current_pos);
979             }
980         } else if self.lookahead != Lookahead::Negative {
981             self.parse_attempts
982                 .try_add_new_token(token, start_position, current_pos, false);
983         }
984     }
985 
986     /// Attempts to match a single character based on a filter function. Returns `Ok` with the
987     /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
988     /// otherwise.
989     ///
990     /// # Examples
991     ///
992     /// ```
993     /// # use pest;
994     /// # #[allow(non_camel_case_types)]
995     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
996     /// enum Rule {}
997     ///
998     /// let input = "ab";
999     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1000     /// let result = state.match_char_by(|c| c.is_ascii());
1001     /// assert!(result.is_ok());
1002     /// assert_eq!(result.unwrap().position().pos(), 1);
1003     ///
1004     /// let input = "❤";
1005     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1006     /// let result = state.match_char_by(|c| c.is_ascii());
1007     /// assert!(result.is_err());
1008     /// assert_eq!(result.unwrap_err().position().pos(), 0);
1009     /// ```
1010     #[inline]
match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(char) -> bool,1011     pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1012     where
1013         F: FnOnce(char) -> bool,
1014     {
1015         let start_position = self.position.pos();
1016         let succeeded = self.position.match_char_by(f);
1017         if self.parse_attempts.enabled {
1018             let token = ParsingToken::BuiltInRule;
1019             self.handle_token_parse_result(start_position, token, succeeded);
1020         }
1021         if succeeded {
1022             Ok(self)
1023         } else {
1024             Err(self)
1025         }
1026     }
1027 
1028     /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
1029     /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
1030     ///
1031     /// # Examples
1032     ///
1033     /// ```
1034     /// # use pest;
1035     /// # #[allow(non_camel_case_types)]
1036     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1037     /// enum Rule {}
1038     ///
1039     /// let input = "ab";
1040     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1041     /// let mut result = state.match_string("ab");
1042     /// assert!(result.is_ok());
1043     /// assert_eq!(result.unwrap().position().pos(), 2);
1044     ///
1045     /// state = pest::ParserState::new(input);
1046     /// result = state.match_string("ac");
1047     /// assert!(result.is_err());
1048     /// assert_eq!(result.unwrap_err().position().pos(), 0);
1049     /// ```
1050     #[inline]
match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>>1051     pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1052         let start_position = self.position.pos();
1053         let succeeded = self.position.match_string(string);
1054         if self.parse_attempts.enabled {
1055             let token = ParsingToken::Sensitive {
1056                 token: String::from(string),
1057             };
1058             self.handle_token_parse_result(start_position, token, succeeded);
1059         }
1060         if succeeded {
1061             Ok(self)
1062         } else {
1063             Err(self)
1064         }
1065     }
1066 
1067     /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
1068     /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1069     ///
1070     /// # Examples
1071     ///
1072     /// ```
1073     /// # use pest;
1074     /// # #[allow(non_camel_case_types)]
1075     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1076     /// enum Rule {}
1077     ///
1078     /// let input = "ab";
1079     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1080     /// let mut result = state.match_insensitive("AB");
1081     /// assert!(result.is_ok());
1082     /// assert_eq!(result.unwrap().position().pos(), 2);
1083     ///
1084     /// state = pest::ParserState::new(input);
1085     /// result = state.match_insensitive("AC");
1086     /// assert!(result.is_err());
1087     /// assert_eq!(result.unwrap_err().position().pos(), 0);
1088     /// ```
1089     #[inline]
match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>>1090     pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
1091         let start_position: usize = self.position().pos();
1092         let succeeded = self.position.match_insensitive(string);
1093         if self.parse_attempts.enabled {
1094             let token = ParsingToken::Insensitive {
1095                 token: String::from(string),
1096             };
1097             self.handle_token_parse_result(start_position, token, succeeded);
1098         }
1099         if succeeded {
1100             Ok(self)
1101         } else {
1102             Err(self)
1103         }
1104     }
1105 
1106     /// Attempts to match a single character from the given range. Returns `Ok` with the updated
1107     /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1108     ///
1109     /// # Caution
1110     /// The provided `range` is interpreted as inclusive.
1111     ///
1112     /// # Examples
1113     ///
1114     /// ```
1115     /// # use pest;
1116     /// # #[allow(non_camel_case_types)]
1117     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1118     /// enum Rule {}
1119     ///
1120     /// let input = "ab";
1121     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1122     /// let mut result = state.match_range('a'..'z');
1123     /// assert!(result.is_ok());
1124     /// assert_eq!(result.unwrap().position().pos(), 1);
1125     ///
1126     /// state = pest::ParserState::new(input);
1127     /// result = state.match_range('A'..'Z');
1128     /// assert!(result.is_err());
1129     /// assert_eq!(result.unwrap_err().position().pos(), 0);
1130     /// ```
1131     #[inline]
match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>>1132     pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
1133         let start_position = self.position().pos();
1134         let token = ParsingToken::Range {
1135             start: range.start,
1136             end: range.end,
1137         };
1138         let succeeded = self.position.match_range(range);
1139         if self.parse_attempts.enabled {
1140             self.handle_token_parse_result(start_position, token, succeeded);
1141         }
1142         if succeeded {
1143             Ok(self)
1144         } else {
1145             Err(self)
1146         }
1147     }
1148 
1149     /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
1150     /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
1151     ///
1152     /// # Examples
1153     ///
1154     /// ```
1155     /// # use pest;
1156     /// # #[allow(non_camel_case_types)]
1157     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1158     /// enum Rule {}
1159     ///
1160     /// let input = "ab";
1161     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1162     /// let mut result = state.skip(1);
1163     /// assert!(result.is_ok());
1164     /// assert_eq!(result.unwrap().position().pos(), 1);
1165     ///
1166     /// state = pest::ParserState::new(input);
1167     /// result = state.skip(3);
1168     /// assert!(result.is_err());
1169     /// assert_eq!(result.unwrap_err().position().pos(), 0);
1170     /// ```
1171     #[inline]
skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>>1172     pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
1173         if self.position.skip(n) {
1174             Ok(self)
1175         } else {
1176             Err(self)
1177         }
1178     }
1179 
1180     /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
1181     /// updated `Box<ParserState>` whether or not one of the strings is found.
1182     ///
1183     /// # Examples
1184     ///
1185     /// ```
1186     /// # use pest;
1187     /// # #[allow(non_camel_case_types)]
1188     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1189     /// enum Rule {}
1190     ///
1191     /// let input = "abcd";
1192     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1193     /// let mut result = state.skip_until(&["c", "d"]);
1194     /// assert!(result.is_ok());
1195     /// assert_eq!(result.unwrap().position().pos(), 2);
1196     /// ```
1197     #[inline]
skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>>1198     pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
1199         self.position.skip_until(strings);
1200         Ok(self)
1201     }
1202 
1203     /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
1204     /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
1205     ///
1206     /// # Examples
1207     ///
1208     /// ```
1209     /// # use pest;
1210     /// # #[allow(non_camel_case_types)]
1211     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1212     /// enum Rule {}
1213     ///
1214     /// let input = "ab";
1215     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1216     /// let mut result = state.start_of_input();
1217     /// assert!(result.is_ok());
1218     ///
1219     /// state = pest::ParserState::new(input);
1220     /// state = state.match_string("ab").unwrap();
1221     /// result = state.start_of_input();
1222     /// assert!(result.is_err());
1223     /// ```
1224     #[inline]
start_of_input(self: Box<Self>) -> ParseResult<Box<Self>>1225     pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1226         if self.position.at_start() {
1227             Ok(self)
1228         } else {
1229             Err(self)
1230         }
1231     }
1232 
1233     /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
1234     /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
1235     ///
1236     /// # Examples
1237     ///
1238     /// ```
1239     /// # use pest;
1240     /// # #[allow(non_camel_case_types)]
1241     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1242     /// enum Rule {}
1243     ///
1244     /// let input = "ab";
1245     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1246     /// let mut result = state.end_of_input();
1247     /// assert!(result.is_err());
1248     ///
1249     /// state = pest::ParserState::new(input);
1250     /// state = state.match_string("ab").unwrap();
1251     /// result = state.end_of_input();
1252     /// assert!(result.is_ok());
1253     /// ```
1254     #[inline]
end_of_input(self: Box<Self>) -> ParseResult<Box<Self>>1255     pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
1256         if self.position.at_end() {
1257             Ok(self)
1258         } else {
1259             Err(self)
1260         }
1261     }
1262 
1263     /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
1264     /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
1265     /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
1266     /// together, negating the `Result`.
1267     ///
1268     /// # Examples
1269     ///
1270     /// ```
1271     /// # use pest;
1272     /// # #[allow(non_camel_case_types)]
1273     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1274     /// enum Rule {
1275     ///     a
1276     /// }
1277     ///
1278     /// let input = "a";
1279     /// let pairs: Vec<_> = pest::state(input, |state| {
1280     ///     state.lookahead(true, |state| {
1281     ///         state.rule(Rule::a, |s| Ok(s))
1282     ///     })
1283     /// }).unwrap().collect();
1284     ///
1285     /// assert_eq!(pairs.len(), 0);
1286     /// ```
1287     #[inline]
lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,1288     pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
1289     where
1290         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1291     {
1292         self = self.inc_call_check_limit()?;
1293         let initial_lookahead = self.lookahead;
1294 
1295         self.lookahead = if is_positive {
1296             match initial_lookahead {
1297                 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
1298                 Lookahead::Negative => Lookahead::Negative,
1299             }
1300         } else {
1301             match initial_lookahead {
1302                 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
1303                 Lookahead::Negative => Lookahead::Positive,
1304             }
1305         };
1306 
1307         let initial_pos = self.position;
1308 
1309         let result = f(self.checkpoint());
1310 
1311         let result_state = match result {
1312             Ok(mut new_state) => {
1313                 new_state.position = initial_pos;
1314                 new_state.lookahead = initial_lookahead;
1315                 Ok(new_state.restore())
1316             }
1317             Err(mut new_state) => {
1318                 new_state.position = initial_pos;
1319                 new_state.lookahead = initial_lookahead;
1320                 Err(new_state.restore())
1321             }
1322         };
1323 
1324         if is_positive {
1325             result_state
1326         } else {
1327             match result_state {
1328                 Ok(state) => Err(state),
1329                 Err(state) => Ok(state),
1330             }
1331         }
1332     }
1333 
1334     /// Transformation which stops `Token`s from being generated according to `is_atomic`.
1335     /// Used as wrapper over `rule` (or even another `atomic`) call.
1336     ///
1337     /// # Examples
1338     ///
1339     /// ```
1340     /// # use pest::{self, Atomicity};
1341     /// # #[allow(non_camel_case_types)]
1342     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1343     /// enum Rule {
1344     ///     a
1345     /// }
1346     ///
1347     /// let input = "a";
1348     /// let pairs: Vec<_> = pest::state(input, |state| {
1349     ///     state.atomic(Atomicity::Atomic, |s| {
1350     ///         s.rule(Rule::a, |s| Ok(s))
1351     ///     })
1352     /// }).unwrap().collect();
1353     ///
1354     /// assert_eq!(pairs.len(), 0);
1355     /// ```
1356     #[inline]
atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,1357     pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
1358     where
1359         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1360     {
1361         self = self.inc_call_check_limit()?;
1362         // In case child parsing call is another `atomic` it will have its own atomicity status.
1363         let initial_atomicity = self.atomicity;
1364         // In case child atomicity is the same as we've demanded, we shouldn't do nothing.
1365         // E.g. we have the following rules:
1366         // * RootRule = @{ InnerRule }
1367         // * InnerRule = @{ ... }
1368         let should_toggle = self.atomicity != atomicity;
1369 
1370         // Note that we take atomicity of the top rule and not of the leaf (inner).
1371         if should_toggle {
1372             self.atomicity = atomicity;
1373         }
1374 
1375         let result = f(self);
1376 
1377         match result {
1378             Ok(mut new_state) => {
1379                 if should_toggle {
1380                     new_state.atomicity = initial_atomicity;
1381                 }
1382                 Ok(new_state)
1383             }
1384             Err(mut new_state) => {
1385                 if should_toggle {
1386                     new_state.atomicity = initial_atomicity;
1387                 }
1388                 Err(new_state)
1389             }
1390         }
1391     }
1392 
1393     /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
1394     /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
1395     /// called successfully, or `Err(Box<ParserState>)` otherwise.
1396     ///
1397     /// # Examples
1398     ///
1399     /// ```
1400     /// # use pest;
1401     /// # #[allow(non_camel_case_types)]
1402     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1403     /// enum Rule {}
1404     ///
1405     /// let input = "ab";
1406     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1407     /// let mut result = state.stack_push(|state| state.match_string("a"));
1408     /// assert!(result.is_ok());
1409     /// assert_eq!(result.unwrap().position().pos(), 1);
1410     /// ```
1411     #[inline]
stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,1412     pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1413     where
1414         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1415     {
1416         self = self.inc_call_check_limit()?;
1417         let start = self.position;
1418 
1419         let result = f(self);
1420 
1421         match result {
1422             Ok(mut state) => {
1423                 let end = state.position;
1424                 state.stack.push(start.span(&end));
1425                 Ok(state)
1426             }
1427             Err(state) => Err(state),
1428         }
1429     }
1430 
1431     /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1432     /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1433     ///
1434     /// # Examples
1435     ///
1436     /// ```
1437     /// # use pest;
1438     /// # #[allow(non_camel_case_types)]
1439     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1440     /// enum Rule {}
1441     ///
1442     /// let input = "aa";
1443     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1444     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1445     ///     |state| state.stack_peek()
1446     /// );
1447     /// assert!(result.is_ok());
1448     /// assert_eq!(result.unwrap().position().pos(), 2);
1449     /// ```
1450     #[inline]
stack_peek(self: Box<Self>) -> ParseResult<Box<Self>>1451     pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1452         let string = self
1453             .stack
1454             .peek()
1455             .expect("peek was called on empty stack")
1456             .as_str();
1457         self.match_string(string)
1458     }
1459 
1460     /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1461     /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1462     ///
1463     /// # Examples
1464     ///
1465     /// ```
1466     /// # use pest;
1467     /// # #[allow(non_camel_case_types)]
1468     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1469     /// enum Rule {}
1470     ///
1471     /// let input = "aa";
1472     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1473     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1474     ///     |state| state.stack_pop()
1475     /// );
1476     /// assert!(result.is_ok());
1477     /// assert_eq!(result.unwrap().position().pos(), 2);
1478     /// ```
1479     #[inline]
stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>>1480     pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1481         let string = self
1482             .stack
1483             .pop()
1484             .expect("pop was called on empty stack")
1485             .as_str();
1486         self.match_string(string)
1487     }
1488 
1489     /// Matches part of the state of the stack.
1490     ///
1491     /// # Examples
1492     ///
1493     /// ```
1494     /// # use pest::{self, MatchDir};
1495     /// # #[allow(non_camel_case_types)]
1496     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1497     /// enum Rule {}
1498     ///
1499     /// let input = "abcd cd cb";
1500     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1501     /// let mut result = state
1502     ///     .stack_push(|state| state.match_string("a"))
1503     ///     .and_then(|state| state.stack_push(|state| state.match_string("b")))
1504     ///     .and_then(|state| state.stack_push(|state| state.match_string("c")))
1505     ///     .and_then(|state| state.stack_push(|state| state.match_string("d")))
1506     ///     .and_then(|state| state.match_string(" "))
1507     ///     .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1508     ///     .and_then(|state| state.match_string(" "))
1509     ///     .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1510     /// assert!(result.is_ok());
1511     /// assert_eq!(result.unwrap().position().pos(), 10);
1512     /// ```
1513     #[inline]
stack_match_peek_slice( mut self: Box<Self>, start: i32, end: Option<i32>, match_dir: MatchDir, ) -> ParseResult<Box<Self>>1514     pub fn stack_match_peek_slice(
1515         mut self: Box<Self>,
1516         start: i32,
1517         end: Option<i32>,
1518         match_dir: MatchDir,
1519     ) -> ParseResult<Box<Self>> {
1520         let range = match constrain_idxs(start, end, self.stack.len()) {
1521             Some(r) => r,
1522             None => return Err(self),
1523         };
1524         // return true if an empty sequence is requested
1525         if range.end <= range.start {
1526             return Ok(self);
1527         }
1528 
1529         let mut position = self.position;
1530         let result = {
1531             let mut iter_b2t = self.stack[range].iter();
1532             let matcher = |span: &Span<'_>| position.match_string(span.as_str());
1533             match match_dir {
1534                 MatchDir::BottomToTop => iter_b2t.all(matcher),
1535                 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1536             }
1537         };
1538         if result {
1539             self.position = position;
1540             Ok(self)
1541         } else {
1542             Err(self)
1543         }
1544     }
1545 
1546     /// Matches the full state of the stack.
1547     ///
1548     /// # Examples
1549     ///
1550     /// ```
1551     /// # use pest;
1552     /// # #[allow(non_camel_case_types)]
1553     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1554     /// enum Rule {}
1555     ///
1556     /// let input = "abba";
1557     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1558     /// let mut result = state
1559     ///     .stack_push(|state| state.match_string("a"))
1560     ///     .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1561     ///     .and_then(|state| state.stack_match_peek());
1562     /// assert!(result.is_ok());
1563     /// assert_eq!(result.unwrap().position().pos(), 4);
1564     /// ```
1565     #[inline]
stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>>1566     pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1567         self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1568     }
1569 
1570     /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1571     ///
1572     /// # Examples
1573     ///
1574     /// ```
1575     /// /// # use pest;
1576     /// # #[allow(non_camel_case_types)]
1577     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1578     /// enum Rule {}
1579     ///
1580     /// let input = "aaaa";
1581     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1582     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1583     ///     state.stack_push(|state| state.match_string("a"))
1584     /// }).and_then(|state| state.stack_match_peek());
1585     /// assert!(result.is_ok());
1586     /// assert_eq!(result.unwrap().position().pos(), 4);
1587     /// ```
1588     #[inline]
stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>>1589     pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1590         let mut position = self.position;
1591         let mut result = true;
1592         while let Some(span) = self.stack.pop() {
1593             result = position.match_string(span.as_str());
1594             if !result {
1595                 break;
1596             }
1597         }
1598 
1599         if result {
1600             self.position = position;
1601             Ok(self)
1602         } else {
1603             Err(self)
1604         }
1605     }
1606 
1607     /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1608     /// `Err(Box<ParserState>)` otherwise.
1609     ///
1610     /// # Examples
1611     ///
1612     /// ```
1613     /// # use pest;
1614     /// # #[allow(non_camel_case_types)]
1615     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1616     /// enum Rule {}
1617     ///
1618     /// let input = "aa";
1619     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1620     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1621     ///     |state| state.stack_drop()
1622     /// );
1623     /// assert!(result.is_ok());
1624     /// assert_eq!(result.unwrap().position().pos(), 1);
1625     /// ```
1626     #[inline]
stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>>1627     pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1628         match self.stack.pop() {
1629             Some(_) => Ok(self),
1630             None => Err(self),
1631         }
1632     }
1633 
1634     /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1635     /// this method only restores the stack.
1636     ///
1637     /// # Examples
1638     ///
1639     /// ```
1640     /// # use pest;
1641     /// # #[allow(non_camel_case_types)]
1642     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1643     /// enum Rule {}
1644     ///
1645     /// let input = "ab";
1646     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1647     /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1648     ///     state.match_string("a")).and_then(|state| state.match_string("a"))
1649     /// );
1650     ///
1651     /// assert!(result.is_err());
1652     ///
1653     /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1654     /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1655     /// assert!(catch_panic.is_err());
1656     /// ```
1657     #[inline]
restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,1658     pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1659     where
1660         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1661     {
1662         match f(self.checkpoint()) {
1663             Ok(state) => Ok(state.checkpoint_ok()),
1664             Err(state) => Err(state.restore()),
1665         }
1666     }
1667 
1668     // Mark the current state as a checkpoint and return the `Box`.
1669     #[inline]
checkpoint(mut self: Box<Self>) -> Box<Self>1670     pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1671         self.stack.snapshot();
1672         self
1673     }
1674 
1675     // The checkpoint was cleared successfully
1676     // so remove it without touching other stack state.
1677     #[inline]
checkpoint_ok(mut self: Box<Self>) -> Box<Self>1678     pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1679         self.stack.clear_snapshot();
1680         self
1681     }
1682 
1683     // Restore the current state to the most recent checkpoint.
1684     #[inline]
restore(mut self: Box<Self>) -> Box<Self>1685     pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1686         self.stack.restore();
1687         self
1688     }
1689 }
1690 
1691 /// Helper function used only in case stack operations (PUSH/POP) are used in grammar.
constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>>1692 fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1693     let start_norm = normalize_index(start, len)?;
1694     let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1695     Some(start_norm..end_norm)
1696 }
1697 
1698 /// `constrain_idxs` helper function.
1699 /// Normalizes the index using its sequence’s length.
1700 /// Returns `None` if the normalized index is OOB.
normalize_index(i: i32, len: usize) -> Option<usize>1701 fn normalize_index(i: i32, len: usize) -> Option<usize> {
1702     if i > len as i32 {
1703         None
1704     } else if i >= 0 {
1705         Some(i as usize)
1706     } else {
1707         let real_i = len as i32 + i;
1708         if real_i >= 0 {
1709             Some(real_i as usize)
1710         } else {
1711             None
1712         }
1713     }
1714 }
1715 
1716 #[cfg(test)]
1717 mod test {
1718     use super::*;
1719 
1720     #[test]
normalize_index_pos()1721     fn normalize_index_pos() {
1722         assert_eq!(normalize_index(4, 6), Some(4));
1723         assert_eq!(normalize_index(5, 5), Some(5));
1724         assert_eq!(normalize_index(6, 3), None);
1725     }
1726 
1727     #[test]
normalize_index_neg()1728     fn normalize_index_neg() {
1729         assert_eq!(normalize_index(-4, 6), Some(2));
1730         assert_eq!(normalize_index(-5, 5), Some(0));
1731         assert_eq!(normalize_index(-6, 3), None);
1732     }
1733 }
1734