• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 use std::ops::Range;
11 use std::rc::Rc;
12 
13 use error::{Error, ErrorVariant};
14 use iterators::{pairs, QueueableToken};
15 use position::{self, Position};
16 use span::Span;
17 use stack::Stack;
18 use RuleType;
19 
/// The current lookahead status of a [`ParserState`].
///
/// The status is switched by `ParserState::lookahead` for the duration of its closure and put
/// back afterwards. While it is anything other than `None`, `ParserState::rule` does not add
/// tokens to the queue.
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Lookahead {
    /// Inside a positive lookahead; failed rules are tracked as positive attempts.
    Positive,
    /// Inside a negative lookahead; rules that succeed are tracked as negative attempts.
    Negative,
    /// Not inside any lookahead. This is the status a freshly created state starts with.
    None,
}
29 
/// The current atomicity of a [`ParserState`].
///
/// The atomicity is switched by `ParserState::atomic` for the duration of its closure and put
/// back afterwards.
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Atomicity {
    /// `ParserState::rule` produces no tokens and records no attempts in this mode.
    Atomic,
    /// Tokens are still produced by `ParserState::rule` in this mode.
    // NOTE(review): the remaining difference from `NonAtomic` is not visible in this part of the
    // file -- presumably it concerns implicit whitespace handling; confirm.
    CompoundAtomic,
    /// The mode a freshly created state starts with; tokens are produced.
    NonAtomic,
}
39 
/// Type alias to simplify specifying the return value of chained closures.
///
/// Both variants carry the same type `S` (in this file always a `Box<ParserState>`): `Ok(S)`
/// means the transformation matched and `Err(S)` means it did not. Either way the state is
/// handed back to the caller, so it can keep being threaded through `and_then`/`or_else`.
pub type ParseResult<S> = Result<S, S>;
42 
/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
// NOTE(review): the consumer (`stack_match_peek_slice`) is not visible in this part of the
// file; the variant descriptions below follow from the names only -- confirm against it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MatchDir {
    /// Presumably: match stack entries starting from the bottom of the stack.
    BottomToTop,
    /// Presumably: match stack entries starting from the top of the stack.
    TopToBottom,
}
49 
/// The complete state of a [`Parser`].
///
/// A `ParserState` is created boxed (see `ParserState::new`) and passed by value from one
/// matching closure to the next; every combinator returns it again inside a [`ParseResult`].
///
/// [`Parser`]: trait.Parser.html
/// [`ParseResult`]: type.ParseResult.html
#[derive(Debug)]
pub struct ParserState<'i, R: RuleType> {
    /// Current position in the input; exposed through `position()`.
    position: Position<'i>,
    /// `Start`/`End` tokens pushed by `rule`; turned into `Pairs` by `state` on success.
    queue: Vec<QueueableToken<R>>,
    /// Lookahead status, switched temporarily by `lookahead`.
    lookahead: Lookahead,
    /// Rules recorded by `track` outside of negative lookahead; reported as `positives`.
    pos_attempts: Vec<R>,
    /// Rules recorded by `track` inside negative lookahead; reported as `negatives`.
    neg_attempts: Vec<R>,
    /// Input position the recorded attempts belong to; `track` only ever moves it forward.
    attempt_pos: usize,
    /// Atomicity, switched temporarily by `atomic`.
    atomicity: Atomicity,
    /// Stack of input spans; `stack_push` pushes onto it and `stack_peek` reads its top.
    stack: Stack<Span<'i>>,
}
64 
65 /// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// # use pest;
71 /// let input = "";
72 /// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
73 /// ```
state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>> where F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,74 pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
75 where
76     F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
77 {
78     let state = ParserState::new(input);
79 
80     match f(state) {
81         Ok(state) => {
82             let len = state.queue.len();
83             Ok(pairs::new(Rc::new(state.queue), input, 0, len))
84         }
85         Err(mut state) => {
86             state.pos_attempts.sort();
87             state.pos_attempts.dedup();
88             state.neg_attempts.sort();
89             state.neg_attempts.dedup();
90 
91             Err(Error::new_from_pos(
92                 ErrorVariant::ParsingError {
93                     positives: state.pos_attempts.clone(),
94                     negatives: state.neg_attempts.clone(),
95                 },
96                 // TODO(performance): Guarantee state.attempt_pos is a valid position
97                 position::Position::new(input, state.attempt_pos).unwrap(),
98             ))
99         }
100     }
101 }
102 
103 impl<'i, R: RuleType> ParserState<'i, R> {
104     /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
105     /// will be passed from closure to closure based on the needs of the specified `Parser`.
106     ///
107     /// # Examples
108     ///
109     /// ```
110     /// # use pest;
111     /// let input = "";
112     /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
113     /// ```
114     #[allow(clippy::new_ret_no_self)]
new(input: &'i str) -> Box<Self>115     pub fn new(input: &'i str) -> Box<Self> {
116         Box::new(ParserState {
117             position: Position::from_start(input),
118             queue: vec![],
119             lookahead: Lookahead::None,
120             pos_attempts: vec![],
121             neg_attempts: vec![],
122             attempt_pos: 0,
123             atomicity: Atomicity::NonAtomic,
124             stack: Stack::new(),
125         })
126     }
127 
    /// Returns a reference to the current `Position` of the `ParserState`.
    ///
    /// The reference borrows from the state; nothing is copied or modified.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
    /// let position = state.position();
    /// assert_eq!(position.pos(), 0);
    /// ```
    pub fn position(&self) -> &Position<'i> {
        &self.position
    }
148 
    /// Returns the current atomicity of the `ParserState`.
    ///
    /// `Atomicity` is `Copy`, so the value is returned directly. A freshly created state reports
    /// `Atomicity::NonAtomic`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest;
    /// # use pest::Atomicity;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
    /// let atomicity = state.atomicity();
    /// assert_eq!(atomicity, Atomicity::NonAtomic);
    /// ```
    pub fn atomicity(&self) -> Atomicity {
        self.atomicity
    }
170 
    /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
    /// meant to match the rule.
    ///
    /// Unless the state is inside a lookahead or in `Atomicity::Atomic` mode, a `Start` token is
    /// queued before `f` runs. If `f` succeeds, that token is linked to a matching `End` token
    /// carrying `rule`. If `f` fails, the queue is truncated back to its length before the call.
    /// The rule is also recorded through `track` for error reporting: on failure outside of
    /// negative lookahead, or on success inside negative lookahead.
    ///
    /// Returns whatever `f` returned (`Ok` or `Err`) with the bookkeeping above applied.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "a";
    /// let pairs: Vec<_> = pest::state(input, |state| {
    ///     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().collect();
    ///
    /// assert_eq!(pairs.len(), 1);
    /// ```
    #[inline]
    pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
    where
        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
    {
        // Input position and queue length at the moment the rule starts.
        let actual_pos = self.position.pos();
        let index = self.queue.len();

        // Lengths of the attempt lists before `f` runs; `track` truncates back to these so that
        // attempts added by nested rules at this same position can be replaced by this rule.
        let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
            (self.pos_attempts.len(), self.neg_attempts.len())
        } else {
            // Attempts have not been cleared yet since the attempt_pos is older.
            (0, 0)
        };

        if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
            // Pair's position will only be known after running the closure.
            // `end_token_index` is a placeholder here and is patched once `f` has succeeded.
            self.queue.push(QueueableToken::Start {
                end_token_index: 0,
                input_pos: actual_pos,
            });
        }

        // Total number of attempts recorded at this position before running `f`.
        let attempts = self.attempts_at(actual_pos);

        let result = f(self);

        match result {
            Ok(mut new_state) => {
                // Inside negative lookahead a *successful* rule is what gets reported.
                if new_state.lookahead == Lookahead::Negative {
                    new_state.track(
                        rule,
                        actual_pos,
                        pos_attempts_index,
                        neg_attempts_index,
                        attempts,
                    );
                }

                if new_state.lookahead == Lookahead::None
                    && new_state.atomicity != Atomicity::Atomic
                {
                    // Storing the pair's index in the first token that was added before the closure was
                    // run.
                    let new_index = new_state.queue.len();
                    match new_state.queue[index] {
                        QueueableToken::Start {
                            ref mut end_token_index,
                            ..
                        } => *end_token_index = new_index,
                        // `queue[index]` is the `Start` token pushed above under the same
                        // condition.
                        _ => unreachable!(),
                    };

                    let new_pos = new_state.position.pos();

                    // Close the pair; `start_token_index` points back at the `Start` token.
                    new_state.queue.push(QueueableToken::End {
                        start_token_index: index,
                        rule,
                        input_pos: new_pos,
                    });
                }

                Ok(new_state)
            }
            Err(mut new_state) => {
                // Outside negative lookahead a *failed* rule is what gets reported.
                if new_state.lookahead != Lookahead::Negative {
                    new_state.track(
                        rule,
                        actual_pos,
                        pos_attempts_index,
                        neg_attempts_index,
                        attempts,
                    );
                }

                if new_state.lookahead == Lookahead::None
                    && new_state.atomicity != Atomicity::Atomic
                {
                    // Drop the `Start` token pushed above along with anything `f` queued.
                    new_state.queue.truncate(index);
                }

                Err(new_state)
            }
        }
    }
276 
attempts_at(&self, pos: usize) -> usize277     fn attempts_at(&self, pos: usize) -> usize {
278         if self.attempt_pos == pos {
279             self.pos_attempts.len() + self.neg_attempts.len()
280         } else {
281             0
282         }
283     }
284 
    /// Records `rule` as an attempt made at input position `pos`, for later error reporting.
    ///
    /// * `pos_attempts_index` / `neg_attempts_index` -- lengths the attempt lists had when the
    ///   rule started; attempts added after that point at the same position are discarded.
    /// * `prev_attempts` -- total number of attempts recorded at `pos` when the rule started.
    ///
    /// Nothing is recorded in `Atomicity::Atomic` mode, when exactly one attempt was added since
    /// the rule started, or when `pos` lies before the current `attempt_pos`.
    fn track(
        &mut self,
        rule: R,
        pos: usize,
        pos_attempts_index: usize,
        neg_attempts_index: usize,
        prev_attempts: usize,
    ) {
        if self.atomicity == Atomicity::Atomic {
            return;
        }

        // If nested rules made no progress, there is no use to report them; it's only useful to
        // track the current rule, the exception being when only one attempt has been made during
        // the children rules.
        let curr_attempts = self.attempts_at(pos);
        if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
            return;
        }

        // Same position as the recorded attempts: forget what was added since this rule started.
        if pos == self.attempt_pos {
            self.pos_attempts.truncate(pos_attempts_index);
            self.neg_attempts.truncate(neg_attempts_index);
        }

        // A position further into the input replaces all earlier attempts.
        if pos > self.attempt_pos {
            self.pos_attempts.clear();
            self.neg_attempts.clear();
            self.attempt_pos = pos;
        }

        // Negative lookahead records into the negative list, everything else into the positive.
        let attempts = if self.lookahead != Lookahead::Negative {
            &mut self.pos_attempts
        } else {
            &mut self.neg_attempts
        };

        // Holds after either branch above; it is false only when `pos < attempt_pos`, in which
        // case the rule is ignored.
        if pos == self.attempt_pos {
            attempts.push(rule);
        }
    }
326 
327     /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
328     /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
329     /// `Box<ParserState>` otherwise.
330     ///
331     /// This method is useful to parse sequences that only match together which usually come in the
332     /// form of chained `Result`s with
333     /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
334     ///
335     ///
336     /// # Examples
337     ///
338     /// ```
339     /// # use pest;
340     /// # #[allow(non_camel_case_types)]
341     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
342     /// enum Rule {
343     ///     a
344     /// }
345     ///
346     /// let input = "a";
347     /// let pairs: Vec<_> = pest::state(input, |state| {
348     ///     state.sequence(|s| {
349     ///         s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
350     ///             s.match_string("b")
351     ///         })
352     ///     }).or_else(|s| {
353     ///         Ok(s)
354     ///     })
355     /// }).unwrap().collect();
356     ///
357     /// assert_eq!(pairs.len(), 0);
358     /// ```
359     #[inline]
sequence<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,360     pub fn sequence<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
361     where
362         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
363     {
364         let token_index = self.queue.len();
365         let initial_pos = self.position.clone();
366 
367         let result = f(self);
368 
369         match result {
370             Ok(new_state) => Ok(new_state),
371             Err(mut new_state) => {
372                 // Restore the initial position and truncate the token queue.
373                 new_state.position = initial_pos;
374                 new_state.queue.truncate(token_index);
375                 Err(new_state)
376             }
377         }
378     }
379 
380     /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
381     /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
382     ///
383     /// # Examples
384     ///
385     /// ```
386     /// # use pest;
387     /// # #[allow(non_camel_case_types)]
388     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
389     /// enum Rule {
390     ///     ab
391     /// }
392     ///
393     /// let input = "aab";
394     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
395     /// let mut result = state.repeat(|s| {
396     ///     s.match_string("a")
397     /// });
398     /// assert!(result.is_ok());
399     /// assert_eq!(result.unwrap().position().pos(), 2);
400     ///
401     /// state = pest::ParserState::new(input);
402     /// result = state.repeat(|s| {
403     ///     s.match_string("b")
404     /// });
405     /// assert!(result.is_ok());
406     /// assert_eq!(result.unwrap().position().pos(), 0);
407     /// ```
408     #[inline]
repeat<F>(self: Box<Self>, mut f: F) -> ParseResult<Box<Self>> where F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,409     pub fn repeat<F>(self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
410     where
411         F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
412     {
413         let mut result = f(self);
414 
415         loop {
416             match result {
417                 Ok(state) => result = f(state),
418                 Err(state) => return Ok(state),
419             };
420         }
421     }
422 
423     /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
424     /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
425     ///
426     /// # Examples
427     ///
428     /// ```
429     /// # use pest;
430     /// # #[allow(non_camel_case_types)]
431     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
432     /// enum Rule {
433     ///     ab
434     /// }
435     ///
436     /// let input = "ab";
437     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
438     /// let result = state.optional(|s| {
439     ///     s.match_string("ab")
440     /// });
441     /// assert!(result.is_ok());
442     ///
443     /// state = pest::ParserState::new(input);
444     /// let result = state.optional(|s| {
445     ///     s.match_string("ac")
446     /// });
447     /// assert!(result.is_ok());
448     /// ```
449     #[inline]
optional<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,450     pub fn optional<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
451     where
452         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
453     {
454         match f(self) {
455             Ok(state) | Err(state) => Ok(state),
456         }
457     }
458 
459     /// Attempts to match a single character based on a filter function. Returns `Ok` with the
460     /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
461     /// otherwise.
462     ///
463     /// # Examples
464     ///
465     /// ```
466     /// # use pest;
467     /// # #[allow(non_camel_case_types)]
468     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
469     /// enum Rule {}
470     ///
471     /// let input = "ab";
472     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
473     /// let result = state.match_char_by(|c| c.is_ascii());
474     /// assert!(result.is_ok());
475     /// assert_eq!(result.unwrap().position().pos(), 1);
476     ///
477     /// let input = "❤";
478     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
479     /// let result = state.match_char_by(|c| c.is_ascii());
480     /// assert!(result.is_err());
481     /// assert_eq!(result.unwrap_err().position().pos(), 0);
482     /// ```
483     #[inline]
match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(char) -> bool,484     pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
485     where
486         F: FnOnce(char) -> bool,
487     {
488         if self.position.match_char_by(f) {
489             Ok(self)
490         } else {
491             Err(self)
492         }
493     }
494 
495     /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
496     /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
497     ///
498     /// # Examples
499     ///
500     /// ```
501     /// # use pest;
502     /// # #[allow(non_camel_case_types)]
503     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
504     /// enum Rule {}
505     ///
506     /// let input = "ab";
507     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
508     /// let mut result = state.match_string("ab");
509     /// assert!(result.is_ok());
510     /// assert_eq!(result.unwrap().position().pos(), 2);
511     ///
512     /// state = pest::ParserState::new(input);
513     /// result = state.match_string("ac");
514     /// assert!(result.is_err());
515     /// assert_eq!(result.unwrap_err().position().pos(), 0);
516     /// ```
517     #[inline]
match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>>518     pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
519         if self.position.match_string(string) {
520             Ok(self)
521         } else {
522             Err(self)
523         }
524     }
525 
526     /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
527     /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
528     ///
529     /// # Examples
530     ///
531     /// ```
532     /// # use pest;
533     /// # #[allow(non_camel_case_types)]
534     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
535     /// enum Rule {}
536     ///
537     /// let input = "ab";
538     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
539     /// let mut result = state.match_insensitive("AB");
540     /// assert!(result.is_ok());
541     /// assert_eq!(result.unwrap().position().pos(), 2);
542     ///
543     /// state = pest::ParserState::new(input);
544     /// result = state.match_insensitive("AC");
545     /// assert!(result.is_err());
546     /// assert_eq!(result.unwrap_err().position().pos(), 0);
547     /// ```
548     #[inline]
match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>>549     pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
550         if self.position.match_insensitive(string) {
551             Ok(self)
552         } else {
553             Err(self)
554         }
555     }
556 
557     /// Attempts to match a single character from the given range. Returns `Ok` with the updated
558     /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
559     ///
560     /// # Examples
561     ///
562     /// ```
563     /// # use pest;
564     /// # #[allow(non_camel_case_types)]
565     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
566     /// enum Rule {}
567     ///
568     /// let input = "ab";
569     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
570     /// let mut result = state.match_range('a'..'z');
571     /// assert!(result.is_ok());
572     /// assert_eq!(result.unwrap().position().pos(), 1);
573     ///
574     /// state = pest::ParserState::new(input);
575     /// result = state.match_range('A'..'Z');
576     /// assert!(result.is_err());
577     /// assert_eq!(result.unwrap_err().position().pos(), 0);
578     /// ```
579     #[inline]
match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>>580     pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
581         if self.position.match_range(range) {
582             Ok(self)
583         } else {
584             Err(self)
585         }
586     }
587 
588     /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
589     /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
590     ///
591     /// # Examples
592     ///
593     /// ```
594     /// # use pest;
595     /// # #[allow(non_camel_case_types)]
596     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
597     /// enum Rule {}
598     ///
599     /// let input = "ab";
600     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
601     /// let mut result = state.skip(1);
602     /// assert!(result.is_ok());
603     /// assert_eq!(result.unwrap().position().pos(), 1);
604     ///
605     /// state = pest::ParserState::new(input);
606     /// result = state.skip(3);
607     /// assert!(result.is_err());
608     /// assert_eq!(result.unwrap_err().position().pos(), 0);
609     /// ```
610     #[inline]
skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>>611     pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
612         if self.position.skip(n) {
613             Ok(self)
614         } else {
615             Err(self)
616         }
617     }
618 
    /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
    /// updated `Box<ParserState>` whether or not one of the strings is found.
    ///
    /// The search itself is performed by `Position::skip_until`; this method never returns `Err`.
    /// In the example below the position stops at the start of the first string found.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {}
    ///
    /// let input = "abcd";
    /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
    /// let mut result = state.skip_until(&["c", "d"]);
    /// assert!(result.is_ok());
    /// assert_eq!(result.unwrap().position().pos(), 2);
    /// ```
    #[inline]
    pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
        // The outcome of the search is deliberately not inspected: this always succeeds.
        self.position.skip_until(strings);
        Ok(self)
    }
641 
642     /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
643     /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
644     ///
645     /// # Examples
646     ///
647     /// ```
648     /// # use pest;
649     /// # #[allow(non_camel_case_types)]
650     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
651     /// enum Rule {}
652     ///
653     /// let input = "ab";
654     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
655     /// let mut result = state.start_of_input();
656     /// assert!(result.is_ok());
657     ///
658     /// state = pest::ParserState::new(input);
659     /// state = state.match_string("ab").unwrap();
660     /// result = state.start_of_input();
661     /// assert!(result.is_err());
662     /// ```
663     #[inline]
start_of_input(self: Box<Self>) -> ParseResult<Box<Self>>664     pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
665         if self.position.at_start() {
666             Ok(self)
667         } else {
668             Err(self)
669         }
670     }
671 
672     /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
673     /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
674     ///
675     /// # Examples
676     ///
677     /// ```
678     /// # use pest;
679     /// # #[allow(non_camel_case_types)]
680     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
681     /// enum Rule {}
682     ///
683     /// let input = "ab";
684     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
685     /// let mut result = state.end_of_input();
686     /// assert!(result.is_err());
687     ///
688     /// state = pest::ParserState::new(input);
689     /// state = state.match_string("ab").unwrap();
690     /// result = state.end_of_input();
691     /// assert!(result.is_ok());
692     /// ```
693     #[inline]
end_of_input(self: Box<Self>) -> ParseResult<Box<Self>>694     pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
695         if self.position.at_end() {
696             Ok(self)
697         } else {
698             Err(self)
699         }
700     }
701 
702     /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
703     /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
704     /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
705     /// together, negating the `Result`.
706     ///
707     /// # Examples
708     ///
709     /// ```
710     /// # use pest;
711     /// # #[allow(non_camel_case_types)]
712     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
713     /// enum Rule {
714     ///     a
715     /// }
716     ///
717     /// let input = "a";
718     /// let pairs: Vec<_> = pest::state(input, |state| {
719     ///     state.lookahead(true, |state| {
720     ///         state.rule(Rule::a, |s| Ok(s))
721     ///     })
722     /// }).unwrap().collect();
723     ///
724     /// assert_eq!(pairs.len(), 0);
725     /// ```
726     #[inline]
lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,727     pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
728     where
729         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
730     {
731         let initial_lookahead = self.lookahead;
732 
733         self.lookahead = if is_positive {
734             match initial_lookahead {
735                 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
736                 Lookahead::Negative => Lookahead::Negative,
737             }
738         } else {
739             match initial_lookahead {
740                 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
741                 Lookahead::Negative => Lookahead::Positive,
742             }
743         };
744 
745         let initial_pos = self.position.clone();
746 
747         let result = f(self.checkpoint());
748 
749         let result_state = match result {
750             Ok(mut new_state) => {
751                 new_state.position = initial_pos;
752                 new_state.lookahead = initial_lookahead;
753                 Ok(new_state.restore())
754             }
755             Err(mut new_state) => {
756                 new_state.position = initial_pos;
757                 new_state.lookahead = initial_lookahead;
758                 Err(new_state.restore())
759             }
760         };
761 
762         if is_positive {
763             result_state
764         } else {
765             match result_state {
766                 Ok(state) => Err(state),
767                 Err(state) => Ok(state),
768             }
769         }
770     }
771 
772     /// Transformation which stops `Token`s from being generated according to `is_atomic`.
773     ///
774     /// # Examples
775     ///
776     /// ```
777     /// # use pest::{self, Atomicity};
778     /// # #[allow(non_camel_case_types)]
779     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
780     /// enum Rule {
781     ///     a
782     /// }
783     ///
784     /// let input = "a";
785     /// let pairs: Vec<_> = pest::state(input, |state| {
786     ///     state.atomic(Atomicity::Atomic, |s| {
787     ///         s.rule(Rule::a, |s| Ok(s))
788     ///     })
789     /// }).unwrap().collect();
790     ///
791     /// assert_eq!(pairs.len(), 0);
792     /// ```
793     #[inline]
atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,794     pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
795     where
796         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
797     {
798         let initial_atomicity = self.atomicity;
799         let should_toggle = self.atomicity != atomicity;
800 
801         if should_toggle {
802             self.atomicity = atomicity;
803         }
804 
805         let result = f(self);
806 
807         match result {
808             Ok(mut new_state) => {
809                 if should_toggle {
810                     new_state.atomicity = initial_atomicity;
811                 }
812                 Ok(new_state)
813             }
814             Err(mut new_state) => {
815                 if should_toggle {
816                     new_state.atomicity = initial_atomicity;
817                 }
818                 Err(new_state)
819             }
820         }
821     }
822 
823     /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
824     /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
825     /// called successfully, or `Err(Box<ParserState>)` otherwise.
826     ///
827     /// # Examples
828     ///
829     /// ```
830     /// # use pest;
831     /// # #[allow(non_camel_case_types)]
832     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
833     /// enum Rule {}
834     ///
835     /// let input = "ab";
836     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
837     /// let mut result = state.stack_push(|state| state.match_string("a"));
838     /// assert!(result.is_ok());
839     /// assert_eq!(result.unwrap().position().pos(), 1);
840     /// ```
841     #[inline]
stack_push<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,842     pub fn stack_push<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
843     where
844         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
845     {
846         let start = self.position.clone();
847 
848         let result = f(self);
849 
850         match result {
851             Ok(mut state) => {
852                 let end = state.position.clone();
853                 state.stack.push(start.span(&end));
854                 Ok(state)
855             }
856             Err(state) => Err(state),
857         }
858     }
859 
860     /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
861     /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
862     ///
863     /// # Examples
864     ///
865     /// ```
866     /// # use pest;
867     /// # #[allow(non_camel_case_types)]
868     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
869     /// enum Rule {}
870     ///
871     /// let input = "aa";
872     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
873     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
874     ///     |state| state.stack_peek()
875     /// );
876     /// assert!(result.is_ok());
877     /// assert_eq!(result.unwrap().position().pos(), 2);
878     /// ```
879     #[inline]
stack_peek(self: Box<Self>) -> ParseResult<Box<Self>>880     pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
881         let string = self
882             .stack
883             .peek()
884             .expect("peek was called on empty stack")
885             .as_str();
886         self.match_string(string)
887     }
888 
889     /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
890     /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
891     ///
892     /// # Examples
893     ///
894     /// ```
895     /// # use pest;
896     /// # #[allow(non_camel_case_types)]
897     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
898     /// enum Rule {}
899     ///
900     /// let input = "aa";
901     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
902     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
903     ///     |state| state.stack_pop()
904     /// );
905     /// assert!(result.is_ok());
906     /// assert_eq!(result.unwrap().position().pos(), 2);
907     /// ```
908     #[inline]
stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>>909     pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
910         let string = self
911             .stack
912             .pop()
913             .expect("pop was called on empty stack")
914             .as_str();
915         self.match_string(string)
916     }
917 
918     /// Matches part of the state of the stack.
919     ///
920     /// # Examples
921     ///
922     /// ```
923     /// # use pest::{self, MatchDir};
924     /// # #[allow(non_camel_case_types)]
925     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
926     /// enum Rule {}
927     ///
928     /// let input = "abcd cd cb";
929     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
930     /// let mut result = state
931     ///     .stack_push(|state| state.match_string("a"))
932     ///     .and_then(|state| state.stack_push(|state| state.match_string("b")))
933     ///     .and_then(|state| state.stack_push(|state| state.match_string("c")))
934     ///     .and_then(|state| state.stack_push(|state| state.match_string("d")))
935     ///     .and_then(|state| state.match_string(" "))
936     ///     .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
937     ///     .and_then(|state| state.match_string(" "))
938     ///     .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
939     /// assert!(result.is_ok());
940     /// assert_eq!(result.unwrap().position().pos(), 10);
941     /// ```
942     #[inline]
stack_match_peek_slice( mut self: Box<Self>, start: i32, end: Option<i32>, match_dir: MatchDir, ) -> ParseResult<Box<Self>>943     pub fn stack_match_peek_slice(
944         mut self: Box<Self>,
945         start: i32,
946         end: Option<i32>,
947         match_dir: MatchDir,
948     ) -> ParseResult<Box<Self>> {
949         let range = match constrain_idxs(start, end, self.stack.len()) {
950             Some(r) => r,
951             None => return Err(self),
952         };
953         // return true if an empty sequence is requested
954         if range.end <= range.start {
955             return Ok(self);
956         }
957 
958         let mut position = self.position.clone();
959         let result = {
960             let mut iter_b2t = self.stack[range].iter();
961             let matcher = |span: &Span| position.match_string(span.as_str());
962             match match_dir {
963                 MatchDir::BottomToTop => iter_b2t.all(matcher),
964                 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
965             }
966         };
967         if result {
968             self.position = position;
969             Ok(self)
970         } else {
971             Err(self)
972         }
973     }
974 
975     /// Matches the full state of the stack.
976     ///
977     /// # Examples
978     ///
979     /// ```
980     /// # use pest;
981     /// # #[allow(non_camel_case_types)]
982     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
983     /// enum Rule {}
984     ///
985     /// let input = "abba";
986     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
987     /// let mut result = state
988     ///     .stack_push(|state| state.match_string("a"))
989     ///     .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
990     ///     .and_then(|state| state.stack_match_peek());
991     /// assert!(result.is_ok());
992     /// assert_eq!(result.unwrap().position().pos(), 4);
993     /// ```
994     #[inline]
stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>>995     pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
996         self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
997     }
998 
999     /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1000     ///
1001     /// # Examples
1002     ///
1003     /// ```
1004     /// /// # use pest;
1005     /// # #[allow(non_camel_case_types)]
1006     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1007     /// enum Rule {}
1008     ///
1009     /// let input = "aaaa";
1010     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1011     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1012     ///     state.stack_push(|state| state.match_string("a"))
1013     /// }).and_then(|state| state.stack_match_peek());
1014     /// assert!(result.is_ok());
1015     /// assert_eq!(result.unwrap().position().pos(), 4);
1016     /// ```
1017     #[inline]
stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>>1018     pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1019         let mut position = self.position.clone();
1020         let mut result = true;
1021         while let Some(span) = self.stack.pop() {
1022             result = position.match_string(span.as_str());
1023             if !result {
1024                 break;
1025             }
1026         }
1027 
1028         if result {
1029             self.position = position;
1030             Ok(self)
1031         } else {
1032             Err(self)
1033         }
1034     }
1035 
1036     /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1037     /// `Err(Box<ParserState>)` otherwise.
1038     ///
1039     /// # Examples
1040     ///
1041     /// ```
1042     /// # use pest;
1043     /// # #[allow(non_camel_case_types)]
1044     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1045     /// enum Rule {}
1046     ///
1047     /// let input = "aa";
1048     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1049     /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1050     ///     |state| state.stack_drop()
1051     /// );
1052     /// assert!(result.is_ok());
1053     /// assert_eq!(result.unwrap().position().pos(), 1);
1054     /// ```
1055     #[inline]
stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>>1056     pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1057         match self.stack.pop() {
1058             Some(_) => Ok(self),
1059             None => Err(self),
1060         }
1061     }
1062 
1063     /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1064     /// this method only restores the stack.
1065     ///
1066     /// # Examples
1067     ///
1068     /// ```
1069     /// # use pest;
1070     /// # #[allow(non_camel_case_types)]
1071     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1072     /// enum Rule {}
1073     ///
1074     /// let input = "ab";
1075     /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1076     /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1077     ///     state.match_string("a")).and_then(|state| state.match_string("a"))
1078     /// );
1079     ///
1080     /// assert!(result.is_err());
1081     ///
1082     /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1083     /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1084     /// assert!(catch_panic.is_err());
1085     /// ```
1086     #[inline]
restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,1087     pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1088     where
1089         F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1090     {
1091         match f(self.checkpoint()) {
1092             Ok(state) => Ok(state.checkpoint_ok()),
1093             Err(state) => Err(state.restore()),
1094         }
1095     }
1096 
1097     // Mark the current state as a checkpoint and return the `Box`.
1098     #[inline]
checkpoint(mut self: Box<Self>) -> Box<Self>1099     pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1100         self.stack.snapshot();
1101         self
1102     }
1103 
1104     // The checkpoint was cleared successfully
1105     // so remove it without touching other stack state.
1106     #[inline]
checkpoint_ok(mut self: Box<Self>) -> Box<Self>1107     pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1108         self.stack.clear_snapshot();
1109         self
1110     }
1111 
1112     // Restore the current state to the most recent checkpoint.
1113     #[inline]
restore(mut self: Box<Self>) -> Box<Self>1114     pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1115         self.stack.restore();
1116         self
1117     }
1118 }
1119 
constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>>1120 fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1121     let start_norm = normalize_index(start, len)?;
1122     let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1123     Some(start_norm..end_norm)
1124 }
1125 
/// Normalizes the index `i` using its sequence's length `len`.
///
/// Non-negative indices are taken as they are; negative indices count back from the end of
/// the sequence, so `-1` becomes `len - 1`. An index equal to `len` is accepted so that it
/// can serve as an exclusive range end.
///
/// Returns `None` if the normalized index is OOB, i.e. greater than `len` or before the
/// start of the sequence.
fn normalize_index(i: i32, len: usize) -> Option<usize> {
    if i >= 0 {
        // Compare in `usize` rather than casting `len` down to `i32`: that cast wraps for
        // lengths above `i32::MAX` and would reject valid indices. A non-negative `i32`
        // converts to `usize` without loss on targets with at least 32-bit pointers.
        let idx = i as usize;
        if idx <= len {
            Some(idx)
        } else {
            None
        }
    } else {
        // Widen before negating so that `i32::MIN` has a representable magnitude, then let
        // `checked_sub` reject offsets that reach before the start of the sequence.
        let offset_from_end = (-i64::from(i)) as usize;
        len.checked_sub(offset_from_end)
    }
}
1142 
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `normalize_index` against a table of `(index, len, expected)` cases.
    fn check_normalize_index(cases: &[(i32, usize, Option<usize>)]) {
        for &(index, len, expected) in cases {
            assert_eq!(normalize_index(index, len), expected);
        }
    }

    #[test]
    fn normalize_index_pos() {
        // In range, exactly at `len` (still accepted), and past the end.
        check_normalize_index(&[(4, 6, Some(4)), (5, 5, Some(5)), (6, 3, None)]);
    }

    #[test]
    fn normalize_index_neg() {
        // Counting back from the end: in range, exactly to the start, and before the start.
        check_normalize_index(&[(-4, 6, Some(2)), (-5, 5, Some(0)), (-6, 3, None)]);
    }
}
1161