1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9
10 use std::ops::Range;
11 use std::rc::Rc;
12
13 use error::{Error, ErrorVariant};
14 use iterators::{pairs, QueueableToken};
15 use position::{self, Position};
16 use span::Span;
17 use stack::Stack;
18 use RuleType;
19
/// The current lookahead status of a [`ParserState`].
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Lookahead {
    /// Set while running the closure of a positive lookahead, or of a negative lookahead that is
    /// itself nested inside a negative one.
    Positive,
    /// Set while running the closure of a negative lookahead, or of a positive lookahead that is
    /// nested inside a negative one.
    Negative,
    /// Not inside any lookahead. This is the value a fresh `ParserState` starts with, and the only
    /// one under which `ParserState::rule` queues tokens.
    None,
}
29
/// The current atomicity of a [`ParserState`].
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Atomicity {
    /// While in this mode `ParserState::rule` queues no tokens and records no rule attempts.
    Atomic,
    /// Unlike `Atomic`, `ParserState::rule` still queues tokens in this mode.
    CompoundAtomic,
    /// The mode a fresh `ParserState` starts in; `ParserState::rule` queues tokens.
    NonAtomic,
}
39
/// Type alias to simplify specifying the return value of chained closures.
///
/// Both variants carry the same type: `Ok` hands the state back after a successful step and `Err`
/// hands it back after a failed one, so the caller always regains ownership of the state.
pub type ParseResult<S> = Result<S, S>;
42
/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MatchDir {
    /// Match the selected spans in the order the stack slice yields them.
    BottomToTop,
    /// Match the selected spans in the reverse of the order the stack slice yields them.
    TopToBottom,
}
49
/// The complete state of a [`Parser`].
///
/// [`Parser`]: trait.Parser.html
#[derive(Debug)]
pub struct ParserState<'i, R: RuleType> {
    /// Current position in the input; a fresh state starts at the beginning of the input.
    position: Position<'i>,
    /// Tokens queued so far; `rule` wraps each token-emitting rule in a `Start`/`End` pair.
    queue: Vec<QueueableToken<R>>,
    /// Lookahead mode currently in effect; set and put back by `lookahead`.
    lookahead: Lookahead,
    /// Rules recorded by `track` while not in a negative lookahead; reported as `positives` when
    /// parsing fails.
    pos_attempts: Vec<R>,
    /// Rules recorded by `track` while in a negative lookahead; reported as `negatives` when
    /// parsing fails.
    neg_attempts: Vec<R>,
    /// Input position the recorded attempts refer to; `track` only ever moves it forward, and
    /// `state` reports the parsing error at this position.
    attempt_pos: usize,
    /// Atomicity mode currently in effect; set and put back by `atomic`.
    atomicity: Atomicity,
    /// Spans captured by `stack_push` and matched again by the other `stack_*` methods.
    stack: Stack<Span<'i>>,
}
64
65 /// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// # use pest;
71 /// let input = "";
72 /// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
73 /// ```
state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>> where F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,74 pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
75 where
76 F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
77 {
78 let state = ParserState::new(input);
79
80 match f(state) {
81 Ok(state) => {
82 let len = state.queue.len();
83 Ok(pairs::new(Rc::new(state.queue), input, 0, len))
84 }
85 Err(mut state) => {
86 state.pos_attempts.sort();
87 state.pos_attempts.dedup();
88 state.neg_attempts.sort();
89 state.neg_attempts.dedup();
90
91 Err(Error::new_from_pos(
92 ErrorVariant::ParsingError {
93 positives: state.pos_attempts.clone(),
94 negatives: state.neg_attempts.clone(),
95 },
96 // TODO(performance): Guarantee state.attempt_pos is a valid position
97 position::Position::new(input, state.attempt_pos).unwrap(),
98 ))
99 }
100 }
101 }
102
103 impl<'i, R: RuleType> ParserState<'i, R> {
104 /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
105 /// will be passed from closure to closure based on the needs of the specified `Parser`.
106 ///
107 /// # Examples
108 ///
109 /// ```
110 /// # use pest;
111 /// let input = "";
112 /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
113 /// ```
114 #[allow(clippy::new_ret_no_self)]
new(input: &'i str) -> Box<Self>115 pub fn new(input: &'i str) -> Box<Self> {
116 Box::new(ParserState {
117 position: Position::from_start(input),
118 queue: vec![],
119 lookahead: Lookahead::None,
120 pos_attempts: vec![],
121 neg_attempts: vec![],
122 attempt_pos: 0,
123 atomicity: Atomicity::NonAtomic,
124 stack: Stack::new(),
125 })
126 }
127
128 /// Returns a reference to the current `Position` of the `ParserState`.
129 ///
130 /// # Examples
131 ///
132 /// ```
133 /// # use pest;
134 /// # #[allow(non_camel_case_types)]
135 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
136 /// enum Rule {
137 /// ab
138 /// }
139 ///
140 /// let input = "ab";
141 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
142 /// let position = state.position();
143 /// assert_eq!(position.pos(), 0);
144 /// ```
pub fn position(&self) -> &Position<'i> {
    // Borrowed rather than cloned; the caller decides whether a copy is needed.
    &self.position
}
148
149 /// Returns the current atomicity of the `ParserState`.
150 ///
151 /// # Examples
152 ///
153 /// ```
154 /// # use pest;
155 /// # use pest::Atomicity;
156 /// # #[allow(non_camel_case_types)]
157 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
158 /// enum Rule {
159 /// ab
160 /// }
161 ///
162 /// let input = "ab";
163 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
164 /// let atomicity = state.atomicity();
165 /// assert_eq!(atomicity, Atomicity::NonAtomic);
166 /// ```
pub fn atomicity(&self) -> Atomicity {
    // `Atomicity` is `Copy`, so the value itself is returned.
    self.atomicity
}
170
171 /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
172 /// meant to match the rule.
173 ///
174 /// # Examples
175 ///
176 /// ```
177 /// # use pest;
178 /// # #[allow(non_camel_case_types)]
179 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
180 /// enum Rule {
181 /// a
182 /// }
183 ///
184 /// let input = "a";
185 /// let pairs: Vec<_> = pest::state(input, |state| {
186 /// state.rule(Rule::a, |s| Ok(s))
187 /// }).unwrap().collect();
188 ///
189 /// assert_eq!(pairs.len(), 1);
190 /// ```
#[inline]
pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
where
    F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
    // Snapshot where this rule starts: the input position and the queue slot that its `Start`
    // token will occupy if tokens are being emitted.
    let actual_pos = self.position.pos();
    let index = self.queue.len();

    // Remember how long the attempt lists were before the closure ran, so that `track` can cut
    // them back to this point and drop whatever the nested rules recorded.
    let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
        (self.pos_attempts.len(), self.neg_attempts.len())
    } else {
        // Attempts have not been cleared yet since the attempt_pos is older.
        (0, 0)
    };

    // Tokens are only queued outside of lookaheads and outside of atomic mode.
    if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
        // Pair's position will only be known after running the closure, so `end_token_index` is
        // a placeholder that gets patched once the closure has succeeded.
        self.queue.push(QueueableToken::Start {
            end_token_index: 0,
            input_pos: actual_pos,
        });
    }

    // Number of attempts already recorded at this position, taken before the closure runs.
    let attempts = self.attempts_at(actual_pos);

    let result = f(self);

    match result {
        Ok(mut new_state) => {
            // A successful rule is only recorded as an attempt inside a negative lookahead.
            if new_state.lookahead == Lookahead::Negative {
                new_state.track(
                    rule,
                    actual_pos,
                    pos_attempts_index,
                    neg_attempts_index,
                    attempts,
                );
            }

            if new_state.lookahead == Lookahead::None
                && new_state.atomicity != Atomicity::Atomic
            {
                // Point the `Start` token queued before the closure at the slot the matching
                // `End` token is about to take (the current end of the queue).
                let new_index = new_state.queue.len();
                match new_state.queue[index] {
                    QueueableToken::Start {
                        ref mut end_token_index,
                        ..
                    } => *end_token_index = new_index,
                    // `index` was filled with a `Start` token above under the same condition.
                    _ => unreachable!(),
                };

                let new_pos = new_state.position.pos();

                new_state.queue.push(QueueableToken::End {
                    start_token_index: index,
                    rule,
                    input_pos: new_pos,
                });
            }

            Ok(new_state)
        }
        Err(mut new_state) => {
            // A failed rule is recorded as an attempt everywhere except inside a negative
            // lookahead.
            if new_state.lookahead != Lookahead::Negative {
                new_state.track(
                    rule,
                    actual_pos,
                    pos_attempts_index,
                    neg_attempts_index,
                    attempts,
                );
            }

            if new_state.lookahead == Lookahead::None
                && new_state.atomicity != Atomicity::Atomic
            {
                // Discard this rule's `Start` token and anything queued after it.
                new_state.queue.truncate(index);
            }

            Err(new_state)
        }
    }
}
276
attempts_at(&self, pos: usize) -> usize277 fn attempts_at(&self, pos: usize) -> usize {
278 if self.attempt_pos == pos {
279 self.pos_attempts.len() + self.neg_attempts.len()
280 } else {
281 0
282 }
283 }
284
// Records `rule` as an attempt made at input position `pos`.
//
// `pos_attempts_index` and `neg_attempts_index` are the lengths the attempt lists had before the
// rule's closure ran; `prev_attempts` is the total number of attempts that were recorded at `pos`
// at that time.
fn track(
    &mut self,
    rule: R,
    pos: usize,
    pos_attempts_index: usize,
    neg_attempts_index: usize,
    prev_attempts: usize,
) {
    // Nothing is recorded while in atomic mode.
    if self.atomicity == Atomicity::Atomic {
        return;
    }

    // If nested rules made no progress, there is no use to report them; it's only useful to
    // track the current rule, the exception being when only one attempt has been made during
    // the children rules.
    let curr_attempts = self.attempts_at(pos);
    if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
        return;
    }

    // Same position as the recorded attempts: drop what the nested rules added since this rule
    // started, so that this rule replaces them.
    if pos == self.attempt_pos {
        self.pos_attempts.truncate(pos_attempts_index);
        self.neg_attempts.truncate(neg_attempts_index);
    }

    // Further into the input than the recorded attempts: start over from this position.
    if pos > self.attempt_pos {
        self.pos_attempts.clear();
        self.neg_attempts.clear();
        self.attempt_pos = pos;
    }

    // Inside a negative lookahead the rule goes to the negative list, otherwise to the positive
    // one.
    let attempts = if self.lookahead != Lookahead::Negative {
        &mut self.pos_attempts
    } else {
        &mut self.neg_attempts
    };

    // True unless `pos` lies before `attempt_pos`, in which case the attempt is not recorded.
    if pos == self.attempt_pos {
        attempts.push(rule);
    }
}
326
327 /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
328 /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
329 /// `Box<ParserState>` otherwise.
330 ///
331 /// This method is useful to parse sequences that only match together which usually come in the
332 /// form of chained `Result`s with
333 /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
334 ///
335 ///
336 /// # Examples
337 ///
338 /// ```
339 /// # use pest;
340 /// # #[allow(non_camel_case_types)]
341 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
342 /// enum Rule {
343 /// a
344 /// }
345 ///
346 /// let input = "a";
347 /// let pairs: Vec<_> = pest::state(input, |state| {
348 /// state.sequence(|s| {
349 /// s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
350 /// s.match_string("b")
351 /// })
352 /// }).or_else(|s| {
353 /// Ok(s)
354 /// })
355 /// }).unwrap().collect();
356 ///
357 /// assert_eq!(pairs.len(), 0);
358 /// ```
359 #[inline]
sequence<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,360 pub fn sequence<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
361 where
362 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
363 {
364 let token_index = self.queue.len();
365 let initial_pos = self.position.clone();
366
367 let result = f(self);
368
369 match result {
370 Ok(new_state) => Ok(new_state),
371 Err(mut new_state) => {
372 // Restore the initial position and truncate the token queue.
373 new_state.position = initial_pos;
374 new_state.queue.truncate(token_index);
375 Err(new_state)
376 }
377 }
378 }
379
/// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>` until `f`
/// fails. Always returns `Ok`, carrying the `Box<ParserState>` that the failing call to `f`
/// returned inside its `Err`.
382 ///
383 /// # Examples
384 ///
385 /// ```
386 /// # use pest;
387 /// # #[allow(non_camel_case_types)]
388 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
389 /// enum Rule {
390 /// ab
391 /// }
392 ///
393 /// let input = "aab";
394 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
395 /// let mut result = state.repeat(|s| {
396 /// s.match_string("a")
397 /// });
398 /// assert!(result.is_ok());
399 /// assert_eq!(result.unwrap().position().pos(), 2);
400 ///
401 /// state = pest::ParserState::new(input);
402 /// result = state.repeat(|s| {
403 /// s.match_string("b")
404 /// });
405 /// assert!(result.is_ok());
406 /// assert_eq!(result.unwrap().position().pos(), 0);
407 /// ```
408 #[inline]
repeat<F>(self: Box<Self>, mut f: F) -> ParseResult<Box<Self>> where F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,409 pub fn repeat<F>(self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
410 where
411 F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
412 {
413 let mut result = f(self);
414
415 loop {
416 match result {
417 Ok(state) => result = f(state),
418 Err(state) => return Ok(state),
419 };
420 }
421 }
422
423 /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
424 /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
425 ///
426 /// # Examples
427 ///
428 /// ```
429 /// # use pest;
430 /// # #[allow(non_camel_case_types)]
431 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
432 /// enum Rule {
433 /// ab
434 /// }
435 ///
436 /// let input = "ab";
437 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
438 /// let result = state.optional(|s| {
439 /// s.match_string("ab")
440 /// });
441 /// assert!(result.is_ok());
442 ///
443 /// state = pest::ParserState::new(input);
444 /// let result = state.optional(|s| {
445 /// s.match_string("ac")
446 /// });
447 /// assert!(result.is_ok());
448 /// ```
449 #[inline]
optional<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,450 pub fn optional<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
451 where
452 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
453 {
454 match f(self) {
455 Ok(state) | Err(state) => Ok(state),
456 }
457 }
458
459 /// Attempts to match a single character based on a filter function. Returns `Ok` with the
460 /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
461 /// otherwise.
462 ///
463 /// # Examples
464 ///
465 /// ```
466 /// # use pest;
467 /// # #[allow(non_camel_case_types)]
468 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
469 /// enum Rule {}
470 ///
471 /// let input = "ab";
472 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
473 /// let result = state.match_char_by(|c| c.is_ascii());
474 /// assert!(result.is_ok());
475 /// assert_eq!(result.unwrap().position().pos(), 1);
476 ///
477 /// let input = "❤";
478 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
479 /// let result = state.match_char_by(|c| c.is_ascii());
480 /// assert!(result.is_err());
481 /// assert_eq!(result.unwrap_err().position().pos(), 0);
482 /// ```
483 #[inline]
match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(char) -> bool,484 pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
485 where
486 F: FnOnce(char) -> bool,
487 {
488 if self.position.match_char_by(f) {
489 Ok(self)
490 } else {
491 Err(self)
492 }
493 }
494
495 /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
496 /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
497 ///
498 /// # Examples
499 ///
500 /// ```
501 /// # use pest;
502 /// # #[allow(non_camel_case_types)]
503 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
504 /// enum Rule {}
505 ///
506 /// let input = "ab";
507 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
508 /// let mut result = state.match_string("ab");
509 /// assert!(result.is_ok());
510 /// assert_eq!(result.unwrap().position().pos(), 2);
511 ///
512 /// state = pest::ParserState::new(input);
513 /// result = state.match_string("ac");
514 /// assert!(result.is_err());
515 /// assert_eq!(result.unwrap_err().position().pos(), 0);
516 /// ```
517 #[inline]
match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>>518 pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
519 if self.position.match_string(string) {
520 Ok(self)
521 } else {
522 Err(self)
523 }
524 }
525
526 /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
527 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
528 ///
529 /// # Examples
530 ///
531 /// ```
532 /// # use pest;
533 /// # #[allow(non_camel_case_types)]
534 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
535 /// enum Rule {}
536 ///
537 /// let input = "ab";
538 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
539 /// let mut result = state.match_insensitive("AB");
540 /// assert!(result.is_ok());
541 /// assert_eq!(result.unwrap().position().pos(), 2);
542 ///
543 /// state = pest::ParserState::new(input);
544 /// result = state.match_insensitive("AC");
545 /// assert!(result.is_err());
546 /// assert_eq!(result.unwrap_err().position().pos(), 0);
547 /// ```
548 #[inline]
match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>>549 pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
550 if self.position.match_insensitive(string) {
551 Ok(self)
552 } else {
553 Err(self)
554 }
555 }
556
557 /// Attempts to match a single character from the given range. Returns `Ok` with the updated
558 /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
559 ///
560 /// # Examples
561 ///
562 /// ```
563 /// # use pest;
564 /// # #[allow(non_camel_case_types)]
565 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
566 /// enum Rule {}
567 ///
568 /// let input = "ab";
569 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
570 /// let mut result = state.match_range('a'..'z');
571 /// assert!(result.is_ok());
572 /// assert_eq!(result.unwrap().position().pos(), 1);
573 ///
574 /// state = pest::ParserState::new(input);
575 /// result = state.match_range('A'..'Z');
576 /// assert!(result.is_err());
577 /// assert_eq!(result.unwrap_err().position().pos(), 0);
578 /// ```
579 #[inline]
match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>>580 pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
581 if self.position.match_range(range) {
582 Ok(self)
583 } else {
584 Err(self)
585 }
586 }
587
588 /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
589 /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
590 ///
591 /// # Examples
592 ///
593 /// ```
594 /// # use pest;
595 /// # #[allow(non_camel_case_types)]
596 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
597 /// enum Rule {}
598 ///
599 /// let input = "ab";
600 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
601 /// let mut result = state.skip(1);
602 /// assert!(result.is_ok());
603 /// assert_eq!(result.unwrap().position().pos(), 1);
604 ///
605 /// state = pest::ParserState::new(input);
606 /// result = state.skip(3);
607 /// assert!(result.is_err());
608 /// assert_eq!(result.unwrap_err().position().pos(), 0);
609 /// ```
610 #[inline]
skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>>611 pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
612 if self.position.skip(n) {
613 Ok(self)
614 } else {
615 Err(self)
616 }
617 }
618
619 /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
620 /// updated `Box<ParserState>` whether or not one of the strings is found.
621 ///
622 /// # Examples
623 ///
624 /// ```
625 /// # use pest;
626 /// # #[allow(non_camel_case_types)]
627 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
628 /// enum Rule {}
629 ///
630 /// let input = "abcd";
631 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
632 /// let mut result = state.skip_until(&["c", "d"]);
633 /// assert!(result.is_ok());
634 /// assert_eq!(result.unwrap().position().pos(), 2);
635 /// ```
#[inline]
pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
    // Whatever `Position::skip_until` reports is deliberately ignored: this step never fails.
    self.position.skip_until(strings);
    Ok(self)
}
641
642 /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
643 /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
644 ///
645 /// # Examples
646 ///
647 /// ```
648 /// # use pest;
649 /// # #[allow(non_camel_case_types)]
650 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
651 /// enum Rule {}
652 ///
653 /// let input = "ab";
654 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
655 /// let mut result = state.start_of_input();
656 /// assert!(result.is_ok());
657 ///
658 /// state = pest::ParserState::new(input);
659 /// state = state.match_string("ab").unwrap();
660 /// result = state.start_of_input();
661 /// assert!(result.is_err());
662 /// ```
663 #[inline]
start_of_input(self: Box<Self>) -> ParseResult<Box<Self>>664 pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
665 if self.position.at_start() {
666 Ok(self)
667 } else {
668 Err(self)
669 }
670 }
671
672 /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
673 /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
674 ///
675 /// # Examples
676 ///
677 /// ```
678 /// # use pest;
679 /// # #[allow(non_camel_case_types)]
680 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
681 /// enum Rule {}
682 ///
683 /// let input = "ab";
684 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
685 /// let mut result = state.end_of_input();
686 /// assert!(result.is_err());
687 ///
688 /// state = pest::ParserState::new(input);
689 /// state = state.match_string("ab").unwrap();
690 /// result = state.end_of_input();
691 /// assert!(result.is_ok());
692 /// ```
693 #[inline]
end_of_input(self: Box<Self>) -> ParseResult<Box<Self>>694 pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
695 if self.position.at_end() {
696 Ok(self)
697 } else {
698 Err(self)
699 }
700 }
701
702 /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
703 /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
704 /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
705 /// together, negating the `Result`.
706 ///
707 /// # Examples
708 ///
709 /// ```
710 /// # use pest;
711 /// # #[allow(non_camel_case_types)]
712 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
713 /// enum Rule {
714 /// a
715 /// }
716 ///
717 /// let input = "a";
718 /// let pairs: Vec<_> = pest::state(input, |state| {
719 /// state.lookahead(true, |state| {
720 /// state.rule(Rule::a, |s| Ok(s))
721 /// })
722 /// }).unwrap().collect();
723 ///
724 /// assert_eq!(pairs.len(), 0);
725 /// ```
726 #[inline]
lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,727 pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
728 where
729 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
730 {
731 let initial_lookahead = self.lookahead;
732
733 self.lookahead = if is_positive {
734 match initial_lookahead {
735 Lookahead::None | Lookahead::Positive => Lookahead::Positive,
736 Lookahead::Negative => Lookahead::Negative,
737 }
738 } else {
739 match initial_lookahead {
740 Lookahead::None | Lookahead::Positive => Lookahead::Negative,
741 Lookahead::Negative => Lookahead::Positive,
742 }
743 };
744
745 let initial_pos = self.position.clone();
746
747 let result = f(self.checkpoint());
748
749 let result_state = match result {
750 Ok(mut new_state) => {
751 new_state.position = initial_pos;
752 new_state.lookahead = initial_lookahead;
753 Ok(new_state.restore())
754 }
755 Err(mut new_state) => {
756 new_state.position = initial_pos;
757 new_state.lookahead = initial_lookahead;
758 Err(new_state.restore())
759 }
760 };
761
762 if is_positive {
763 result_state
764 } else {
765 match result_state {
766 Ok(state) => Err(state),
767 Err(state) => Ok(state),
768 }
769 }
770 }
771
/// Transformation which stops `Token`s from being generated according to `atomicity`.
773 ///
774 /// # Examples
775 ///
776 /// ```
777 /// # use pest::{self, Atomicity};
778 /// # #[allow(non_camel_case_types)]
779 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
780 /// enum Rule {
781 /// a
782 /// }
783 ///
784 /// let input = "a";
785 /// let pairs: Vec<_> = pest::state(input, |state| {
786 /// state.atomic(Atomicity::Atomic, |s| {
787 /// s.rule(Rule::a, |s| Ok(s))
788 /// })
789 /// }).unwrap().collect();
790 ///
791 /// assert_eq!(pairs.len(), 0);
792 /// ```
793 #[inline]
atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,794 pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
795 where
796 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
797 {
798 let initial_atomicity = self.atomicity;
799 let should_toggle = self.atomicity != atomicity;
800
801 if should_toggle {
802 self.atomicity = atomicity;
803 }
804
805 let result = f(self);
806
807 match result {
808 Ok(mut new_state) => {
809 if should_toggle {
810 new_state.atomicity = initial_atomicity;
811 }
812 Ok(new_state)
813 }
814 Err(mut new_state) => {
815 if should_toggle {
816 new_state.atomicity = initial_atomicity;
817 }
818 Err(new_state)
819 }
820 }
821 }
822
823 /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
824 /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
825 /// called successfully, or `Err(Box<ParserState>)` otherwise.
826 ///
827 /// # Examples
828 ///
829 /// ```
830 /// # use pest;
831 /// # #[allow(non_camel_case_types)]
832 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
833 /// enum Rule {}
834 ///
835 /// let input = "ab";
836 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
837 /// let mut result = state.stack_push(|state| state.match_string("a"));
838 /// assert!(result.is_ok());
839 /// assert_eq!(result.unwrap().position().pos(), 1);
840 /// ```
841 #[inline]
stack_push<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,842 pub fn stack_push<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
843 where
844 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
845 {
846 let start = self.position.clone();
847
848 let result = f(self);
849
850 match result {
851 Ok(mut state) => {
852 let end = state.position.clone();
853 state.stack.push(start.span(&end));
854 Ok(state)
855 }
856 Err(state) => Err(state),
857 }
858 }
859
860 /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
861 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
862 ///
863 /// # Examples
864 ///
865 /// ```
866 /// # use pest;
867 /// # #[allow(non_camel_case_types)]
868 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
869 /// enum Rule {}
870 ///
871 /// let input = "aa";
872 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
873 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
874 /// |state| state.stack_peek()
875 /// );
876 /// assert!(result.is_ok());
877 /// assert_eq!(result.unwrap().position().pos(), 2);
878 /// ```
879 #[inline]
stack_peek(self: Box<Self>) -> ParseResult<Box<Self>>880 pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
881 let string = self
882 .stack
883 .peek()
884 .expect("peek was called on empty stack")
885 .as_str();
886 self.match_string(string)
887 }
888
889 /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
890 /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
891 ///
892 /// # Examples
893 ///
894 /// ```
895 /// # use pest;
896 /// # #[allow(non_camel_case_types)]
897 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
898 /// enum Rule {}
899 ///
900 /// let input = "aa";
901 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
902 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
903 /// |state| state.stack_pop()
904 /// );
905 /// assert!(result.is_ok());
906 /// assert_eq!(result.unwrap().position().pos(), 2);
907 /// ```
908 #[inline]
stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>>909 pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
910 let string = self
911 .stack
912 .pop()
913 .expect("pop was called on empty stack")
914 .as_str();
915 self.match_string(string)
916 }
917
918 /// Matches part of the state of the stack.
919 ///
920 /// # Examples
921 ///
922 /// ```
923 /// # use pest::{self, MatchDir};
924 /// # #[allow(non_camel_case_types)]
925 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
926 /// enum Rule {}
927 ///
928 /// let input = "abcd cd cb";
929 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
930 /// let mut result = state
931 /// .stack_push(|state| state.match_string("a"))
932 /// .and_then(|state| state.stack_push(|state| state.match_string("b")))
933 /// .and_then(|state| state.stack_push(|state| state.match_string("c")))
934 /// .and_then(|state| state.stack_push(|state| state.match_string("d")))
935 /// .and_then(|state| state.match_string(" "))
936 /// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
937 /// .and_then(|state| state.match_string(" "))
938 /// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
939 /// assert!(result.is_ok());
940 /// assert_eq!(result.unwrap().position().pos(), 10);
941 /// ```
942 #[inline]
stack_match_peek_slice( mut self: Box<Self>, start: i32, end: Option<i32>, match_dir: MatchDir, ) -> ParseResult<Box<Self>>943 pub fn stack_match_peek_slice(
944 mut self: Box<Self>,
945 start: i32,
946 end: Option<i32>,
947 match_dir: MatchDir,
948 ) -> ParseResult<Box<Self>> {
949 let range = match constrain_idxs(start, end, self.stack.len()) {
950 Some(r) => r,
951 None => return Err(self),
952 };
953 // return true if an empty sequence is requested
954 if range.end <= range.start {
955 return Ok(self);
956 }
957
958 let mut position = self.position.clone();
959 let result = {
960 let mut iter_b2t = self.stack[range].iter();
961 let matcher = |span: &Span| position.match_string(span.as_str());
962 match match_dir {
963 MatchDir::BottomToTop => iter_b2t.all(matcher),
964 MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
965 }
966 };
967 if result {
968 self.position = position;
969 Ok(self)
970 } else {
971 Err(self)
972 }
973 }
974
975 /// Matches the full state of the stack.
976 ///
977 /// # Examples
978 ///
979 /// ```
980 /// # use pest;
981 /// # #[allow(non_camel_case_types)]
982 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
983 /// enum Rule {}
984 ///
985 /// let input = "abba";
986 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
987 /// let mut result = state
988 /// .stack_push(|state| state.match_string("a"))
989 /// .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
990 /// .and_then(|state| state.stack_match_peek());
991 /// assert!(result.is_ok());
992 /// assert_eq!(result.unwrap().position().pos(), 4);
993 /// ```
#[inline]
pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
    // Delegates with start 0 and no end, matching in top-to-bottom order.
    // NOTE(review): that this selects the entire stack depends on `constrain_idxs`, which is not
    // visible from here — confirm.
    self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
}
998
999 /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1000 ///
1001 /// # Examples
1002 ///
1003 /// ```
1004 /// /// # use pest;
1005 /// # #[allow(non_camel_case_types)]
1006 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1007 /// enum Rule {}
1008 ///
1009 /// let input = "aaaa";
1010 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1011 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1012 /// state.stack_push(|state| state.match_string("a"))
1013 /// }).and_then(|state| state.stack_match_peek());
1014 /// assert!(result.is_ok());
1015 /// assert_eq!(result.unwrap().position().pos(), 4);
1016 /// ```
1017 #[inline]
stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>>1018 pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1019 let mut position = self.position.clone();
1020 let mut result = true;
1021 while let Some(span) = self.stack.pop() {
1022 result = position.match_string(span.as_str());
1023 if !result {
1024 break;
1025 }
1026 }
1027
1028 if result {
1029 self.position = position;
1030 Ok(self)
1031 } else {
1032 Err(self)
1033 }
1034 }
1035
1036 /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1037 /// `Err(Box<ParserState>)` otherwise.
1038 ///
1039 /// # Examples
1040 ///
1041 /// ```
1042 /// # use pest;
1043 /// # #[allow(non_camel_case_types)]
1044 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1045 /// enum Rule {}
1046 ///
1047 /// let input = "aa";
1048 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1049 /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1050 /// |state| state.stack_drop()
1051 /// );
1052 /// assert!(result.is_ok());
1053 /// assert_eq!(result.unwrap().position().pos(), 1);
1054 /// ```
1055 #[inline]
stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>>1056 pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1057 match self.stack.pop() {
1058 Some(_) => Ok(self),
1059 None => Err(self),
1060 }
1061 }
1062
1063 /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1064 /// this method only restores the stack.
1065 ///
1066 /// # Examples
1067 ///
1068 /// ```
1069 /// # use pest;
1070 /// # #[allow(non_camel_case_types)]
1071 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1072 /// enum Rule {}
1073 ///
1074 /// let input = "ab";
1075 /// let mut state: Box<pest::ParserState<Rule>> = pest::ParserState::new(input);
1076 /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1077 /// state.match_string("a")).and_then(|state| state.match_string("a"))
1078 /// );
1079 ///
1080 /// assert!(result.is_err());
1081 ///
1082 /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1083 /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1084 /// assert!(catch_panic.is_err());
1085 /// ```
1086 #[inline]
restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>> where F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,1087 pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1088 where
1089 F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1090 {
1091 match f(self.checkpoint()) {
1092 Ok(state) => Ok(state.checkpoint_ok()),
1093 Err(state) => Err(state.restore()),
1094 }
1095 }
1096
1097 // Mark the current state as a checkpoint and return the `Box`.
1098 #[inline]
checkpoint(mut self: Box<Self>) -> Box<Self>1099 pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1100 self.stack.snapshot();
1101 self
1102 }
1103
1104 // The checkpoint was cleared successfully
1105 // so remove it without touching other stack state.
1106 #[inline]
checkpoint_ok(mut self: Box<Self>) -> Box<Self>1107 pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1108 self.stack.clear_snapshot();
1109 self
1110 }
1111
1112 // Restore the current state to the most recent checkpoint.
1113 #[inline]
restore(mut self: Box<Self>) -> Box<Self>1114 pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1115 self.stack.restore();
1116 self
1117 }
1118 }
1119
constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>>1120 fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1121 let start_norm = normalize_index(start, len)?;
1122 let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1123 Some(start_norm..end_norm)
1124 }
1125
/// Normalizes the index using its sequence’s length.
///
/// A non-negative `i` is taken as-is and is valid up to and including `len` (so that it can
/// serve as an exclusive range end). A negative `i` counts back from the end of the
/// sequence, i.e. it resolves to `len - |i|`.
///
/// Returns `None` if the normalized index is OOB.
///
/// The arithmetic is done in `usize` rather than by casting `len` to `i32`, so sequences
/// longer than `i32::MAX` do not wrap around or overflow.
fn normalize_index(i: i32, len: usize) -> Option<usize> {
    if i >= 0 {
        // A non-negative `i32` always fits in `usize` on 32/64-bit targets.
        let idx = i as usize;
        if idx <= len {
            Some(idx)
        } else {
            None
        }
    } else {
        // Widen before negating so that `i32::MIN` cannot overflow.
        let from_end = (-i64::from(i)) as usize;
        // `checked_sub` yields `None` exactly when the index would fall before 0.
        len.checked_sub(from_end)
    }
}
1142
#[cfg(test)]
mod test {
    use super::*;

    // Non-negative indices: in range, exactly at `len`, and past the end.
    #[test]
    fn normalize_index_pos() {
        let cases = [(4, 6, Some(4)), (5, 5, Some(5)), (6, 3, None)];
        for &(index, len, expected) in cases.iter() {
            assert_eq!(normalize_index(index, len), expected);
        }
    }

    // Negative indices: counted from the end, landing on 0, and before the start.
    #[test]
    fn normalize_index_neg() {
        let cases = [(-4, 6, Some(2)), (-5, 5, Some(0)), (-6, 3, None)];
        for &(index, len, expected) in cases.iter() {
            assert_eq!(normalize_index(index, len), expected);
        }
    }
}
1161