// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
9 
10 use alloc::format;
11 use alloc::rc::Rc;
12 #[cfg(feature = "pretty-print")]
13 use alloc::string::String;
14 use alloc::vec::Vec;
15 use core::borrow::Borrow;
16 use core::fmt;
17 use core::hash::{Hash, Hasher};
18 use core::ptr;
19 use core::str;
20 
21 #[cfg(feature = "pretty-print")]
22 use serde::ser::SerializeStruct;
23 
24 use super::line_index::LineIndex;
25 use super::pairs::{self, Pairs};
26 use super::queueable_token::QueueableToken;
27 use super::tokens::{self, Tokens};
28 use crate::span::Span;
29 use crate::RuleType;
30 
31 /// A matching pair of [`Token`]s and everything between them.
32 ///
33 /// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
34 /// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
35 /// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
36 /// editors.
37 ///
38 /// [`Token`]: ../enum.Token.html
39 #[derive(Clone)]
40 pub struct Pair<'i, R> {
41     queue: Rc<Vec<QueueableToken<'i, R>>>,
42     input: &'i str,
43     /// Token index into `queue`.
44     start: usize,
45     line_index: Rc<LineIndex>,
46 }
47 
new<'i, R: RuleType>( queue: Rc<Vec<QueueableToken<'i, R>>>, input: &'i str, line_index: Rc<LineIndex>, start: usize, ) -> Pair<'i, R>48 pub fn new<'i, R: RuleType>(
49     queue: Rc<Vec<QueueableToken<'i, R>>>,
50     input: &'i str,
51     line_index: Rc<LineIndex>,
52     start: usize,
53 ) -> Pair<'i, R> {
54     Pair {
55         queue,
56         input,
57         start,
58         line_index,
59     }
60 }
61 
62 impl<'i, R: RuleType> Pair<'i, R> {
63     /// Returns the `Rule` of the `Pair`.
64     ///
65     /// # Examples
66     ///
67     /// ```
68     /// # use std::rc::Rc;
69     /// # use pest;
70     /// # #[allow(non_camel_case_types)]
71     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
72     /// enum Rule {
73     ///     a
74     /// }
75     ///
76     /// let input = "";
77     /// let pair = pest::state(input, |state| {
78     ///     // generating Token pair with Rule::a ...
79     /// #     state.rule(Rule::a, |s| Ok(s))
80     /// }).unwrap().next().unwrap();
81     ///
82     /// assert_eq!(pair.as_rule(), Rule::a);
83     /// ```
84     #[inline]
as_rule(&self) -> R85     pub fn as_rule(&self) -> R {
86         match self.queue[self.pair()] {
87             QueueableToken::End { rule, .. } => rule,
88             _ => unreachable!(),
89         }
90     }
91 
92     /// Captures a slice from the `&str` defined by the token `Pair`.
93     ///
94     /// # Examples
95     ///
96     /// ```
97     /// # use std::rc::Rc;
98     /// # use pest;
99     /// # #[allow(non_camel_case_types)]
100     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
101     /// enum Rule {
102     ///     ab
103     /// }
104     ///
105     /// let input = "ab";
106     /// let pair = pest::state(input, |state| {
107     ///     // generating Token pair with Rule::ab ...
108     /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
109     /// }).unwrap().next().unwrap();
110     ///
111     /// assert_eq!(pair.as_str(), "ab");
112     /// ```
113     #[inline]
as_str(&self) -> &'i str114     pub fn as_str(&self) -> &'i str {
115         let start = self.pos(self.start);
116         let end = self.pos(self.pair());
117 
118         // Generated positions always come from Positions and are UTF-8 borders.
119         &self.input[start..end]
120     }
121 
122     /// Returns the input string of the `Pair`.
123     ///
124     /// This function returns the input string of the `Pair` as a `&str`. This is the source string
125     /// from which the `Pair` was created. The returned `&str` can be used to examine the contents of
126     /// the `Pair` or to perform further processing on the string.
127     ///
128     /// # Examples
129     ///
130     /// ```
131     /// # use std::rc::Rc;
132     /// # use pest;
133     /// # #[allow(non_camel_case_types)]
134     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
135     /// enum Rule {
136     ///     ab
137     /// }
138     ///
139     /// // Example: Get input string from a Pair
140     ///
141     /// let input = "ab";
142     /// let pair = pest::state(input, |state| {
143     ///     // generating Token pair with Rule::ab ...
144     /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
145     /// }).unwrap().next().unwrap();
146     ///
147     /// assert_eq!(pair.as_str(), "ab");
148     /// assert_eq!(input, pair.get_input());
149     /// ```
get_input(&self) -> &'i str150     pub fn get_input(&self) -> &'i str {
151         self.input
152     }
153 
154     /// Returns the `Span` defined by the `Pair`, consuming it.
155     ///
156     /// # Examples
157     ///
158     /// ```
159     /// # use std::rc::Rc;
160     /// # use pest;
161     /// # #[allow(non_camel_case_types)]
162     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
163     /// enum Rule {
164     ///     ab
165     /// }
166     ///
167     /// let input = "ab";
168     /// let pair = pest::state(input, |state| {
169     ///     // generating Token pair with Rule::ab ...
170     /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
171     /// }).unwrap().next().unwrap();
172     ///
173     /// assert_eq!(pair.into_span().as_str(), "ab");
174     /// ```
175     #[inline]
176     #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
into_span(self) -> Span<'i>177     pub fn into_span(self) -> Span<'i> {
178         self.as_span()
179     }
180 
181     /// Returns the `Span` defined by the `Pair`, **without** consuming it.
182     ///
183     /// # Examples
184     ///
185     /// ```
186     /// # use std::rc::Rc;
187     /// # use pest;
188     /// # #[allow(non_camel_case_types)]
189     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
190     /// enum Rule {
191     ///     ab
192     /// }
193     ///
194     /// let input = "ab";
195     /// let pair = pest::state(input, |state| {
196     ///     // generating Token pair with Rule::ab ...
197     /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
198     /// }).unwrap().next().unwrap();
199     ///
200     /// assert_eq!(pair.as_span().as_str(), "ab");
201     /// ```
202     #[inline]
as_span(&self) -> Span<'i>203     pub fn as_span(&self) -> Span<'i> {
204         let start = self.pos(self.start);
205         let end = self.pos(self.pair());
206 
207         Span::new_internal(self.input, start, end)
208     }
209 
210     /// Get current node tag
211     #[inline]
as_node_tag(&self) -> Option<&str>212     pub fn as_node_tag(&self) -> Option<&str> {
213         match &self.queue[self.pair()] {
214             QueueableToken::End { tag, .. } => tag.as_ref().map(|x| x.borrow()),
215             _ => None,
216         }
217     }
218 
219     /// Returns the inner `Pairs` between the `Pair`, consuming it.
220     ///
221     /// # Examples
222     ///
223     /// ```
224     /// # use std::rc::Rc;
225     /// # use pest;
226     /// # #[allow(non_camel_case_types)]
227     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
228     /// enum Rule {
229     ///     a
230     /// }
231     ///
232     /// let input = "";
233     /// let pair = pest::state(input, |state| {
234     ///     // generating Token pair with Rule::a ...
235     /// #     state.rule(Rule::a, |s| Ok(s))
236     /// }).unwrap().next().unwrap();
237     ///
238     /// assert!(pair.into_inner().next().is_none());
239     /// ```
240     #[inline]
into_inner(self) -> Pairs<'i, R>241     pub fn into_inner(self) -> Pairs<'i, R> {
242         let pair = self.pair();
243 
244         pairs::new(
245             self.queue,
246             self.input,
247             Some(self.line_index),
248             self.start + 1,
249             pair,
250         )
251     }
252 
253     /// Returns the `Tokens` for the `Pair`.
254     ///
255     /// # Examples
256     ///
257     /// ```
258     /// # use std::rc::Rc;
259     /// # use pest;
260     /// # #[allow(non_camel_case_types)]
261     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
262     /// enum Rule {
263     ///     a
264     /// }
265     ///
266     /// let input = "";
267     /// let pair = pest::state(input, |state| {
268     ///     // generating Token pair with Rule::a ...
269     /// #     state.rule(Rule::a, |s| Ok(s))
270     /// }).unwrap().next().unwrap();
271     /// let tokens: Vec<_> = pair.tokens().collect();
272     ///
273     /// assert_eq!(tokens.len(), 2);
274     /// ```
275     #[inline]
tokens(self) -> Tokens<'i, R>276     pub fn tokens(self) -> Tokens<'i, R> {
277         let end = self.pair();
278 
279         tokens::new(self.queue, self.input, self.start, end + 1)
280     }
281 
282     /// Generates a string that stores the lexical information of `self` in
283     /// a pretty-printed JSON format.
284     #[cfg(feature = "pretty-print")]
to_json(&self) -> String285     pub fn to_json(&self) -> String {
286         ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
287     }
288 
289     /// Returns the `line`, `col` of this pair start.
line_col(&self) -> (usize, usize)290     pub fn line_col(&self) -> (usize, usize) {
291         let pos = self.pos(self.start);
292         self.line_index.line_col(self.input, pos)
293     }
294 
pair(&self) -> usize295     fn pair(&self) -> usize {
296         match self.queue[self.start] {
297             QueueableToken::Start {
298                 end_token_index, ..
299             } => end_token_index,
300             _ => unreachable!(),
301         }
302     }
303 
pos(&self, index: usize) -> usize304     fn pos(&self, index: usize) -> usize {
305         match self.queue[index] {
306             QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
307                 input_pos
308             }
309         }
310     }
311 }
312 
313 impl<'i, R: RuleType> Pairs<'i, R> {
314     /// Create a new `Pairs` iterator containing just the single `Pair`.
single(pair: Pair<'i, R>) -> Self315     pub fn single(pair: Pair<'i, R>) -> Self {
316         let end = pair.pair();
317         pairs::new(
318             pair.queue,
319             pair.input,
320             Some(pair.line_index),
321             pair.start,
322             end,
323         )
324     }
325 }
326 
327 impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result328     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
329         let pair = &mut f.debug_struct("Pair");
330         pair.field("rule", &self.as_rule());
331         // In order not to break compatibility
332         if let Some(s) = self.as_node_tag() {
333             pair.field("node_tag", &s);
334         }
335         pair.field("span", &self.as_span())
336             .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
337             .finish()
338     }
339 }
340 
341 impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result342     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
343         let rule = self.as_rule();
344         let start = self.pos(self.start);
345         let end = self.pos(self.pair());
346         let mut pairs = self.clone().into_inner().peekable();
347 
348         if pairs.peek().is_none() {
349             write!(f, "{:?}({}, {})", rule, start, end)
350         } else {
351             write!(
352                 f,
353                 "{:?}({}, {}, [{}])",
354                 rule,
355                 start,
356                 end,
357                 pairs
358                     .map(|pair| format!("{}", pair))
359                     .collect::<Vec<_>>()
360                     .join(", ")
361             )
362         }
363     }
364 }
365 
366 impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
eq(&self, other: &Pair<'i, R>) -> bool367     fn eq(&self, other: &Pair<'i, R>) -> bool {
368         Rc::ptr_eq(&self.queue, &other.queue)
369             && ptr::eq(self.input, other.input)
370             && self.start == other.start
371     }
372 }
373 
374 impl<'i, R: Eq> Eq for Pair<'i, R> {}
375 
376 impl<'i, R: Hash> Hash for Pair<'i, R> {
hash<H: Hasher>(&self, state: &mut H)377     fn hash<H: Hasher>(&self, state: &mut H) {
378         (&*self.queue as *const Vec<QueueableToken<'i, R>>).hash(state);
379         (self.input as *const str).hash(state);
380         self.start.hash(state);
381     }
382 }
383 
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    /// Serializes the pair as a struct with three fields: `pos` (start and end positions),
    /// `rule` (the rule's `Debug` rendering) and `inner`.
    ///
    /// `inner` holds the matched text when the pair has no inner pairs, and the inner pairs
    /// themselves otherwise.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let span = (self.pos(self.start), self.pos(self.pair()));
        let rule_name = format!("{:?}", self.as_rule());
        let children = self.clone().into_inner();

        let mut state = serializer.serialize_struct("Pairs", 3)?;
        state.serialize_field("pos", &span)?;
        state.serialize_field("rule", &rule_name)?;

        if children.peek().is_some() {
            state.serialize_field("inner", &children)?;
        } else {
            state.serialize_field("inner", &self.as_str())?;
        }

        state.end()
    }
}
408 
#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    /// The JSON rendering of `a(b())` nests the inner pairs under `inner.pairs`, and a leaf
    /// pair reports its matched text as `inner`.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let outer = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, outer.to_json());
    }

    /// `into_inner` drops the outer pair's own tokens and keeps only the nested ones.
    #[test]
    fn pair_into_inner() {
        // The parsed tokens are a(b()).
        let outer = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        // Only the tokens of b() remain.
        let inner = outer.into_inner();

        assert_eq!(2, inner.tokens().count());
    }

    /// `get_input` returns the full string handed to the parser, not just the matched slice.
    #[test]
    fn get_input_of_pair() {
        let source = "abcde";
        let parsed = AbcParser::parse(Rule::a, source).unwrap().next().unwrap();

        assert_eq!(source, parsed.get_input());
    }
}
463