// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::fmt;
use std::hash::{Hash, Hasher};
use std::ptr;
use std::rc::Rc;
use std::str;

#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use span::{self, Span};
use RuleType;

/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<R>>>,
    input: &'i str,
    /// Token index into `queue`.
    start: usize,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<R: RuleType>(
    queue: Rc<Vec<QueueableToken<R>>>,
    input: &str,
    start: usize,
) -> Pair<R> {
    Pair {
        queue,
        input,
        start,
    }
}

impl<'i, R: RuleType> Pair<'i, R> {
    /// Returns the `Rule` of the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_rule(), Rule::a);
    /// ```
    #[inline]
    pub fn as_rule(&self) -> R {
        match self.queue[self.pair()] {
            QueueableToken::End { rule, .. } => rule,
            _ => unreachable!(),
        }
    }

    /// Captures a slice from the `&str` defined by the token `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &'i str {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        &self.input[start..end]
    }

    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }

    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_span().as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_span(&self) -> Span<'i> {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        unsafe { span::Span::new_unchecked(self.input, start, end) }
    }

    /// Returns the inner `Pairs` between the `Pair`'s `Token::Start` and `Token::End`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert!(pair.into_inner().next().is_none());
    /// ```
    #[inline]
    pub fn into_inner(self) -> Pairs<'i, R> {
        let pair = self.pair();

        pairs::new(self.queue, self.input, self.start + 1, pair)
    }

    /// Returns the `Tokens` for the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    /// let tokens: Vec<_> = pair.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<'i, R> {
        let end = self.pair();

        tokens::new(self.queue, self.input, self.start, end + 1)
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
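    ///
    /// # Examples
    ///
    /// A minimal sketch of intended usage, assuming the `pretty-print` feature
    /// is enabled and reusing the `pest::state` setup from the examples above:
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// // the JSON output records the rule name along with its position span
    /// assert!(pair.to_json().contains("\"rule\": \"ab\""));
    /// ```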
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }

    fn pair(&self) -> usize {
        match self.queue[self.start] {
            QueueableToken::Start {
                end_token_index, ..
            } => end_token_index,
            _ => unreachable!(),
        }
    }

    fn pos(&self, index: usize) -> usize {
        match self.queue[index] {
            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                input_pos
            }
        }
    }
}

impl<'i, R: RuleType> Pairs<'i, R> {
    /// Creates a new `Pairs` iterator containing just the single `Pair`.
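    ///
    /// # Examples
    ///
    /// A small sketch of intended usage, following the same `pest::state`
    /// setup as the `Pair` examples above:
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// // wrapping the single `Pair` yields an iterator over exactly that pair
    /// let pairs = pest::iterators::Pairs::single(pair);
    /// assert_eq!(pairs.count(), 1);
    /// ```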
    pub fn single(pair: Pair<'i, R>) -> Self {
        let end = pair.pair();
        pairs::new(pair.queue, pair.input, pair.start, end)
    }
}

impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("Pair")
            .field("rule", &self.as_rule())
            .field("span", &self.as_span())
            .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
            .finish()
    }
}

impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rule = self.as_rule();
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let mut pairs = self.clone().into_inner().peekable();

        if pairs.peek().is_none() {
            write!(f, "{:?}({}, {})", rule, start, end)
        } else {
            write!(
                f,
                "{:?}({}, {}, [{}])",
                rule,
                start,
                end,
                pairs
                    .map(|pair| format!("{}", pair))
                    .collect::<Vec<_>>()
                    .join(", ")
            )
        }
    }
}

impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
    fn eq(&self, other: &Pair<'i, R>) -> bool {
        Rc::ptr_eq(&self.queue, &other.queue)
            && ptr::eq(self.input, other.input)
            && self.start == other.start
    }
}

impl<'i, R: Eq> Eq for Pair<'i, R> {}

impl<'i, R: Hash> Hash for Pair<'i, R> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
        (self.input as *const str).hash(state);
        self.start.hash(state);
    }
}

#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());
        let rule = format!("{:?}", self.as_rule());
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("rule", &rule)?;

        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }

        ser.end()
    }
}

#[cfg(test)]
mod tests {
    use macros::tests::*;
    use parser::Parser;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }
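
    // Illustrative sketch: `PartialEq` compares the shared token queue, the
    // input pointer, and the start index, so a `Pair` and its clone are equal.
    #[test]
    fn pair_eq_clone() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        assert_eq!(pair, pair.clone());
    }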

    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        assert_eq!(2, pairs.tokens().count());
    }
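
    // A further illustrative check, assuming the AbcParser fixture used above:
    // Rule::a is expected to span "abc" (positions 0..3) and to contain a single
    // Rule::b pair over "b" (positions 1..2), as reflected in `test_pretty_print`.
    #[test]
    fn pair_as_str_and_display() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        assert_eq!(pair.as_str(), "abc");
        // `Display` lists the rule, its positions, and its children
        assert_eq!(format!("{}", pair), "a(0, 3, [b(1, 2)])");
    }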
}