// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use alloc::format;
use alloc::rc::Rc;
#[cfg(feature = "pretty-print")]
use alloc::string::String;
use alloc::vec::Vec;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::ptr;
use core::str;

#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::span::{self, Span};
use crate::RuleType;

/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<R>>>,
    input: &'i str,
    /// Token index into `queue`.
    start: usize,
    line_index: Rc<LineIndex>,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<R: RuleType>(
    queue: Rc<Vec<QueueableToken<R>>>,
    input: &str,
    line_index: Rc<LineIndex>,
    start: usize,
) -> Pair<'_, R> {
    Pair {
        queue,
        input,
        start,
        line_index,
    }
}

impl<'i, R: RuleType> Pair<'i, R> {
    /// Returns the `Rule` of the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_rule(), Rule::a);
    /// ```
    #[inline]
    pub fn as_rule(&self) -> R {
        match self.queue[self.pair()] {
            QueueableToken::End { rule, .. } => rule,
            _ => unreachable!(),
        }
    }

    /// Captures a slice from the `&str` defined by the token `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &'i str {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        &self.input[start..end]
    }

    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }

    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_span().as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_span(&self) -> Span<'i> {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        unsafe { span::Span::new_unchecked(self.input, start, end) }
    }

    /// Returns the inner `Pairs` between the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert!(pair.into_inner().next().is_none());
    /// ```
    #[inline]
    pub fn into_inner(self) -> Pairs<'i, R> {
        let pair = self.pair();

        pairs::new(
            self.queue,
            self.input,
            Some(self.line_index),
            self.start + 1,
            pair,
        )
    }

    /// Returns the `Tokens` for the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    /// let tokens: Vec<_> = pair.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<'i, R> {
        let end = self.pair();

        tokens::new(self.queue, self.input, self.start, end + 1)
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
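    // Illustrative sketch in the style of the other examples in this file;
    // it assumes the `pretty-print` feature is enabled.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// // The output records the pair's position, rule name, and matched input.
    /// assert!(pair.to_json().contains("\"rule\": \"ab\""));
    /// ```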
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }

    /// Returns the `line`, `col` of this pair's start.
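    // Illustrative sketch in the style of the other examples in this file;
    // line and column numbers are 1-based, so a pair starting at the very
    // beginning of the input reports (1, 1).
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.line_col(), (1, 1));
    /// ```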
line_col(&self) -> (usize, usize)255     pub fn line_col(&self) -> (usize, usize) {
256         let pos = self.pos(self.start);
257         self.line_index.line_col(self.input, pos)
258     }
259 
pair(&self) -> usize260     fn pair(&self) -> usize {
261         match self.queue[self.start] {
262             QueueableToken::Start {
263                 end_token_index, ..
264             } => end_token_index,
265             _ => unreachable!(),
266         }
267     }
268 
pos(&self, index: usize) -> usize269     fn pos(&self, index: usize) -> usize {
270         match self.queue[index] {
271             QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
272                 input_pos
273             }
274         }
275     }
276 }
277 
278 impl<'i, R: RuleType> Pairs<'i, R> {
    /// Creates a new `Pairs` iterator containing just the single `Pair`.
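    // Illustrative sketch in the style of the other examples in this file;
    // the resulting iterator yields exactly the one wrapped `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # use pest::iterators::Pairs;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// let pairs = Pairs::single(pair);
    /// assert_eq!(pairs.count(), 1);
    /// ```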
single(pair: Pair<'i, R>) -> Self280     pub fn single(pair: Pair<'i, R>) -> Self {
281         let end = pair.pair();
282         pairs::new(
283             pair.queue,
284             pair.input,
285             Some(pair.line_index),
286             pair.start,
287             end,
288         )
289     }
290 }
291 
292 impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result293     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
294         f.debug_struct("Pair")
295             .field("rule", &self.as_rule())
296             .field("span", &self.as_span())
297             .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
298             .finish()
299     }
300 }
301 
302 impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result303     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
304         let rule = self.as_rule();
305         let start = self.pos(self.start);
306         let end = self.pos(self.pair());
307         let mut pairs = self.clone().into_inner().peekable();
308 
309         if pairs.peek().is_none() {
310             write!(f, "{:?}({}, {})", rule, start, end)
311         } else {
312             write!(
313                 f,
314                 "{:?}({}, {}, [{}])",
315                 rule,
316                 start,
317                 end,
318                 pairs
319                     .map(|pair| format!("{}", pair))
320                     .collect::<Vec<_>>()
321                     .join(", ")
322             )
323         }
324     }
325 }
326 
327 impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
eq(&self, other: &Pair<'i, R>) -> bool328     fn eq(&self, other: &Pair<'i, R>) -> bool {
329         Rc::ptr_eq(&self.queue, &other.queue)
330             && ptr::eq(self.input, other.input)
331             && self.start == other.start
332     }
333 }
334 
335 impl<'i, R: Eq> Eq for Pair<'i, R> {}
336 
337 impl<'i, R: Hash> Hash for Pair<'i, R> {
hash<H: Hasher>(&self, state: &mut H)338     fn hash<H: Hasher>(&self, state: &mut H) {
339         (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
340         (self.input as *const str).hash(state);
341         self.start.hash(state);
342     }
343 }
344 
345 #[cfg(feature = "pretty-print")]
346 impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: ::serde::Serializer,347     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
348     where
349         S: ::serde::Serializer,
350     {
351         let start = self.pos(self.start);
352         let end = self.pos(self.pair());
353         let rule = format!("{:?}", self.as_rule());
354         let inner = self.clone().into_inner();
355 
356         let mut ser = serializer.serialize_struct("Pairs", 3)?;
357         ser.serialize_field("pos", &(start, end))?;
358         ser.serialize_field("rule", &rule)?;
359 
360         if inner.peek().is_none() {
361             ser.serialize_field("inner", &self.as_str())?;
362         } else {
363             ser.serialize_field("inner", &inner)?;
364         }
365 
366         ser.end()
367     }
368 }
369 
370 #[cfg(test)]
371 mod tests {
372     use crate::macros::tests::*;
373     use crate::parser::Parser;
374 
375     #[test]
376     #[cfg(feature = "pretty-print")]
test_pretty_print()377     fn test_pretty_print() {
378         let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();
379 
380         let expected = r#"{
381   "pos": [
382     0,
383     3
384   ],
385   "rule": "a",
386   "inner": {
387     "pos": [
388       1,
389       2
390     ],
391     "pairs": [
392       {
393         "pos": [
394           1,
395           2
396         ],
397         "rule": "b",
398         "inner": "b"
399       }
400     ]
401   }
402 }"#;
403 
404         assert_eq!(expected, pair.to_json());
405     }
406 
407     #[test]
pair_into_inner()408     fn pair_into_inner() {
409         let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())
410 
411         let pairs = pair.into_inner(); // the tokens b()
412 
413         assert_eq!(2, pairs.tokens().count());
414     }
415 }
416