• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 use std::fmt;
11 use std::hash::{Hash, Hasher};
12 use std::ptr;
13 use std::rc::Rc;
14 use std::str;
15 
16 #[cfg(feature = "pretty-print")]
17 use serde::ser::SerializeStruct;
18 
19 use super::flat_pairs::{self, FlatPairs};
20 use super::pair::{self, Pair};
21 use super::queueable_token::QueueableToken;
22 use super::tokens::{self, Tokens};
23 use RuleType;
24 
/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// Cloning a `Pairs` clones the `Rc` handle to the token queue and copies the
/// input reference and the two indices; the tokens themselves are not copied.
///
/// [`Pair`]: struct.Pair.html
/// [`pest::state`]: ../fn.state.html
/// [`Pair::into_inner`]: struct.Pair.html#method.into_inner
#[derive(Clone)]
pub struct Pairs<'i, R> {
    /// Token queue, shared through `Rc` with every `Pair` handed out by this iterator.
    queue: Rc<Vec<QueueableToken<R>>>,
    /// Input string that the token positions index into.
    input: &'i str,
    /// Queue index of the next token yielded from the front.
    start: usize,
    /// Exclusive upper bound on queue indices; the iterator is exhausted once
    /// `start >= end`.
    end: usize,
}
37 
new<R: RuleType>( queue: Rc<Vec<QueueableToken<R>>>, input: &str, start: usize, end: usize, ) -> Pairs<R>38 pub fn new<R: RuleType>(
39     queue: Rc<Vec<QueueableToken<R>>>,
40     input: &str,
41     start: usize,
42     end: usize,
43 ) -> Pairs<R> {
44     Pairs {
45         queue,
46         input,
47         start,
48         end,
49     }
50 }
51 
52 impl<'i, R: RuleType> Pairs<'i, R> {
53     /// Captures a slice from the `&str` defined by the starting position of the first token `Pair`
54     /// and the ending position of the last token `Pair` of the `Pairs`. This also captures
55     /// the input between those two token `Pair`s.
56     ///
57     /// # Examples
58     ///
59     /// ```
60     /// # use std::rc::Rc;
61     /// # use pest;
62     /// # #[allow(non_camel_case_types)]
63     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
64     /// enum Rule {
65     ///     a,
66     ///     b
67     /// }
68     ///
69     /// let input = "a b";
70     /// let pairs = pest::state(input, |state| {
71     ///     // generating Token pairs with Rule::a and Rule::b ...
72     /// #     state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
73     /// #         .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
74     /// }).unwrap();
75     ///
76     /// assert_eq!(pairs.as_str(), "a b");
77     /// ```
78     #[inline]
as_str(&self) -> &'i str79     pub fn as_str(&self) -> &'i str {
80         if self.start < self.end {
81             let start = self.pos(self.start);
82             let end = self.pos(self.end - 1);
83             // Generated positions always come from Positions and are UTF-8 borders.
84             &self.input[start..end]
85         } else {
86             ""
87         }
88     }
89 
90     /// Captures inner token `Pair`s and concatenates resulting `&str`s. This does not capture
91     /// the input between token `Pair`s.
92     ///
93     /// # Examples
94     ///
95     /// ```
96     /// # use std::rc::Rc;
97     /// # use pest;
98     /// # #[allow(non_camel_case_types)]
99     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
100     /// enum Rule {
101     ///     a,
102     ///     b
103     /// }
104     ///
105     /// let input = "a b";
106     /// let pairs = pest::state(input, |state| {
107     ///     // generating Token pairs with Rule::a and Rule::b ...
108     /// #     state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
109     /// #         .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
110     /// }).unwrap();
111     ///
112     /// assert_eq!(pairs.concat(), "ab");
113     /// ```
114     #[inline]
concat(&self) -> String115     pub fn concat(&self) -> String {
116         self.clone()
117             .fold(String::new(), |string, pair| string + pair.as_str())
118     }
119 
120     /// Flattens the `Pairs`.
121     ///
122     /// # Examples
123     ///
124     /// ```
125     /// # use std::rc::Rc;
126     /// # use pest;
127     /// # #[allow(non_camel_case_types)]
128     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
129     /// enum Rule {
130     ///     a,
131     ///     b
132     /// }
133     ///
134     /// let input = "";
135     /// let pairs = pest::state(input, |state| {
136     ///     // generating nested Token pair with Rule::b inside Rule::a
137     /// #     state.rule(Rule::a, |state| {
138     /// #         state.rule(Rule::b, |s| Ok(s))
139     /// #     })
140     /// }).unwrap();
141     /// let tokens: Vec<_> = pairs.flatten().tokens().collect();
142     ///
143     /// assert_eq!(tokens.len(), 4);
144     /// ```
145     #[inline]
flatten(self) -> FlatPairs<'i, R>146     pub fn flatten(self) -> FlatPairs<'i, R> {
147         unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) }
148     }
149 
150     /// Returns the `Tokens` for the `Pairs`.
151     ///
152     /// # Examples
153     ///
154     /// ```
155     /// # use std::rc::Rc;
156     /// # use pest;
157     /// # #[allow(non_camel_case_types)]
158     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
159     /// enum Rule {
160     ///     a
161     /// }
162     ///
163     /// let input = "";
164     /// let pairs = pest::state(input, |state| {
165     ///     // generating Token pair with Rule::a ...
166     /// #     state.rule(Rule::a, |s| Ok(s))
167     /// }).unwrap();
168     /// let tokens: Vec<_> = pairs.tokens().collect();
169     ///
170     /// assert_eq!(tokens.len(), 2);
171     /// ```
172     #[inline]
tokens(self) -> Tokens<'i, R>173     pub fn tokens(self) -> Tokens<'i, R> {
174         tokens::new(self.queue, self.input, self.start, self.end)
175     }
176 
177     /// Peek at the first inner `Pair` without changing the position of this iterator.
178     #[inline]
peek(&self) -> Option<Pair<'i, R>>179     pub fn peek(&self) -> Option<Pair<'i, R>> {
180         if self.start < self.end {
181             Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) })
182         } else {
183             None
184         }
185     }
186 
187     /// Generates a string that stores the lexical information of `self` in
188     /// a pretty-printed JSON format.
189     #[cfg(feature = "pretty-print")]
to_json(&self) -> String190     pub fn to_json(&self) -> String {
191         ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pairs to json.")
192     }
193 
pair(&self) -> usize194     fn pair(&self) -> usize {
195         match self.queue[self.start] {
196             QueueableToken::Start {
197                 end_token_index, ..
198             } => end_token_index,
199             _ => unreachable!(),
200         }
201     }
202 
pair_from_end(&self) -> usize203     fn pair_from_end(&self) -> usize {
204         match self.queue[self.end - 1] {
205             QueueableToken::End {
206                 start_token_index, ..
207             } => start_token_index,
208             _ => unreachable!(),
209         }
210     }
211 
pos(&self, index: usize) -> usize212     fn pos(&self, index: usize) -> usize {
213         match self.queue[index] {
214             QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
215                 input_pos
216             }
217         }
218     }
219 }
220 
221 impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
222     type Item = Pair<'i, R>;
223 
next(&mut self) -> Option<Self::Item>224     fn next(&mut self) -> Option<Self::Item> {
225         let pair = self.peek()?;
226         self.start = self.pair() + 1;
227         Some(pair)
228     }
229 }
230 
231 impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
next_back(&mut self) -> Option<Self::Item>232     fn next_back(&mut self) -> Option<Self::Item> {
233         if self.end <= self.start {
234             return None;
235         }
236 
237         self.end = self.pair_from_end();
238 
239         let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
240 
241         Some(pair)
242     }
243 }
244 
245 impl<'i, R: RuleType> fmt::Debug for Pairs<'i, R> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result246     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
247         f.debug_list().entries(self.clone()).finish()
248     }
249 }
250 
251 impl<'i, R: RuleType> fmt::Display for Pairs<'i, R> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result252     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
253         write!(
254             f,
255             "[{}]",
256             self.clone()
257                 .map(|pair| format!("{}", pair))
258                 .collect::<Vec<_>>()
259                 .join(", ")
260         )
261     }
262 }
263 
264 impl<'i, R: PartialEq> PartialEq for Pairs<'i, R> {
eq(&self, other: &Pairs<'i, R>) -> bool265     fn eq(&self, other: &Pairs<'i, R>) -> bool {
266         Rc::ptr_eq(&self.queue, &other.queue)
267             && ptr::eq(self.input, other.input)
268             && self.start == other.start
269             && self.end == other.end
270     }
271 }
272 
// Marker impl: the `PartialEq` impl for `Pairs` only compares pointers and
// indices, so equality is reflexive, symmetric and transitive.
impl<'i, R: Eq> Eq for Pairs<'i, R> {}
274 
275 impl<'i, R: Hash> Hash for Pairs<'i, R> {
hash<H: Hasher>(&self, state: &mut H)276     fn hash<H: Hasher>(&self, state: &mut H) {
277         (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
278         (self.input as *const str).hash(state);
279         self.start.hash(state);
280         self.end.hash(state);
281     }
282 }
283 
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pairs<'i, R> {
    /// Serializes `self` as a struct named `Pairs` with two fields:
    ///
    /// * `pos` — the `(start, end)` input positions spanned by the remaining pairs;
    /// * `pairs` — the remaining pairs, in iteration order.
    ///
    /// An empty `Pairs` is serialized with `pos` set to `(0, 0)` and an empty `pairs`
    /// list.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        // Mirror the guard in `as_str`: with nothing left to iterate there is no first
        // or last token to read a position from, and looking one up would index out of
        // bounds (or underflow on `end - 1`).
        let span = if self.start < self.end {
            (self.pos(self.start), self.pos(self.end - 1))
        } else {
            (0, 0)
        };
        // Collect from a clone so that serializing does not consume the iterator.
        let pairs = self.clone().collect::<Vec<_>>();

        let mut ser = serializer.serialize_struct("Pairs", 2)?;
        ser.serialize_field("pos", &span)?;
        ser.serialize_field("pairs", &pairs)?;
        ser.end()
    }
}
300 
#[cfg(test)]
mod tests {
    use super::super::super::macros::tests::*;
    use super::super::super::Parser;

    /// `to_json` renders the nested pair structure as pretty-printed JSON.
    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let expected = r#"{
  "pos": [
    0,
    5
  ],
  "pairs": [
    {
      "pos": [
        0,
        3
      ],
      "rule": "a",
      "inner": {
        "pos": [
          1,
          2
        ],
        "pairs": [
          {
            "pos": [
              1,
              2
            ],
            "rule": "b",
            "inner": "b"
          }
        ]
      }
    },
    {
      "pos": [
        4,
        5
      ],
      "rule": "c",
      "inner": "e"
    }
  ]
}"#;

        let parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let json = parsed.to_json();

        assert_eq!(expected, json);
    }

    /// `as_str` spans from the first pair's start to the last pair's end.
    #[test]
    fn as_str() {
        let parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let spanned = parsed.as_str();

        assert_eq!(spanned, "abcde");
    }

    /// `as_str` on the inner pairs of a pair without children is the empty string.
    #[test]
    fn as_str_empty() {
        let mut parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let second = parsed.nth(1).unwrap();
        let inner = second.into_inner();

        assert_eq!(inner.as_str(), "");
    }

    /// `concat` joins only the text covered by the pairs themselves.
    #[test]
    fn concat() {
        let parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let joined = parsed.concat();

        assert_eq!(joined, "abce");
    }

    /// The `Debug` output lists every pair with its rule, span and inner pairs.
    #[test]
    fn pairs_debug() {
        let parsed = AbcParser::parse(Rule::a, "abcde").unwrap();

        #[rustfmt::skip]
        assert_eq!(
            format!("{:?}", parsed),
            "[\
                Pair { rule: a, span: Span { str: \"abc\", start: 0, end: 3 }, inner: [\
                    Pair { rule: b, span: Span { str: \"b\", start: 1, end: 2 }, inner: [] }\
                ] }, \
                Pair { rule: c, span: Span { str: \"e\", start: 4, end: 5 }, inner: [] }\
            ]"
            .to_owned()
        );
    }

    /// The `Display` output is a bracketed, comma-separated list of pairs.
    #[test]
    fn pairs_display() {
        let parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let shown = format!("{}", parsed);

        assert_eq!(shown, "[a(0, 3, [b(1, 2)]), c(4, 5)]".to_owned());
    }

    /// Forward iteration yields the top-level pairs in input order.
    #[test]
    fn iter_for_pairs() {
        let parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let rules = parsed.map(|p| p.as_rule()).collect::<Vec<Rule>>();

        assert_eq!(rules, vec![Rule::a, Rule::c]);
    }

    /// Reverse iteration yields the top-level pairs in reverse input order.
    #[test]
    fn double_ended_iter_for_pairs() {
        let parsed = AbcParser::parse(Rule::a, "abcde").unwrap();
        let rules = parsed.rev().map(|p| p.as_rule()).collect::<Vec<Rule>>();

        assert_eq!(rules, vec![Rule::c, Rule::a]);
    }
}
420