// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.

use std::fmt;
use std::hash::{Hash, Hasher};
use std::ptr;
use std::rc::Rc;
use std::str;

#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::flat_pairs::{self, FlatPairs};
use super::pair::{self, Pair};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use RuleType;

/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
/// [`pest::state`]: ../fn.state.html
/// [`Pair::into_inner`]: struct.Pair.html#method.into_inner
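///
/// # Examples
///
/// A minimal sketch of iterating over `Pairs` returned by [`pest::state`];
/// `Rule::a` and `Rule::b` are stand-in rules defined only for this example.
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
///     a,
///     b
/// }
///
/// let input = "a b";
/// let pairs = pest::state(input, |state| {
///     // generating Token pairs with Rule::a and Rule::b ...
/// #     state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
/// #         .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
/// }).unwrap();
///
/// let rules: Vec<Rule> = pairs.map(|pair| pair.as_rule()).collect();
/// assert_eq!(rules, vec![Rule::a, Rule::b]);
/// ```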
#[derive(Clone)]
pub struct Pairs<'i, R> {
    queue: Rc<Vec<QueueableToken<R>>>,
    input: &'i str,
    start: usize,
    end: usize,
}

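// Creates a new `Pairs` iterator over the tokens in `queue[start..end]`.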
pub fn new<R: RuleType>(
    queue: Rc<Vec<QueueableToken<R>>>,
    input: &str,
    start: usize,
    end: usize,
) -> Pairs<R> {
    Pairs {
        queue,
        input,
        start,
        end,
    }
}

impl<'i, R: RuleType> Pairs<'i, R> {
    /// Captures a slice from the `&str` defined by the starting position of the first token `Pair`
    /// and the ending position of the last token `Pair` of the `Pairs`. This also captures
    /// the input between those two token `Pair`s.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a,
    ///     b
    /// }
    ///
    /// let input = "a b";
    /// let pairs = pest::state(input, |state| {
    ///     // generating Token pairs with Rule::a and Rule::b ...
    /// #     state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
    /// #         .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
    /// }).unwrap();
    ///
    /// assert_eq!(pairs.as_str(), "a b");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &'i str {
        if self.start < self.end {
            let start = self.pos(self.start);
            let end = self.pos(self.end - 1);
            // Generated positions always come from Positions and are valid UTF-8 char boundaries.
            &self.input[start..end]
        } else {
            ""
        }
    }

    /// Captures inner token `Pair`s and concatenates resulting `&str`s. This does not capture
    /// the input between token `Pair`s.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a,
    ///     b
    /// }
    ///
    /// let input = "a b";
    /// let pairs = pest::state(input, |state| {
    ///     // generating Token pairs with Rule::a and Rule::b ...
    /// #     state.rule(Rule::a, |s| s.match_string("a")).and_then(|s| s.skip(1))
    /// #         .and_then(|s| s.rule(Rule::b, |s| s.match_string("b")))
    /// }).unwrap();
    ///
    /// assert_eq!(pairs.concat(), "ab");
    /// ```
    #[inline]
    pub fn concat(&self) -> String {
        self.clone()
            .fold(String::new(), |string, pair| string + pair.as_str())
    }

    /// Flattens the `Pairs`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a,
    ///     b
    /// }
    ///
    /// let input = "";
    /// let pairs = pest::state(input, |state| {
    ///     // generating nested Token pair with Rule::b inside Rule::a
    /// #     state.rule(Rule::a, |state| {
    /// #         state.rule(Rule::b, |s| Ok(s))
    /// #     })
    /// }).unwrap();
    /// let tokens: Vec<_> = pairs.flatten().tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 4);
    /// ```
    #[inline]
    pub fn flatten(self) -> FlatPairs<'i, R> {
        unsafe { flat_pairs::new(self.queue, self.input, self.start, self.end) }
    }

    /// Returns the `Tokens` for the `Pairs`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pairs = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap();
    /// let tokens: Vec<_> = pairs.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<'i, R> {
        tokens::new(self.queue, self.input, self.start, self.end)
    }

    /// Peeks at the first inner `Pair` without changing the position of this iterator.
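    ///
    /// # Examples
    ///
    /// A minimal sketch in the style of the examples above; `Rule::a` is a
    /// stand-in rule defined only for this example.
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "a";
    /// let pairs = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| s.match_string("a"))
    /// }).unwrap();
    ///
    /// // peeking does not advance the iterator
    /// assert_eq!(pairs.peek().map(|pair| pair.as_rule()), Some(Rule::a));
    /// assert_eq!(pairs.peek().map(|pair| pair.as_rule()), Some(Rule::a));
    /// ```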
    #[inline]
    pub fn peek(&self) -> Option<Pair<'i, R>> {
        if self.start < self.end {
            Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) })
        } else {
            None
        }
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
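    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming the crate is built with the `pretty-print`
    /// feature; `Rule::a` is a stand-in rule defined only for this example.
    ///
    /// ```
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pairs = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap();
    ///
    /// // the serialized form records token positions under a "pos" field
    /// assert!(pairs.to_json().contains("\"pos\""));
    /// ```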
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pairs to json.")
    }

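    // Returns the index of the matching `End` token for the `Start` token at `self.start`.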
    fn pair(&self) -> usize {
        match self.queue[self.start] {
            QueueableToken::Start {
                end_token_index, ..
            } => end_token_index,
            _ => unreachable!(),
        }
    }

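    // Returns the index of the matching `Start` token for the `End` token at `self.end - 1`.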
    fn pair_from_end(&self) -> usize {
        match self.queue[self.end - 1] {
            QueueableToken::End {
                start_token_index, ..
            } => start_token_index,
            _ => unreachable!(),
        }
    }

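    // Returns the input position recorded by the token at `index` in the queue.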
    fn pos(&self, index: usize) -> usize {
        match self.queue[index] {
            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                input_pos
            }
        }
    }
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
    type Item = Pair<'i, R>;

    fn next(&mut self) -> Option<Self::Item> {
        let pair = self.peek()?;
        // Skip past the matching `End` token of the pair that was just yielded.
        self.start = self.pair() + 1;
        Some(pair)
    }
}

impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.end <= self.start {
            return None;
        }

        // Move `end` back to the `Start` token of the last pair and yield that pair.
        self.end = self.pair_from_end();

        let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };

        Some(pair)
    }
}

impl<'i, R: RuleType> fmt::Debug for Pairs<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_list().entries(self.clone()).finish()
    }
}

impl<'i, R: RuleType> fmt::Display for Pairs<'i, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "[{}]",
            self.clone()
                .map(|pair| format!("{}", pair))
                .collect::<Vec<_>>()
                .join(", ")
        )
    }
}

impl<'i, R: PartialEq> PartialEq for Pairs<'i, R> {
    fn eq(&self, other: &Pairs<'i, R>) -> bool {
        Rc::ptr_eq(&self.queue, &other.queue)
            && ptr::eq(self.input, other.input)
            && self.start == other.start
            && self.end == other.end
    }
}

impl<'i, R: Eq> Eq for Pairs<'i, R> {}

impl<'i, R: Hash> Hash for Pairs<'i, R> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
        (self.input as *const str).hash(state);
        self.start.hash(state);
        self.end.hash(state);
    }
}

#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pairs<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let start = self.pos(self.start);
        let end = self.pos(self.end - 1);
        let pairs = self.clone().collect::<Vec<_>>();

        let mut ser = serializer.serialize_struct("Pairs", 2)?;
        ser.serialize_field("pos", &(start, end))?;
        ser.serialize_field("pairs", &pairs)?;
        ser.end()
    }
}

#[cfg(test)]
mod tests {
    use super::super::super::macros::tests::*;
    use super::super::super::Parser;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        let expected = r#"{
  "pos": [
    0,
    5
  ],
  "pairs": [
    {
      "pos": [
        0,
        3
      ],
      "rule": "a",
      "inner": {
        "pos": [
          1,
          2
        ],
        "pairs": [
          {
            "pos": [
              1,
              2
            ],
            "rule": "b",
            "inner": "b"
          }
        ]
      }
    },
    {
      "pos": [
        4,
        5
      ],
      "rule": "c",
      "inner": "e"
    }
  ]
}"#;

        assert_eq!(expected, pairs.to_json());
    }

    #[test]
    fn as_str() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(pairs.as_str(), "abcde");
    }

    #[test]
    fn as_str_empty() {
        let mut pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(pairs.nth(1).unwrap().into_inner().as_str(), "");
    }

    #[test]
    fn concat() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(pairs.concat(), "abce");
    }

    #[test]
    fn pairs_debug() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        #[rustfmt::skip]
        assert_eq!(
            format!("{:?}", pairs),
            "[\
                Pair { rule: a, span: Span { str: \"abc\", start: 0, end: 3 }, inner: [\
                    Pair { rule: b, span: Span { str: \"b\", start: 1, end: 2 }, inner: [] }\
                ] }, \
                Pair { rule: c, span: Span { str: \"e\", start: 4, end: 5 }, inner: [] }\
            ]"
            .to_owned()
        );
    }

    #[test]
    fn pairs_display() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();

        assert_eq!(
            format!("{}", pairs),
            "[a(0, 3, [b(1, 2)]), c(4, 5)]".to_owned()
        );
    }

    #[test]
    fn iter_for_pairs() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();
        assert_eq!(
            pairs.map(|p| p.as_rule()).collect::<Vec<Rule>>(),
            vec![Rule::a, Rule::c]
        );
    }

    #[test]
    fn double_ended_iter_for_pairs() {
        let pairs = AbcParser::parse(Rule::a, "abcde").unwrap();
        assert_eq!(
            pairs.rev().map(|p| p.as_rule()).collect::<Vec<Rule>>(),
            vec![Rule::c, Rule::a]
        );
    }
}