1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9
10 use std::fmt;
11 use std::hash::{Hash, Hasher};
12 use std::ptr;
13 use std::rc::Rc;
14 use std::str;
15
16 #[cfg(feature = "pretty-print")]
17 use serde::ser::SerializeStruct;
18
19 use super::pairs::{self, Pairs};
20 use super::queueable_token::QueueableToken;
21 use super::tokens::{self, Tokens};
22 use span::{self, Span};
23 use RuleType;
24
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// Shared token queue produced by the parser; this `Pair` is a cheap view into it.
    ///
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    queue: Rc<Vec<QueueableToken<R>>>,
    /// The original input string that all token positions index into.
    input: &'i str,
    /// Token index into `queue` of this pair's `Start` token.
    start: usize,
}
43
44 /// # Safety
45 ///
46 /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
new<R: RuleType>( queue: Rc<Vec<QueueableToken<R>>>, input: &str, start: usize, ) -> Pair<R>47 pub unsafe fn new<R: RuleType>(
48 queue: Rc<Vec<QueueableToken<R>>>,
49 input: &str,
50 start: usize,
51 ) -> Pair<R> {
52 Pair {
53 queue,
54 input,
55 start,
56 }
57 }
58
impl<'i, R: RuleType> Pair<'i, R> {
    /// Returns the `Rule` of the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_rule(), Rule::a);
    /// ```
    #[inline]
    pub fn as_rule(&self) -> R {
        // The rule is read off the matching `End` token; `pair()` guarantees
        // that index holds an `End`.
        match self.queue[self.pair()] {
            QueueableToken::End { rule, .. } => rule,
            _ => unreachable!(),
        }
    }

    /// Captures a slice from the `&str` defined by the token `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &'i str {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        // (See the safety invariant on `queue`: `input_pos` values are valid
        // character boundaries, so this slice cannot panic mid-character.)
        &self.input[start..end]
    }

    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }

    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_span().as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_span(&self) -> Span<'i> {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        // SAFETY: the `queue` invariant guarantees `start` and `end` are valid
        // character boundary indices into `input`.
        unsafe { span::Span::new_unchecked(self.input, start, end) }
    }

    /// Returns the inner `Pairs` between the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert!(pair.into_inner().next().is_none());
    /// ```
    #[inline]
    pub fn into_inner(self) -> Pairs<'i, R> {
        let pair = self.pair();

        // Children live strictly between this pair's `Start` (self.start) and
        // `End` (pair) tokens, hence the half-open range (start + 1, pair).
        pairs::new(self.queue, self.input, self.start + 1, pair)
    }

    /// Returns the `Tokens` for the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    /// let tokens: Vec<_> = pair.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<'i, R> {
        let end = self.pair();

        // `end + 1` so the iteration includes this pair's own `End` token.
        tokens::new(self.queue, self.input, self.start, end + 1)
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }

    /// Returns the queue index of the `End` token matching this pair's `Start`.
    fn pair(&self) -> usize {
        // `self.start` always indexes a `Start` token for a constructed `Pair`,
        // so any other variant here is a broken invariant.
        match self.queue[self.start] {
            QueueableToken::Start {
                end_token_index, ..
            } => end_token_index,
            _ => unreachable!(),
        }
    }

    /// Returns the input byte position recorded on the token at `index`.
    fn pos(&self, index: usize) -> usize {
        match self.queue[index] {
            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                input_pos
            }
        }
    }
}
257
258 impl<'i, R: RuleType> Pairs<'i, R> {
259 /// Create a new `Pairs` iterator containing just the single `Pair`.
single(pair: Pair<'i, R>) -> Self260 pub fn single(pair: Pair<'i, R>) -> Self {
261 let end = pair.pair();
262 pairs::new(pair.queue, pair.input, pair.start, end)
263 }
264 }
265
266 impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result267 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
268 f.debug_struct("Pair")
269 .field("rule", &self.as_rule())
270 .field("span", &self.as_span())
271 .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
272 .finish()
273 }
274 }
275
276 impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result277 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
278 let rule = self.as_rule();
279 let start = self.pos(self.start);
280 let end = self.pos(self.pair());
281 let mut pairs = self.clone().into_inner().peekable();
282
283 if pairs.peek().is_none() {
284 write!(f, "{:?}({}, {})", rule, start, end)
285 } else {
286 write!(
287 f,
288 "{:?}({}, {}, [{}])",
289 rule,
290 start,
291 end,
292 pairs
293 .map(|pair| format!("{}", pair))
294 .collect::<Vec<_>>()
295 .join(", ")
296 )
297 }
298 }
299 }
300
301 impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
eq(&self, other: &Pair<'i, R>) -> bool302 fn eq(&self, other: &Pair<'i, R>) -> bool {
303 Rc::ptr_eq(&self.queue, &other.queue)
304 && ptr::eq(self.input, other.input)
305 && self.start == other.start
306 }
307 }
308
309 impl<'i, R: Eq> Eq for Pair<'i, R> {}
310
311 impl<'i, R: Hash> Hash for Pair<'i, R> {
hash<H: Hasher>(&self, state: &mut H)312 fn hash<H: Hasher>(&self, state: &mut H) {
313 (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
314 (self.input as *const str).hash(state);
315 self.start.hash(state);
316 }
317 }
318
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    /// Serializes as `{ pos: (start, end), rule: "...", inner: ... }`, where
    /// `inner` is the matched text for leaves and the child `Pairs` otherwise.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        let span_bounds = (self.pos(self.start), self.pos(self.pair()));
        let rule_name = format!("{:?}", self.as_rule());
        let children = self.clone().into_inner();

        let mut state = serializer.serialize_struct("Pairs", 3)?;
        state.serialize_field("pos", &span_bounds)?;
        state.serialize_field("rule", &rule_name)?;

        match children.peek() {
            None => state.serialize_field("inner", &self.as_str())?,
            Some(_) => state.serialize_field("inner", &children)?,
        }

        state.end()
    }
}
343
#[cfg(test)]
mod tests {
    use macros::tests::*;
    use parser::Parser;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        // `Rule::a` matches "abc" (0..3); its single child `Rule::b` matches "b" (1..2).
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    #[test]
    fn pair_into_inner() {
        // Parsing `Rule::a` yields the token tree a(b()); its inner pairs
        // are just b(), i.e. one Start/End token pair.
        let inner = AbcParser::parse(Rule::a, "abcde")
            .unwrap()
            .next()
            .unwrap()
            .into_inner();

        assert_eq!(2, inner.tokens().count());
    }
}
390