1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9
10 use alloc::format;
11 use alloc::rc::Rc;
12 #[cfg(feature = "pretty-print")]
13 use alloc::string::String;
14 use alloc::vec::Vec;
15 use core::fmt;
16 use core::hash::{Hash, Hasher};
17 use core::ptr;
18 use core::str;
19
20 #[cfg(feature = "pretty-print")]
21 use serde::ser::SerializeStruct;
22
23 use super::line_index::LineIndex;
24 use super::pairs::{self, Pairs};
25 use super::queueable_token::QueueableToken;
26 use super::tokens::{self, Tokens};
27 use crate::span::{self, Span};
28 use crate::RuleType;
29
/// A matching pair of [`Token`]s and everything between them.
///
/// A matching `Token` pair is formed by a `Token::Start` and a subsequent `Token::End` with the
/// same `Rule`, with the condition that all `Token`s between them can form such pairs as well.
/// This is similar to the [brace matching problem](https://en.wikipedia.org/wiki/Brace_matching) in
/// editors.
///
/// [`Token`]: ../enum.Token.html
#[derive(Clone)]
pub struct Pair<'i, R> {
    /// # Safety
    ///
    /// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
    // Shared (via `Rc`) with every other `Pair`/`Pairs` from the same parse, so `Clone` is cheap.
    queue: Rc<Vec<QueueableToken<R>>>,
    // The original parser input; `as_str`/`as_span` slices borrow from it for lifetime `'i`.
    input: &'i str,
    /// Token index into `queue`.
    // Always points at a `QueueableToken::Start` — see `pair()`.
    start: usize,
    // Shared line/column lookup structure used by `line_col`.
    line_index: Rc<LineIndex>,
}
49
/// Constructs a `Pair` whose `Start` token sits at index `start` in `queue`.
///
/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<R: RuleType>(
    queue: Rc<Vec<QueueableToken<R>>>,
    input: &str,
    line_index: Rc<LineIndex>,
    start: usize,
) -> Pair<'_, R> {
    Pair {
        queue,
        input,
        start,
        line_index,
    }
}
66
impl<'i, R: RuleType> Pair<'i, R> {
    /// Returns the `Rule` of the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_rule(), Rule::a);
    /// ```
    #[inline]
    pub fn as_rule(&self) -> R {
        // The rule is stored on the matching `End` token, which `pair()` locates.
        match self.queue[self.pair()] {
            QueueableToken::End { rule, .. } => rule,
            _ => unreachable!(),
        }
    }

    /// Captures a slice from the `&str` defined by the token `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_str(&self) -> &'i str {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        &self.input[start..end]
    }

    /// Returns the `Span` defined by the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.into_span().as_str(), "ab");
    /// ```
    #[inline]
    #[deprecated(since = "2.0.0", note = "Please use `as_span` instead")]
    pub fn into_span(self) -> Span<'i> {
        self.as_span()
    }

    /// Returns the `Span` defined by the `Pair`, **without** consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     ab
    /// }
    ///
    /// let input = "ab";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::ab ...
    /// #     state.rule(Rule::ab, |s| s.match_string("ab"))
    /// }).unwrap().next().unwrap();
    ///
    /// assert_eq!(pair.as_span().as_str(), "ab");
    /// ```
    #[inline]
    pub fn as_span(&self) -> Span<'i> {
        let start = self.pos(self.start);
        let end = self.pos(self.pair());

        // Generated positions always come from Positions and are UTF-8 borders.
        unsafe { span::Span::new_unchecked(self.input, start, end) }
    }

    /// Returns the inner `Pairs` between the `Pair`, consuming it.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    ///
    /// assert!(pair.into_inner().next().is_none());
    /// ```
    #[inline]
    pub fn into_inner(self) -> Pairs<'i, R> {
        let pair = self.pair();

        // Children live in the token queue strictly between this pair's
        // `Start` token (self.start) and its matching `End` token (pair).
        pairs::new(
            self.queue,
            self.input,
            Some(self.line_index),
            self.start + 1,
            pair,
        )
    }

    /// Returns the `Tokens` for the `Pair`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use std::rc::Rc;
    /// # use pest;
    /// # #[allow(non_camel_case_types)]
    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    /// enum Rule {
    ///     a
    /// }
    ///
    /// let input = "";
    /// let pair = pest::state(input, |state| {
    ///     // generating Token pair with Rule::a ...
    /// #     state.rule(Rule::a, |s| Ok(s))
    /// }).unwrap().next().unwrap();
    /// let tokens: Vec<_> = pair.tokens().collect();
    ///
    /// assert_eq!(tokens.len(), 2);
    /// ```
    #[inline]
    pub fn tokens(self) -> Tokens<'i, R> {
        let end = self.pair();

        // `end + 1` makes the range inclusive of this pair's own `End` token.
        tokens::new(self.queue, self.input, self.start, end + 1)
    }

    /// Generates a string that stores the lexical information of `self` in
    /// a pretty-printed JSON format.
    #[cfg(feature = "pretty-print")]
    pub fn to_json(&self) -> String {
        ::serde_json::to_string_pretty(self).expect("Failed to pretty-print Pair to json.")
    }

    /// Returns the `line`, `col` of this pair start.
    pub fn line_col(&self) -> (usize, usize) {
        // Delegate to the shared LineIndex built for this parse result.
        let pos = self.pos(self.start);
        self.line_index.line_col(self.input, pos)
    }

    /// Returns the index in `queue` of the `End` token matching the `Start`
    /// token at `self.start`.
    fn pair(&self) -> usize {
        match self.queue[self.start] {
            QueueableToken::Start {
                end_token_index, ..
            } => end_token_index,
            // A `Pair` is only ever constructed pointing at a `Start` token.
            _ => unreachable!(),
        }
    }

    /// Returns the byte offset into `input` recorded on the token at `index`.
    fn pos(&self, index: usize) -> usize {
        match self.queue[index] {
            QueueableToken::Start { input_pos, .. } | QueueableToken::End { input_pos, .. } => {
                input_pos
            }
        }
    }
}
277
278 impl<'i, R: RuleType> Pairs<'i, R> {
279 /// Create a new `Pairs` iterator containing just the single `Pair`.
single(pair: Pair<'i, R>) -> Self280 pub fn single(pair: Pair<'i, R>) -> Self {
281 let end = pair.pair();
282 pairs::new(
283 pair.queue,
284 pair.input,
285 Some(pair.line_index),
286 pair.start,
287 end,
288 )
289 }
290 }
291
292 impl<'i, R: RuleType> fmt::Debug for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result293 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
294 f.debug_struct("Pair")
295 .field("rule", &self.as_rule())
296 .field("span", &self.as_span())
297 .field("inner", &self.clone().into_inner().collect::<Vec<_>>())
298 .finish()
299 }
300 }
301
302 impl<'i, R: RuleType> fmt::Display for Pair<'i, R> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result303 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
304 let rule = self.as_rule();
305 let start = self.pos(self.start);
306 let end = self.pos(self.pair());
307 let mut pairs = self.clone().into_inner().peekable();
308
309 if pairs.peek().is_none() {
310 write!(f, "{:?}({}, {})", rule, start, end)
311 } else {
312 write!(
313 f,
314 "{:?}({}, {}, [{}])",
315 rule,
316 start,
317 end,
318 pairs
319 .map(|pair| format!("{}", pair))
320 .collect::<Vec<_>>()
321 .join(", ")
322 )
323 }
324 }
325 }
326
327 impl<'i, R: PartialEq> PartialEq for Pair<'i, R> {
eq(&self, other: &Pair<'i, R>) -> bool328 fn eq(&self, other: &Pair<'i, R>) -> bool {
329 Rc::ptr_eq(&self.queue, &other.queue)
330 && ptr::eq(self.input, other.input)
331 && self.start == other.start
332 }
333 }
334
335 impl<'i, R: Eq> Eq for Pair<'i, R> {}
336
337 impl<'i, R: Hash> Hash for Pair<'i, R> {
hash<H: Hasher>(&self, state: &mut H)338 fn hash<H: Hasher>(&self, state: &mut H) {
339 (&*self.queue as *const Vec<QueueableToken<R>>).hash(state);
340 (self.input as *const str).hash(state);
341 self.start.hash(state);
342 }
343 }
344
#[cfg(feature = "pretty-print")]
impl<'i, R: RuleType> ::serde::Serialize for Pair<'i, R> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ::serde::Serializer,
    {
        // Byte offsets of this pair's start and matching end token.
        let pos = (self.pos(self.start), self.pos(self.pair()));
        let rule = format!("{:?}", self.as_rule());
        let inner = self.clone().into_inner();

        let mut ser = serializer.serialize_struct("Pairs", 3)?;
        ser.serialize_field("pos", &pos)?;
        ser.serialize_field("rule", &rule)?;
        // Leaves serialize their matched text; interior nodes serialize children.
        if inner.peek().is_none() {
            ser.serialize_field("inner", &self.as_str())?;
        } else {
            ser.serialize_field("inner", &inner)?;
        }
        ser.end()
    }
}
369
#[cfg(test)]
mod tests {
    use crate::macros::tests::*;
    use crate::parser::Parser;

    #[test]
    #[cfg(feature = "pretty-print")]
    fn test_pretty_print() {
        // `AbcParser` on "abcde" yields the pair tree a(b()); the expected JSON
        // mirrors `Pair::serialize`: a leaf emits its matched text as `inner`,
        // an interior node emits its child `Pairs`.
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap();

        let expected = r#"{
  "pos": [
    0,
    3
  ],
  "rule": "a",
  "inner": {
    "pos": [
      1,
      2
    ],
    "pairs": [
      {
        "pos": [
          1,
          2
        ],
        "rule": "b",
        "inner": "b"
      }
    ]
  }
}"#;

        assert_eq!(expected, pair.to_json());
    }

    #[test]
    fn pair_into_inner() {
        let pair = AbcParser::parse(Rule::a, "abcde").unwrap().next().unwrap(); // the tokens a(b())

        let pairs = pair.into_inner(); // the tokens b()

        // b() contributes exactly one Start and one End token.
        assert_eq!(2, pairs.tokens().count());
    }
}
416