• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 //! Different optimizations for pest's ASTs.
11 
12 use crate::ast::*;
13 use std::collections::HashMap;
14 
15 #[cfg(test)]
16 macro_rules! box_tree {
17     ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18       $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19     );
20     ($expr:expr) => ($expr);
21 }
22 
23 mod concatenator;
24 mod factorizer;
25 mod lister;
26 mod restorer;
27 mod rotater;
28 mod skipper;
29 mod unroller;
30 
31 /// Takes pest's ASTs and optimizes them
optimize(rules: Vec<Rule>) -> Vec<OptimizedRule>32 pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33     let map = to_hash_map(&rules);
34     let optimized: Vec<OptimizedRule> = rules
35         .into_iter()
36         .map(rotater::rotate)
37         .map(|rule| skipper::skip(rule, &map))
38         .map(unroller::unroll)
39         .map(concatenator::concatenate)
40         .map(factorizer::factor)
41         .map(lister::list)
42         .map(rule_to_optimized_rule)
43         .collect();
44 
45     let optimized_map = to_optimized_hash_map(&optimized);
46     optimized
47         .into_iter()
48         .map(|rule| restorer::restore_on_err(rule, &optimized_map))
49         .collect()
50 }
51 
rule_to_optimized_rule(rule: Rule) -> OptimizedRule52 fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
53     fn to_optimized(expr: Expr) -> OptimizedExpr {
54         match expr {
55             Expr::Str(string) => OptimizedExpr::Str(string),
56             Expr::Insens(string) => OptimizedExpr::Insens(string),
57             Expr::Range(start, end) => OptimizedExpr::Range(start, end),
58             Expr::Ident(ident) => OptimizedExpr::Ident(ident),
59             Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
60             Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
61             Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
62             Expr::Seq(lhs, rhs) => {
63                 OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
64             }
65             Expr::Choice(lhs, rhs) => {
66                 OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
67             }
68             Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
69             Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
70             Expr::Skip(strings) => OptimizedExpr::Skip(strings),
71             Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
72             #[cfg(feature = "grammar-extras")]
73             Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
74             #[cfg(feature = "grammar-extras")]
75             Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))),
76             #[cfg(not(feature = "grammar-extras"))]
77             Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"),
78             Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
79                 unreachable!("No valid transformation to OptimizedRule")
80             }
81         }
82     }
83 
84     OptimizedRule {
85         name: rule.name,
86         ty: rule.ty,
87         expr: to_optimized(rule.expr),
88     }
89 }
90 
91 macro_rules! to_hash_map {
92     ($func_name:ident, $rule:ty, $expr:ty) => {
93         fn $func_name(rules: &[$rule]) -> HashMap<String, $expr> {
94             rules
95                 .iter()
96                 .map(|r| (r.name.clone(), r.expr.clone()))
97                 .collect()
98         }
99     };
100 }
101 to_hash_map!(to_hash_map, Rule, Expr);
102 to_hash_map!(to_optimized_hash_map, OptimizedRule, OptimizedExpr);
103 
104 /// The optimized version of the pest AST's `Rule`.
105 #[derive(Clone, Debug, Eq, PartialEq)]
106 pub struct OptimizedRule {
107     /// The name of the rule.
108     pub name: String,
109     /// The type of the rule.
110     pub ty: RuleType,
111     /// The optimized expression of the rule.
112     pub expr: OptimizedExpr,
113 }
114 
115 /// The optimized version of the pest AST's `Expr`.
116 ///
117 /// # Warning: Semantic Versioning
118 /// There may be non-breaking changes to the meta-grammar
119 /// between minor versions. Those non-breaking changes, however,
120 /// may translate into semver-breaking changes due to the additional variants
121 /// propaged from the `Rule` enum. This is a known issue and will be fixed in the
122 /// future (e.g. by increasing MSRV and non_exhaustive annotations).
123 #[derive(Clone, Debug, Eq, PartialEq)]
124 pub enum OptimizedExpr {
125     /// Matches an exact string, e.g. `"a"`
126     Str(String),
127     /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
128     Insens(String),
129     /// Matches one character in the range, e.g. `'a'..'z'`
130     Range(String, String),
131     /// Matches the rule with the given name, e.g. `a`
132     Ident(String),
133     /// Matches a custom part of the stack, e.g. `PEEK[..]`
134     PeekSlice(i32, Option<i32>),
135     /// Positive lookahead; matches expression without making progress, e.g. `&e`
136     PosPred(Box<OptimizedExpr>),
137     /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
138     NegPred(Box<OptimizedExpr>),
139     /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
140     Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
141     /// Matches either of two expressions, e.g. `e1 | e2`
142     Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
143     /// Optionally matches an expression, e.g. `e?`
144     Opt(Box<OptimizedExpr>),
145     /// Matches an expression zero or more times, e.g. `e*`
146     Rep(Box<OptimizedExpr>),
147     /// Matches an expression one or more times, e.g. `e+`
148     #[cfg(feature = "grammar-extras")]
149     RepOnce(Box<OptimizedExpr>),
150     /// Continues to match expressions until one of the strings in the `Vec` is found
151     Skip(Vec<String>),
152     /// Matches an expression and pushes it to the stack, e.g. `push(e)`
153     Push(Box<OptimizedExpr>),
154     /// Matches an expression and assigns a label to it, e.g. #label = exp
155     #[cfg(feature = "grammar-extras")]
156     NodeTag(Box<OptimizedExpr>, String),
157     /// Restores an expression's checkpoint
158     RestoreOnErr(Box<OptimizedExpr>),
159 }
160 
161 impl OptimizedExpr {
162     /// Returns a top-down iterator over the `OptimizedExpr`.
iter_top_down(&self) -> OptimizedExprTopDownIterator163     pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
164         OptimizedExprTopDownIterator::new(self)
165     }
166 
167     /// Applies `f` to the `OptimizedExpr` top-down.
map_top_down<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,168     pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
169     where
170         F: FnMut(OptimizedExpr) -> OptimizedExpr,
171     {
172         fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
173         where
174             F: FnMut(OptimizedExpr) -> OptimizedExpr,
175         {
176             let expr = f(expr);
177 
178             match expr {
179                 OptimizedExpr::PosPred(expr) => {
180                     let mapped = Box::new(map_internal(*expr, f));
181                     OptimizedExpr::PosPred(mapped)
182                 }
183                 OptimizedExpr::NegPred(expr) => {
184                     let mapped = Box::new(map_internal(*expr, f));
185                     OptimizedExpr::NegPred(mapped)
186                 }
187                 OptimizedExpr::Seq(lhs, rhs) => {
188                     let mapped_lhs = Box::new(map_internal(*lhs, f));
189                     let mapped_rhs = Box::new(map_internal(*rhs, f));
190                     OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
191                 }
192                 OptimizedExpr::Choice(lhs, rhs) => {
193                     let mapped_lhs = Box::new(map_internal(*lhs, f));
194                     let mapped_rhs = Box::new(map_internal(*rhs, f));
195                     OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
196                 }
197                 OptimizedExpr::Rep(expr) => {
198                     let mapped = Box::new(map_internal(*expr, f));
199                     OptimizedExpr::Rep(mapped)
200                 }
201                 OptimizedExpr::Opt(expr) => {
202                     let mapped = Box::new(map_internal(*expr, f));
203                     OptimizedExpr::Opt(mapped)
204                 }
205                 OptimizedExpr::Push(expr) => {
206                     let mapped = Box::new(map_internal(*expr, f));
207                     OptimizedExpr::Push(mapped)
208                 }
209                 expr => expr,
210             }
211         }
212 
213         map_internal(self, &mut f)
214     }
215 
216     /// Applies `f` to the `OptimizedExpr` bottom-up.
map_bottom_up<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,217     pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
218     where
219         F: FnMut(OptimizedExpr) -> OptimizedExpr,
220     {
221         fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
222         where
223             F: FnMut(OptimizedExpr) -> OptimizedExpr,
224         {
225             let mapped = match expr {
226                 OptimizedExpr::PosPred(expr) => {
227                     let mapped = Box::new(map_internal(*expr, f));
228                     OptimizedExpr::PosPred(mapped)
229                 }
230                 OptimizedExpr::NegPred(expr) => {
231                     let mapped = Box::new(map_internal(*expr, f));
232                     OptimizedExpr::NegPred(mapped)
233                 }
234                 OptimizedExpr::Seq(lhs, rhs) => {
235                     let mapped_lhs = Box::new(map_internal(*lhs, f));
236                     let mapped_rhs = Box::new(map_internal(*rhs, f));
237                     OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
238                 }
239                 OptimizedExpr::Choice(lhs, rhs) => {
240                     let mapped_lhs = Box::new(map_internal(*lhs, f));
241                     let mapped_rhs = Box::new(map_internal(*rhs, f));
242                     OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
243                 }
244                 OptimizedExpr::Rep(expr) => {
245                     let mapped = Box::new(map_internal(*expr, f));
246                     OptimizedExpr::Rep(mapped)
247                 }
248                 OptimizedExpr::Opt(expr) => {
249                     let mapped = Box::new(map_internal(*expr, f));
250                     OptimizedExpr::Opt(mapped)
251                 }
252                 OptimizedExpr::Push(expr) => {
253                     let mapped = Box::new(map_internal(*expr, f));
254                     OptimizedExpr::Push(mapped)
255                 }
256                 expr => expr,
257             };
258 
259             f(mapped)
260         }
261 
262         map_internal(self, &mut f)
263     }
264 }
265 
266 impl core::fmt::Display for OptimizedExpr {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result267     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
268         match self {
269             OptimizedExpr::Str(s) => write!(f, "{:?}", s),
270             OptimizedExpr::Insens(s) => write!(f, "^{:?}", s),
271             OptimizedExpr::Range(start, end) => {
272                 let start = start.chars().next().expect("Empty range start.");
273                 let end = end.chars().next().expect("Empty range end.");
274                 write!(f, "({:?}..{:?})", start, end)
275             }
276             OptimizedExpr::Ident(id) => write!(f, "{}", id),
277             OptimizedExpr::PeekSlice(start, end) => match end {
278                 Some(end) => write!(f, "PEEK[{}..{}]", start, end),
279                 None => write!(f, "PEEK[{}..]", start),
280             },
281             OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
282             OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
283             OptimizedExpr::Seq(lhs, rhs) => {
284                 let mut nodes = Vec::new();
285                 nodes.push(lhs);
286                 let mut current = rhs;
287                 while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() {
288                     nodes.push(lhs);
289                     current = rhs;
290                 }
291                 nodes.push(current);
292                 let sequence = nodes
293                     .iter()
294                     .map(|node| format!("{}", node))
295                     .collect::<Vec<_>>()
296                     .join(" ~ ");
297                 write!(f, "({})", sequence)
298             }
299             OptimizedExpr::Choice(lhs, rhs) => {
300                 let mut nodes = Vec::new();
301                 nodes.push(lhs);
302                 let mut current = rhs;
303                 while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() {
304                     nodes.push(lhs);
305                     current = rhs;
306                 }
307                 nodes.push(current);
308                 let sequence = nodes
309                     .iter()
310                     .map(|node| format!("{}", node))
311                     .collect::<Vec<_>>()
312                     .join(" | ");
313                 write!(f, "({})", sequence)
314             }
315             OptimizedExpr::Opt(expr) => write!(f, "{}?", expr),
316             OptimizedExpr::Rep(expr) => write!(f, "{}*", expr),
317             #[cfg(feature = "grammar-extras")]
318             OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr),
319             OptimizedExpr::Skip(strings) => {
320                 let strings = strings
321                     .iter()
322                     .map(|s| format!("{:?}", s))
323                     .collect::<Vec<_>>()
324                     .join(" | ");
325                 write!(f, "(!({}) ~ ANY)*", strings)
326             }
327             OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr),
328             #[cfg(feature = "grammar-extras")]
329             OptimizedExpr::NodeTag(expr, tag) => {
330                 write!(f, "(#{} = {})", tag, expr)
331             }
332             OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f),
333         }
334     }
335 }
336 
337 /// A top-down iterator over an `OptimizedExpr`.
338 pub struct OptimizedExprTopDownIterator {
339     current: Option<OptimizedExpr>,
340     next: Option<OptimizedExpr>,
341     right_branches: Vec<OptimizedExpr>,
342 }
343 
344 impl OptimizedExprTopDownIterator {
345     /// Creates a new top down iterator from an `OptimizedExpr`.
new(expr: &OptimizedExpr) -> Self346     pub fn new(expr: &OptimizedExpr) -> Self {
347         let mut iter = OptimizedExprTopDownIterator {
348             current: None,
349             next: None,
350             right_branches: vec![],
351         };
352         iter.iterate_expr(expr.clone());
353         iter
354     }
355 
iterate_expr(&mut self, expr: OptimizedExpr)356     fn iterate_expr(&mut self, expr: OptimizedExpr) {
357         self.current = Some(expr.clone());
358         match expr {
359             OptimizedExpr::Seq(lhs, rhs) => {
360                 self.right_branches.push(*rhs);
361                 self.next = Some(*lhs);
362             }
363             OptimizedExpr::Choice(lhs, rhs) => {
364                 self.right_branches.push(*rhs);
365                 self.next = Some(*lhs);
366             }
367             OptimizedExpr::PosPred(expr)
368             | OptimizedExpr::NegPred(expr)
369             | OptimizedExpr::Rep(expr)
370             | OptimizedExpr::Opt(expr)
371             | OptimizedExpr::Push(expr) => {
372                 self.next = Some(*expr);
373             }
374             _ => {
375                 self.next = None;
376             }
377         }
378     }
379 }
380 
381 impl Iterator for OptimizedExprTopDownIterator {
382     type Item = OptimizedExpr;
383 
next(&mut self) -> Option<Self::Item>384     fn next(&mut self) -> Option<Self::Item> {
385         let result = self.current.take();
386 
387         if let Some(expr) = self.next.take() {
388             self.iterate_expr(expr);
389         } else if let Some(expr) = self.right_branches.pop() {
390             self.iterate_expr(expr);
391         }
392 
393         result
394     }
395 }
396 
397 #[cfg(test)]
398 mod tests {
399     use super::*;
400 
401     #[test]
rotate()402     fn rotate() {
403         let rules = {
404             use crate::ast::Expr::*;
405             vec![Rule {
406                 name: "rule".to_owned(),
407                 ty: RuleType::Normal,
408                 expr: box_tree!(Choice(
409                     Choice(
410                         Choice(Str(String::from("a")), Str(String::from("b"))),
411                         Str(String::from("c"))
412                     ),
413                     Str(String::from("d"))
414                 )),
415             }]
416         };
417         let rotated = {
418             use crate::optimizer::OptimizedExpr::*;
419             vec![OptimizedRule {
420                 name: "rule".to_owned(),
421                 ty: RuleType::Normal,
422                 expr: box_tree!(Choice(
423                     Str(String::from("a")),
424                     Choice(
425                         Str(String::from("b")),
426                         Choice(Str(String::from("c")), Str(String::from("d")))
427                     )
428                 )),
429             }]
430         };
431 
432         assert_eq!(optimize(rules), rotated);
433     }
434 
435     #[test]
skip()436     fn skip() {
437         let rules = {
438             use crate::ast::Expr::*;
439             vec![Rule {
440                 name: "rule".to_owned(),
441                 ty: RuleType::Atomic,
442                 expr: box_tree!(Rep(Seq(
443                     NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
444                     Ident(String::from("ANY"))
445                 ))),
446             }]
447         };
448         let skipped = vec![OptimizedRule {
449             name: "rule".to_owned(),
450             ty: RuleType::Atomic,
451             expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
452         }];
453 
454         assert_eq!(optimize(rules), skipped);
455     }
456 
457     #[test]
concat_strings()458     fn concat_strings() {
459         let rules = {
460             use crate::ast::Expr::*;
461             vec![Rule {
462                 name: "rule".to_owned(),
463                 ty: RuleType::Atomic,
464                 expr: box_tree!(Seq(
465                     Seq(Str(String::from("a")), Str(String::from("b"))),
466                     Seq(Str(String::from("c")), Str(String::from("d")))
467                 )),
468             }]
469         };
470         let concatenated = vec![OptimizedRule {
471             name: "rule".to_owned(),
472             ty: RuleType::Atomic,
473             expr: OptimizedExpr::Str(String::from("abcd")),
474         }];
475 
476         assert_eq!(optimize(rules), concatenated);
477     }
478 
479     #[test]
unroll_loop_exact()480     fn unroll_loop_exact() {
481         let rules = vec![Rule {
482             name: "rule".to_owned(),
483             ty: RuleType::Atomic,
484             expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
485         }];
486         let unrolled = {
487             use crate::optimizer::OptimizedExpr::*;
488             vec![OptimizedRule {
489                 name: "rule".to_owned(),
490                 ty: RuleType::Atomic,
491                 expr: box_tree!(Seq(
492                     Ident(String::from("a")),
493                     Seq(Ident(String::from("a")), Ident(String::from("a")))
494                 )),
495             }]
496         };
497 
498         assert_eq!(optimize(rules), unrolled);
499     }
500 
501     #[test]
unroll_loop_max()502     fn unroll_loop_max() {
503         let rules = vec![Rule {
504             name: "rule".to_owned(),
505             ty: RuleType::Atomic,
506             expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
507         }];
508         let unrolled = {
509             use crate::optimizer::OptimizedExpr::*;
510             vec![OptimizedRule {
511                 name: "rule".to_owned(),
512                 ty: RuleType::Atomic,
513                 expr: box_tree!(Seq(
514                     Opt(Str(String::from("a"))),
515                     Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
516                 )),
517             }]
518         };
519 
520         assert_eq!(optimize(rules), unrolled);
521     }
522 
523     #[test]
unroll_loop_min()524     fn unroll_loop_min() {
525         let rules = vec![Rule {
526             name: "rule".to_owned(),
527             ty: RuleType::Atomic,
528             expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
529         }];
530         let unrolled = {
531             use crate::optimizer::OptimizedExpr::*;
532             vec![OptimizedRule {
533                 name: "rule".to_owned(),
534                 ty: RuleType::Atomic,
535                 expr: box_tree!(Seq(
536                     Str(String::from("a")),
537                     Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
538                 )),
539             }]
540         };
541 
542         assert_eq!(optimize(rules), unrolled);
543     }
544 
545     #[test]
unroll_loop_min_max()546     fn unroll_loop_min_max() {
547         let rules = vec![Rule {
548             name: "rule".to_owned(),
549             ty: RuleType::Atomic,
550             expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
551         }];
552         let unrolled = {
553             use crate::optimizer::OptimizedExpr::*;
554             vec![OptimizedRule {
555                 name: "rule".to_owned(),
556                 ty: RuleType::Atomic,
557                 /* TODO possible room for improvement here:
558                  * if the sequences were rolled out in the opposite
559                  * order, we could further optimize the strings
560                  * in cases like this.
561                 Str(String::from(("aa")),
562                 Opt(Str(String::from("a")))
563                 */
564                 expr: box_tree!(Seq(
565                     Str(String::from("a")),
566                     Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
567                 )),
568             }]
569         };
570 
571         assert_eq!(optimize(rules), unrolled);
572     }
573 
574     #[test]
concat_insensitive_strings()575     fn concat_insensitive_strings() {
576         let rules = {
577             use crate::ast::Expr::*;
578             vec![Rule {
579                 name: "rule".to_owned(),
580                 ty: RuleType::Atomic,
581                 expr: box_tree!(Seq(
582                     Seq(Insens(String::from("a")), Insens(String::from("b"))),
583                     Seq(Insens(String::from("c")), Insens(String::from("d")))
584                 )),
585             }]
586         };
587         let concatenated = vec![OptimizedRule {
588             name: "rule".to_owned(),
589             ty: RuleType::Atomic,
590             expr: OptimizedExpr::Insens(String::from("abcd")),
591         }];
592 
593         assert_eq!(optimize(rules), concatenated);
594     }
595 
596     #[test]
long_common_sequence()597     fn long_common_sequence() {
598         let rules = {
599             use crate::ast::Expr::*;
600             vec![Rule {
601                 name: "rule".to_owned(),
602                 ty: RuleType::Silent,
603                 expr: box_tree!(Choice(
604                     Seq(
605                         Ident(String::from("a")),
606                         Seq(Ident(String::from("b")), Ident(String::from("c")))
607                     ),
608                     Seq(
609                         Seq(Ident(String::from("a")), Ident(String::from("b"))),
610                         Ident(String::from("d"))
611                     )
612                 )),
613             }]
614         };
615         let optimized = {
616             use crate::optimizer::OptimizedExpr::*;
617             vec![OptimizedRule {
618                 name: "rule".to_owned(),
619                 ty: RuleType::Silent,
620                 expr: box_tree!(Seq(
621                     Ident(String::from("a")),
622                     Seq(
623                         Ident(String::from("b")),
624                         Choice(Ident(String::from("c")), Ident(String::from("d")))
625                     )
626                 )),
627             }]
628         };
629 
630         assert_eq!(optimize(rules), optimized);
631     }
632 
633     #[test]
short_common_sequence()634     fn short_common_sequence() {
635         let rules = {
636             use crate::ast::Expr::*;
637             vec![Rule {
638                 name: "rule".to_owned(),
639                 ty: RuleType::Atomic,
640                 expr: box_tree!(Choice(
641                     Seq(Ident(String::from("a")), Ident(String::from("b"))),
642                     Ident(String::from("a"))
643                 )),
644             }]
645         };
646         let optimized = {
647             use crate::optimizer::OptimizedExpr::*;
648             vec![OptimizedRule {
649                 name: "rule".to_owned(),
650                 ty: RuleType::Atomic,
651                 expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
652             }]
653         };
654 
655         assert_eq!(optimize(rules), optimized);
656     }
657 
658     #[test]
impossible_common_sequence()659     fn impossible_common_sequence() {
660         let rules = {
661             use crate::ast::Expr::*;
662             vec![Rule {
663                 name: "rule".to_owned(),
664                 ty: RuleType::Silent,
665                 expr: box_tree!(Choice(
666                     Ident(String::from("a")),
667                     Seq(Ident(String::from("a")), Ident(String::from("b")))
668                 )),
669             }]
670         };
671         let optimized = {
672             use crate::optimizer::OptimizedExpr::*;
673             vec![OptimizedRule {
674                 name: "rule".to_owned(),
675                 ty: RuleType::Silent,
676                 expr: box_tree!(Ident(String::from("a"))),
677             }]
678         };
679 
680         assert_eq!(optimize(rules), optimized);
681     }
682 
683     #[test]
lister()684     fn lister() {
685         let rules = {
686             use crate::ast::Expr::*;
687             vec![Rule {
688                 name: "rule".to_owned(),
689                 ty: RuleType::Silent,
690                 expr: box_tree!(Seq(
691                     Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
692                     Ident(String::from("a"))
693                 )),
694             }]
695         };
696         let optimized = {
697             use crate::optimizer::OptimizedExpr::*;
698             vec![OptimizedRule {
699                 name: "rule".to_owned(),
700                 ty: RuleType::Silent,
701                 expr: box_tree!(Seq(
702                     Ident(String::from("a")),
703                     Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
704                 )),
705             }]
706         };
707 
708         assert_eq!(optimize(rules), optimized);
709     }
710 
711     mod display {
712         use super::super::*;
713         /// In previous implementation of Display for OptimizedExpr
714         /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me),
715         /// Str("\n") will be displayed as
716         /// "
717         /// "
718         ///
719         /// It will not break the compilation in normal use.
720         ///
721         /// But when I use it in automatically generating documents,
722         /// it will quite confusing and we'll be unable to distinguish \n and \r.
723         ///
724         /// And `cargo expand` will emit codes that can't be compiled,
725         /// for it expand `#[doc("...")]` to `/// ...`,
726         /// and when the document comment breaks the line,
727         /// it will be expanded into wrong codes.
728         #[test]
control_character()729         fn control_character() {
730             assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\"");
731             assert_eq!(
732                 OptimizedExpr::Insens("\n".to_owned()).to_string(),
733                 "^\"\\n\"",
734             );
735             assert_eq!(
736                 OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(),
737                 "('\\n'..'\\r')",
738             );
739             assert_eq!(
740                 OptimizedExpr::Skip(vec![
741                     "\n".to_owned(),
742                     "\r".to_owned(),
743                     "\n\r".to_owned(),
744                     "\0".to_owned(),
745                 ])
746                 .to_string(),
747                 r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#,
748             );
749 
750             assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\"");
751         }
752 
753         #[test]
str()754         fn str() {
755             assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#);
756         }
757 
758         #[test]
insens()759         fn insens() {
760             assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#);
761         }
762 
763         #[test]
range()764         fn range() {
765             assert_eq!(
766                 OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(),
767                 r#"('a'..'z')"#,
768             );
769         }
770 
771         #[test]
ident()772         fn ident() {
773             assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#);
774         }
775 
776         #[test]
peek_slice()777         fn peek_slice() {
778             assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
779             assert_eq!(
780                 OptimizedExpr::PeekSlice(0, Some(-1)).to_string(),
781                 "PEEK[0..-1]",
782             );
783             assert_eq!(
784                 OptimizedExpr::PeekSlice(2, Some(3)).to_string(),
785                 "PEEK[2..3]",
786             );
787             assert_eq!(
788                 OptimizedExpr::PeekSlice(2, Some(-1)).to_string(),
789                 "PEEK[2..-1]",
790             );
791             assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
792         }
793 
794         #[test]
pos_pred()795         fn pos_pred() {
796             assert_eq!(
797                 OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new(
798                     OptimizedExpr::Ident("a".to_owned()),
799                 ))))
800                 .to_string(),
801                 "&!a",
802             );
803             assert_eq!(
804                 OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice(
805                     Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident(
806                         "a".to_owned(),
807                     )))),
808                     Box::new(OptimizedExpr::Str("a".to_owned())),
809                 )))
810                 .to_string(),
811                 r#"&(a* | "a")"#,
812             );
813             assert_eq!(
814                 OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new(
815                     OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))),
816                 ))))
817                 .to_string(),
818                 "&!a",
819             );
820         }
821 
822         #[test]
neg_pred()823         fn neg_pred() {
824             assert_eq!(
825                 OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
826                 r#"!e"#,
827             );
828             assert_eq!(
829                 OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice(
830                     Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
831                         "a".to_owned(),
832                     )))),
833                     Box::new(OptimizedExpr::Str("a".to_owned())),
834                 )))
835                 .to_string(),
836                 r#"!(PUSH(a) | "a")"#,
837             );
838         }
839 
840         #[test]
seq()841         fn seq() {
842             assert_eq!(
843                 OptimizedExpr::Seq(
844                     Box::new(OptimizedExpr::Ident("e1".to_owned())),
845                     Box::new(OptimizedExpr::Ident("e2".to_owned())),
846                 )
847                 .to_string(),
848                 r#"(e1 ~ e2)"#,
849             );
850             assert_eq!(
851                 Expr::Seq(
852                     Box::new(Expr::Ident("e1".to_owned())),
853                     Box::new(Expr::Seq(
854                         Box::new(Expr::Ident("e2".to_owned())),
855                         Box::new(Expr::Ident("e3".to_owned())),
856                     )),
857                 )
858                 .to_string(),
859                 "(e1 ~ e2 ~ e3)",
860             );
861             assert_eq!(
862                 Expr::Seq(
863                     Box::new(Expr::Ident("e1".to_owned())),
864                     Box::new(Expr::Seq(
865                         Box::new(Expr::Ident("e2".to_owned())),
866                         Box::new(Expr::Seq(
867                             Box::new(Expr::Ident("e3".to_owned())),
868                             Box::new(Expr::Ident("e4".to_owned())),
869                         )),
870                     )),
871                 )
872                 .to_string(),
873                 "(e1 ~ e2 ~ e3 ~ e4)",
874             );
875             assert_eq!(
876                 Expr::Seq(
877                     Box::new(Expr::Ident("e1".to_owned())),
878                     Box::new(Expr::Choice(
879                         Box::new(Expr::Ident("e2".to_owned())),
880                         Box::new(Expr::Seq(
881                             Box::new(Expr::Ident("e3".to_owned())),
882                             Box::new(Expr::Ident("e4".to_owned())),
883                         )),
884                     )),
885                 )
886                 .to_string(),
887                 "(e1 ~ (e2 | (e3 ~ e4)))",
888             );
889             assert_eq!(
890                 Expr::Seq(
891                     Box::new(Expr::Ident("e1".to_owned())),
892                     Box::new(Expr::Seq(
893                         Box::new(Expr::Ident("e2".to_owned())),
894                         Box::new(Expr::Choice(
895                             Box::new(Expr::Ident("e3".to_owned())),
896                             Box::new(Expr::Ident("e4".to_owned())),
897                         )),
898                     )),
899                 )
900                 .to_string(),
901                 "(e1 ~ e2 ~ (e3 | e4))",
902             );
903             assert_eq!(
904                 OptimizedExpr::Seq(
905                     Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str(
906                         "a".to_owned(),
907                     )))),
908                     Box::new(OptimizedExpr::Seq(
909                         Box::new(OptimizedExpr::Ident("b".to_owned())),
910                         Box::new(OptimizedExpr::Insens("c".to_owned())),
911                     )),
912                 )
913                 .to_string(),
914                 r#"("a"* ~ b ~ ^"c")"#,
915             );
916             assert_eq!(
917                 OptimizedExpr::Seq(
918                     Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range(
919                         "a".to_owned(),
920                         "z".to_owned(),
921                     )))),
922                     Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt(
923                         Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())),
924                     )))),
925                 )
926                 .to_string(),
927                 "(&('a'..'z') ~ !('A'..'Z')?)",
928             );
929         }
930 
931         #[test]
choice()932         fn choice() {
933             assert_eq!(
934                 OptimizedExpr::Choice(
935                     Box::new(OptimizedExpr::Ident("e1".to_owned())),
936                     Box::new(OptimizedExpr::Ident("e2".to_owned())),
937                 )
938                 .to_string(),
939                 r#"(e1 | e2)"#,
940             );
941             assert_eq!(
942                 Expr::Choice(
943                     Box::new(Expr::Ident("e1".to_owned())),
944                     Box::new(Expr::Choice(
945                         Box::new(Expr::Ident("e2".to_owned())),
946                         Box::new(Expr::Ident("e3".to_owned())),
947                     )),
948                 )
949                 .to_string(),
950                 "(e1 | e2 | e3)",
951             );
952             assert_eq!(
953                 Expr::Choice(
954                     Box::new(Expr::Ident("e1".to_owned())),
955                     Box::new(Expr::Choice(
956                         Box::new(Expr::Ident("e2".to_owned())),
957                         Box::new(Expr::Choice(
958                             Box::new(Expr::Ident("e3".to_owned())),
959                             Box::new(Expr::Ident("e4".to_owned())),
960                         )),
961                     )),
962                 )
963                 .to_string(),
964                 "(e1 | e2 | e3 | e4)",
965             );
966             assert_eq!(
967                 Expr::Choice(
968                     Box::new(Expr::Ident("e1".to_owned())),
969                     Box::new(Expr::Seq(
970                         Box::new(Expr::Ident("e2".to_owned())),
971                         Box::new(Expr::Choice(
972                             Box::new(Expr::Ident("e3".to_owned())),
973                             Box::new(Expr::Ident("e4".to_owned())),
974                         )),
975                     )),
976                 )
977                 .to_string(),
978                 "(e1 | (e2 ~ (e3 | e4)))",
979             );
980             assert_eq!(
981                 OptimizedExpr::Choice(
982                     Box::new(OptimizedExpr::Str("a".to_owned())),
983                     Box::new(OptimizedExpr::Choice(
984                         Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
985                             "b".to_owned(),
986                         )))),
987                         Box::new(OptimizedExpr::Insens("c".to_owned())),
988                     )),
989                 )
990                 .to_string(),
991                 r#"("a" | PUSH(b) | ^"c")"#,
992             );
993         }
994 
995         #[test]
opt()996         fn opt() {
997             assert_eq!(
998                 OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
999                 "e?",
1000             );
1001         }
1002 
1003         #[test]
rep()1004         fn rep() {
1005             assert_eq!(
1006                 OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(),
1007                 "x*",
1008             );
1009             assert_eq!(
1010                 OptimizedExpr::Rep(Box::new(OptimizedExpr::Range(
1011                     "0".to_owned(),
1012                     "9".to_owned(),
1013                 )))
1014                 .to_string(),
1015                 "('0'..'9')*",
1016             );
1017         }
1018 
1019         #[test]
1020         #[cfg(feature = "grammar-extras")]
rep_once()1021         fn rep_once() {
1022             assert_eq!(
1023                 OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1024                 "e+",
1025             );
1026             assert_eq!(
1027                 OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range(
1028                     "0".to_owned(),
1029                     "9".to_owned(),
1030                 )))
1031                 .to_string(),
1032                 "('0'..'9')+",
1033             );
1034         }
1035 
1036         #[test]
skip()1037         fn skip() {
1038             assert_eq!(
1039                 OptimizedExpr::Skip(
1040                     ["a", "bc"]
1041                         .into_iter()
1042                         .map(|s| s.to_owned())
1043                         .collect::<Vec<_>>(),
1044                 )
1045                 .to_string(),
1046                 r#"(!("a" | "bc") ~ ANY)*"#,
1047             );
1048         }
1049 
1050         #[test]
inline_skip()1051         fn inline_skip() {
1052             use crate::ast::Expr::*;
1053             let rules = vec![
1054                 Rule {
1055                     name: "inline".to_owned(),
1056                     ty: RuleType::Atomic,
1057                     expr: Str("a".to_owned()),
1058                 },
1059                 Rule {
1060                     name: "skip".to_owned(),
1061                     ty: RuleType::Atomic,
1062                     expr: box_tree!(Rep(Seq(
1063                         NegPred(Choice(
1064                             Ident(String::from("inline")),
1065                             Str(String::from("b"))
1066                         )),
1067                         Ident("ANY".to_owned())
1068                     ))),
1069                 },
1070             ];
1071             let map = to_hash_map(&rules);
1072             let rule = skipper::skip(rules[1].clone(), &map);
1073             assert!(matches!(rule, Rule { expr: Skip(..), .. }));
1074             let choices = match rule.expr {
1075                 Skip(choices) => choices,
1076                 _ => unreachable!(),
1077             };
1078             assert_eq!(choices, vec!["a".to_owned(), "b".to_owned()]);
1079         }
1080 
1081         #[test]
push()1082         fn push() {
1083             assert_eq!(
1084                 OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1085                 "PUSH(e)",
1086             );
1087         }
1088 
1089         #[test]
1090         #[cfg(feature = "grammar-extras")]
node_tag()1091         fn node_tag() {
1092             assert_eq!(
1093                 OptimizedExpr::NodeTag(
1094                     Box::new(OptimizedExpr::Ident("expr".to_owned())),
1095                     "label".to_owned(),
1096                 )
1097                 .to_string(),
1098                 r#"(#label = expr)"#,
1099             );
1100             assert_eq!(
1101                 OptimizedExpr::NodeTag(
1102                     Box::new(OptimizedExpr::Ident("x".to_owned())),
1103                     "X".to_owned(),
1104                 )
1105                 .to_string(),
1106                 r#"(#X = x)"#,
1107             );
1108             assert_eq!(
1109                 OptimizedExpr::NodeTag(
1110                     Box::new(OptimizedExpr::Seq(
1111                         Box::new(OptimizedExpr::Ident("x".to_owned())),
1112                         Box::new(OptimizedExpr::Str("y".to_owned())),
1113                     )),
1114                     "X".to_owned(),
1115                 )
1116                 .to_string(),
1117                 r#"(#X = (x ~ "y"))"#,
1118             );
1119         }
1120 
1121         #[test]
restore_on_err()1122         fn restore_on_err() {
1123             assert_eq!(
1124                 OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned())))
1125                     .to_string(),
1126                 "e",
1127             );
1128         }
1129     }
1130 }
1131