• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // pest. The Elegant Parser
2 // Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 //! Different optimizations for pest's ASTs.
11 
12 use crate::ast::*;
13 use std::collections::HashMap;
14 
15 #[cfg(test)]
16 macro_rules! box_tree {
17     ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18       $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19     );
20     ($expr:expr) => ($expr);
21 }
22 
23 mod concatenator;
24 mod factorizer;
25 mod lister;
26 mod restorer;
27 mod rotater;
28 mod skipper;
29 mod unroller;
30 
31 /// Takes pest's ASTs and optimizes them
optimize(rules: Vec<Rule>) -> Vec<OptimizedRule>32 pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33     let optimized: Vec<OptimizedRule> = rules
34         .into_iter()
35         .map(rotater::rotate)
36         .map(skipper::skip)
37         .map(unroller::unroll)
38         .map(concatenator::concatenate)
39         .map(factorizer::factor)
40         .map(lister::list)
41         .map(rule_to_optimized_rule)
42         .collect();
43 
44     let rules = to_hash_map(&optimized);
45     optimized
46         .into_iter()
47         .map(|rule| restorer::restore_on_err(rule, &rules))
48         .collect()
49 }
50 
rule_to_optimized_rule(rule: Rule) -> OptimizedRule51 fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
52     fn to_optimized(expr: Expr) -> OptimizedExpr {
53         match expr {
54             Expr::Str(string) => OptimizedExpr::Str(string),
55             Expr::Insens(string) => OptimizedExpr::Insens(string),
56             Expr::Range(start, end) => OptimizedExpr::Range(start, end),
57             Expr::Ident(ident) => OptimizedExpr::Ident(ident),
58             Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
59             Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
60             Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
61             Expr::Seq(lhs, rhs) => {
62                 OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
63             }
64             Expr::Choice(lhs, rhs) => {
65                 OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
66             }
67             Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
68             Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
69             Expr::Skip(strings) => OptimizedExpr::Skip(strings),
70             Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
71             Expr::RepOnce(_)
72             | Expr::RepExact(..)
73             | Expr::RepMin(..)
74             | Expr::RepMax(..)
75             | Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"),
76         }
77     }
78 
79     OptimizedRule {
80         name: rule.name,
81         ty: rule.ty,
82         expr: to_optimized(rule.expr),
83     }
84 }
85 
to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr>86 fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
87     rules
88         .iter()
89         .map(|r| (r.name.clone(), r.expr.clone()))
90         .collect()
91 }
92 
93 /// The optimized version of the pest AST's `Rule`.
94 #[derive(Clone, Debug, Eq, PartialEq)]
95 pub struct OptimizedRule {
96     /// The name of the rule.
97     pub name: String,
98     /// The type of the rule.
99     pub ty: RuleType,
100     /// The optimized expression of the rule.
101     pub expr: OptimizedExpr,
102 }
103 
104 /// The optimized version of the pest AST's `Expr`.
105 ///
106 /// # Warning: Semantic Versioning
107 /// There may be non-breaking changes to the meta-grammar
108 /// between minor versions. Those non-breaking changes, however,
109 /// may translate into semver-breaking changes due to the additional variants
110 /// propaged from the `Rule` enum. This is a known issue and will be fixed in the
111 /// future (e.g. by increasing MSRV and non_exhaustive annotations).
112 #[derive(Clone, Debug, Eq, PartialEq)]
113 pub enum OptimizedExpr {
114     /// Matches an exact string, e.g. `"a"`
115     Str(String),
116     /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
117     Insens(String),
118     /// Matches one character in the range, e.g. `'a'..'z'`
119     Range(String, String),
120     /// Matches the rule with the given name, e.g. `a`
121     Ident(String),
122     /// Matches a custom part of the stack, e.g. `PEEK[..]`
123     PeekSlice(i32, Option<i32>),
124     /// Positive lookahead; matches expression without making progress, e.g. `&e`
125     PosPred(Box<OptimizedExpr>),
126     /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
127     NegPred(Box<OptimizedExpr>),
128     /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
129     Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
130     /// Matches either of two expressions, e.g. `e1 | e2`
131     Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
132     /// Optionally matches an expression, e.g. `e?`
133     Opt(Box<OptimizedExpr>),
134     /// Matches an expression zero or more times, e.g. `e*`
135     Rep(Box<OptimizedExpr>),
136     /// Continues to match expressions until one of the strings in the `Vec` is found
137     Skip(Vec<String>),
138     /// Matches an expression and pushes it to the stack, e.g. `push(e)`
139     Push(Box<OptimizedExpr>),
140     /// Restores an expression's checkpoint
141     RestoreOnErr(Box<OptimizedExpr>),
142 }
143 
144 impl OptimizedExpr {
145     /// Returns a top-down iterator over the `OptimizedExpr`.
iter_top_down(&self) -> OptimizedExprTopDownIterator146     pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
147         OptimizedExprTopDownIterator::new(self)
148     }
149 
150     /// Applies `f` to the `OptimizedExpr` top-down.
map_top_down<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,151     pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
152     where
153         F: FnMut(OptimizedExpr) -> OptimizedExpr,
154     {
155         fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
156         where
157             F: FnMut(OptimizedExpr) -> OptimizedExpr,
158         {
159             let expr = f(expr);
160 
161             match expr {
162                 // TODO: Use box syntax when it gets stabilized.
163                 OptimizedExpr::PosPred(expr) => {
164                     let mapped = Box::new(map_internal(*expr, f));
165                     OptimizedExpr::PosPred(mapped)
166                 }
167                 OptimizedExpr::NegPred(expr) => {
168                     let mapped = Box::new(map_internal(*expr, f));
169                     OptimizedExpr::NegPred(mapped)
170                 }
171                 OptimizedExpr::Seq(lhs, rhs) => {
172                     let mapped_lhs = Box::new(map_internal(*lhs, f));
173                     let mapped_rhs = Box::new(map_internal(*rhs, f));
174                     OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
175                 }
176                 OptimizedExpr::Choice(lhs, rhs) => {
177                     let mapped_lhs = Box::new(map_internal(*lhs, f));
178                     let mapped_rhs = Box::new(map_internal(*rhs, f));
179                     OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
180                 }
181                 OptimizedExpr::Rep(expr) => {
182                     let mapped = Box::new(map_internal(*expr, f));
183                     OptimizedExpr::Rep(mapped)
184                 }
185                 OptimizedExpr::Opt(expr) => {
186                     let mapped = Box::new(map_internal(*expr, f));
187                     OptimizedExpr::Opt(mapped)
188                 }
189                 OptimizedExpr::Push(expr) => {
190                     let mapped = Box::new(map_internal(*expr, f));
191                     OptimizedExpr::Push(mapped)
192                 }
193                 expr => expr,
194             }
195         }
196 
197         map_internal(self, &mut f)
198     }
199 
200     /// Applies `f` to the `OptimizedExpr` bottom-up.
map_bottom_up<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,201     pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
202     where
203         F: FnMut(OptimizedExpr) -> OptimizedExpr,
204     {
205         fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
206         where
207             F: FnMut(OptimizedExpr) -> OptimizedExpr,
208         {
209             let mapped = match expr {
210                 OptimizedExpr::PosPred(expr) => {
211                     // TODO: Use box syntax when it gets stabilized.
212                     let mapped = Box::new(map_internal(*expr, f));
213                     OptimizedExpr::PosPred(mapped)
214                 }
215                 OptimizedExpr::NegPred(expr) => {
216                     let mapped = Box::new(map_internal(*expr, f));
217                     OptimizedExpr::NegPred(mapped)
218                 }
219                 OptimizedExpr::Seq(lhs, rhs) => {
220                     let mapped_lhs = Box::new(map_internal(*lhs, f));
221                     let mapped_rhs = Box::new(map_internal(*rhs, f));
222                     OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
223                 }
224                 OptimizedExpr::Choice(lhs, rhs) => {
225                     let mapped_lhs = Box::new(map_internal(*lhs, f));
226                     let mapped_rhs = Box::new(map_internal(*rhs, f));
227                     OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
228                 }
229                 OptimizedExpr::Rep(expr) => {
230                     let mapped = Box::new(map_internal(*expr, f));
231                     OptimizedExpr::Rep(mapped)
232                 }
233                 OptimizedExpr::Opt(expr) => {
234                     let mapped = Box::new(map_internal(*expr, f));
235                     OptimizedExpr::Opt(mapped)
236                 }
237                 OptimizedExpr::Push(expr) => {
238                     let mapped = Box::new(map_internal(*expr, f));
239                     OptimizedExpr::Push(mapped)
240                 }
241                 expr => expr,
242             };
243 
244             f(mapped)
245         }
246 
247         map_internal(self, &mut f)
248     }
249 }
250 
251 /// A top-down iterator over an `OptimizedExpr`.
252 pub struct OptimizedExprTopDownIterator {
253     current: Option<OptimizedExpr>,
254     next: Option<OptimizedExpr>,
255     right_branches: Vec<OptimizedExpr>,
256 }
257 
258 impl OptimizedExprTopDownIterator {
259     /// Creates a new top down iterator from an `OptimizedExpr`.
new(expr: &OptimizedExpr) -> Self260     pub fn new(expr: &OptimizedExpr) -> Self {
261         let mut iter = OptimizedExprTopDownIterator {
262             current: None,
263             next: None,
264             right_branches: vec![],
265         };
266         iter.iterate_expr(expr.clone());
267         iter
268     }
269 
iterate_expr(&mut self, expr: OptimizedExpr)270     fn iterate_expr(&mut self, expr: OptimizedExpr) {
271         self.current = Some(expr.clone());
272         match expr {
273             OptimizedExpr::Seq(lhs, rhs) => {
274                 self.right_branches.push(*rhs);
275                 self.next = Some(*lhs);
276             }
277             OptimizedExpr::Choice(lhs, rhs) => {
278                 self.right_branches.push(*rhs);
279                 self.next = Some(*lhs);
280             }
281             OptimizedExpr::PosPred(expr)
282             | OptimizedExpr::NegPred(expr)
283             | OptimizedExpr::Rep(expr)
284             | OptimizedExpr::Opt(expr)
285             | OptimizedExpr::Push(expr) => {
286                 self.next = Some(*expr);
287             }
288             _ => {
289                 self.next = None;
290             }
291         }
292     }
293 }
294 
295 impl Iterator for OptimizedExprTopDownIterator {
296     type Item = OptimizedExpr;
297 
next(&mut self) -> Option<Self::Item>298     fn next(&mut self) -> Option<Self::Item> {
299         let result = self.current.take();
300 
301         if let Some(expr) = self.next.take() {
302             self.iterate_expr(expr);
303         } else if let Some(expr) = self.right_branches.pop() {
304             self.iterate_expr(expr);
305         }
306 
307         result
308     }
309 }
310 
311 #[cfg(test)]
312 mod tests {
313     use super::*;
314 
315     #[test]
rotate()316     fn rotate() {
317         let rules = {
318             use crate::ast::Expr::*;
319             vec![Rule {
320                 name: "rule".to_owned(),
321                 ty: RuleType::Normal,
322                 expr: box_tree!(Choice(
323                     Choice(
324                         Choice(Str(String::from("a")), Str(String::from("b"))),
325                         Str(String::from("c"))
326                     ),
327                     Str(String::from("d"))
328                 )),
329             }]
330         };
331         let rotated = {
332             use crate::optimizer::OptimizedExpr::*;
333             vec![OptimizedRule {
334                 name: "rule".to_owned(),
335                 ty: RuleType::Normal,
336                 expr: box_tree!(Choice(
337                     Str(String::from("a")),
338                     Choice(
339                         Str(String::from("b")),
340                         Choice(Str(String::from("c")), Str(String::from("d")))
341                     )
342                 )),
343             }]
344         };
345 
346         assert_eq!(optimize(rules), rotated);
347     }
348 
349     #[test]
skip()350     fn skip() {
351         let rules = {
352             use crate::ast::Expr::*;
353             vec![Rule {
354                 name: "rule".to_owned(),
355                 ty: RuleType::Atomic,
356                 expr: box_tree!(Rep(Seq(
357                     NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
358                     Ident(String::from("ANY"))
359                 ))),
360             }]
361         };
362         let skipped = vec![OptimizedRule {
363             name: "rule".to_owned(),
364             ty: RuleType::Atomic,
365             expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
366         }];
367 
368         assert_eq!(optimize(rules), skipped);
369     }
370 
371     #[test]
concat_strings()372     fn concat_strings() {
373         let rules = {
374             use crate::ast::Expr::*;
375             vec![Rule {
376                 name: "rule".to_owned(),
377                 ty: RuleType::Atomic,
378                 expr: box_tree!(Seq(
379                     Seq(Str(String::from("a")), Str(String::from("b"))),
380                     Seq(Str(String::from("c")), Str(String::from("d")))
381                 )),
382             }]
383         };
384         let concatenated = vec![OptimizedRule {
385             name: "rule".to_owned(),
386             ty: RuleType::Atomic,
387             expr: OptimizedExpr::Str(String::from("abcd")),
388         }];
389 
390         assert_eq!(optimize(rules), concatenated);
391     }
392 
393     #[test]
unroll_loop_exact()394     fn unroll_loop_exact() {
395         let rules = vec![Rule {
396             name: "rule".to_owned(),
397             ty: RuleType::Atomic,
398             expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
399         }];
400         let unrolled = {
401             use crate::optimizer::OptimizedExpr::*;
402             vec![OptimizedRule {
403                 name: "rule".to_owned(),
404                 ty: RuleType::Atomic,
405                 expr: box_tree!(Seq(
406                     Ident(String::from("a")),
407                     Seq(Ident(String::from("a")), Ident(String::from("a")))
408                 )),
409             }]
410         };
411 
412         assert_eq!(optimize(rules), unrolled);
413     }
414 
415     #[test]
unroll_loop_max()416     fn unroll_loop_max() {
417         let rules = vec![Rule {
418             name: "rule".to_owned(),
419             ty: RuleType::Atomic,
420             expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
421         }];
422         let unrolled = {
423             use crate::optimizer::OptimizedExpr::*;
424             vec![OptimizedRule {
425                 name: "rule".to_owned(),
426                 ty: RuleType::Atomic,
427                 expr: box_tree!(Seq(
428                     Opt(Str(String::from("a"))),
429                     Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
430                 )),
431             }]
432         };
433 
434         assert_eq!(optimize(rules), unrolled);
435     }
436 
437     #[test]
unroll_loop_min()438     fn unroll_loop_min() {
439         let rules = vec![Rule {
440             name: "rule".to_owned(),
441             ty: RuleType::Atomic,
442             expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
443         }];
444         let unrolled = {
445             use crate::optimizer::OptimizedExpr::*;
446             vec![OptimizedRule {
447                 name: "rule".to_owned(),
448                 ty: RuleType::Atomic,
449                 expr: box_tree!(Seq(
450                     Str(String::from("a")),
451                     Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
452                 )),
453             }]
454         };
455 
456         assert_eq!(optimize(rules), unrolled);
457     }
458 
459     #[test]
unroll_loop_min_max()460     fn unroll_loop_min_max() {
461         let rules = vec![Rule {
462             name: "rule".to_owned(),
463             ty: RuleType::Atomic,
464             expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
465         }];
466         let unrolled = {
467             use crate::optimizer::OptimizedExpr::*;
468             vec![OptimizedRule {
469                 name: "rule".to_owned(),
470                 ty: RuleType::Atomic,
471                 /* TODO possible room for improvement here:
472                  * if the sequences were rolled out in the opposite
473                  * order, we could further optimize the strings
474                  * in cases like this.
475                 Str(String::from(("aa")),
476                 Opt(Str(String::from("a")))
477                 */
478                 expr: box_tree!(Seq(
479                     Str(String::from("a")),
480                     Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
481                 )),
482             }]
483         };
484 
485         assert_eq!(optimize(rules), unrolled);
486     }
487 
488     #[test]
concat_insensitive_strings()489     fn concat_insensitive_strings() {
490         let rules = {
491             use crate::ast::Expr::*;
492             vec![Rule {
493                 name: "rule".to_owned(),
494                 ty: RuleType::Atomic,
495                 expr: box_tree!(Seq(
496                     Seq(Insens(String::from("a")), Insens(String::from("b"))),
497                     Seq(Insens(String::from("c")), Insens(String::from("d")))
498                 )),
499             }]
500         };
501         let concatenated = vec![OptimizedRule {
502             name: "rule".to_owned(),
503             ty: RuleType::Atomic,
504             expr: OptimizedExpr::Insens(String::from("abcd")),
505         }];
506 
507         assert_eq!(optimize(rules), concatenated);
508     }
509 
510     #[test]
long_common_sequence()511     fn long_common_sequence() {
512         let rules = {
513             use crate::ast::Expr::*;
514             vec![Rule {
515                 name: "rule".to_owned(),
516                 ty: RuleType::Silent,
517                 expr: box_tree!(Choice(
518                     Seq(
519                         Ident(String::from("a")),
520                         Seq(Ident(String::from("b")), Ident(String::from("c")))
521                     ),
522                     Seq(
523                         Seq(Ident(String::from("a")), Ident(String::from("b"))),
524                         Ident(String::from("d"))
525                     )
526                 )),
527             }]
528         };
529         let optimized = {
530             use crate::optimizer::OptimizedExpr::*;
531             vec![OptimizedRule {
532                 name: "rule".to_owned(),
533                 ty: RuleType::Silent,
534                 expr: box_tree!(Seq(
535                     Ident(String::from("a")),
536                     Seq(
537                         Ident(String::from("b")),
538                         Choice(Ident(String::from("c")), Ident(String::from("d")))
539                     )
540                 )),
541             }]
542         };
543 
544         assert_eq!(optimize(rules), optimized);
545     }
546 
547     #[test]
short_common_sequence()548     fn short_common_sequence() {
549         let rules = {
550             use crate::ast::Expr::*;
551             vec![Rule {
552                 name: "rule".to_owned(),
553                 ty: RuleType::Atomic,
554                 expr: box_tree!(Choice(
555                     Seq(Ident(String::from("a")), Ident(String::from("b"))),
556                     Ident(String::from("a"))
557                 )),
558             }]
559         };
560         let optimized = {
561             use crate::optimizer::OptimizedExpr::*;
562             vec![OptimizedRule {
563                 name: "rule".to_owned(),
564                 ty: RuleType::Atomic,
565                 expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
566             }]
567         };
568 
569         assert_eq!(optimize(rules), optimized);
570     }
571 
572     #[test]
impossible_common_sequence()573     fn impossible_common_sequence() {
574         let rules = {
575             use crate::ast::Expr::*;
576             vec![Rule {
577                 name: "rule".to_owned(),
578                 ty: RuleType::Silent,
579                 expr: box_tree!(Choice(
580                     Ident(String::from("a")),
581                     Seq(Ident(String::from("a")), Ident(String::from("b")))
582                 )),
583             }]
584         };
585         let optimized = {
586             use crate::optimizer::OptimizedExpr::*;
587             vec![OptimizedRule {
588                 name: "rule".to_owned(),
589                 ty: RuleType::Silent,
590                 expr: box_tree!(Ident(String::from("a"))),
591             }]
592         };
593 
594         assert_eq!(optimize(rules), optimized);
595     }
596 
597     #[test]
lister()598     fn lister() {
599         let rules = {
600             use crate::ast::Expr::*;
601             vec![Rule {
602                 name: "rule".to_owned(),
603                 ty: RuleType::Silent,
604                 expr: box_tree!(Seq(
605                     Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
606                     Ident(String::from("a"))
607                 )),
608             }]
609         };
610         let optimized = {
611             use crate::optimizer::OptimizedExpr::*;
612             vec![OptimizedRule {
613                 name: "rule".to_owned(),
614                 ty: RuleType::Silent,
615                 expr: box_tree!(Seq(
616                     Ident(String::from("a")),
617                     Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
618                 )),
619             }]
620         };
621 
622         assert_eq!(optimize(rules), optimized);
623     }
624 }
625