• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2 This module provides a regular expression printer for `Ast`.
3 */
4 
5 use std::fmt;
6 
7 use crate::ast::visitor::{self, Visitor};
8 use crate::ast::{self, Ast};
9 
/// Builder used to construct a [`Printer`].
///
/// The printer currently exposes no configuration options, so this builder
/// carries no settings and is deliberately kept private to the module.
#[derive(Debug, Clone)]
struct PrinterBuilder {
    // Zero-sized placeholder field; the builder has nothing to configure.
    _priv: (),
}
18 
19 impl Default for PrinterBuilder {
default() -> PrinterBuilder20     fn default() -> PrinterBuilder {
21         PrinterBuilder::new()
22     }
23 }
24 
25 impl PrinterBuilder {
new() -> PrinterBuilder26     fn new() -> PrinterBuilder {
27         PrinterBuilder { _priv: () }
28     }
29 
build(&self) -> Printer30     fn build(&self) -> Printer {
31         Printer { _priv: () }
32     }
33 }
34 
/// Prints a regular expression abstract syntax tree (AST) back out as a
/// pattern string.
///
/// The printer uses constant stack space and heap space proportional to the
/// size of the AST.
///
/// The output is not guaranteed to reproduce the formatting of the pattern the
/// AST was originally parsed from. For example, all whitespace and comments
/// are ignored.
#[derive(Debug)]
pub struct Printer {
    // Private zero-sized field: keeps construction restricted to this module.
    _priv: (),
}
48 
49 impl Printer {
50     /// Create a new printer.
new() -> Printer51     pub fn new() -> Printer {
52         PrinterBuilder::new().build()
53     }
54 
55     /// Print the given `Ast` to the given writer. The writer must implement
56     /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57     /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58     /// implementations) or a `&mut String`.
print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result59     pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
60         visitor::visit(ast, Writer { wtr })
61     }
62 }
63 
/// Visitor state used while printing: it owns the destination that the
/// pattern text is written to.
#[derive(Debug)]
struct Writer<W> {
    /// Destination for the printed pattern.
    wtr: W,
}
68 
69 impl<W: fmt::Write> Visitor for Writer<W> {
70     type Output = ();
71     type Err = fmt::Error;
72 
finish(self) -> fmt::Result73     fn finish(self) -> fmt::Result {
74         Ok(())
75     }
76 
visit_pre(&mut self, ast: &Ast) -> fmt::Result77     fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
78         match *ast {
79             Ast::Group(ref x) => self.fmt_group_pre(x),
80             Ast::Class(ast::Class::Bracketed(ref x)) => {
81                 self.fmt_class_bracketed_pre(x)
82             }
83             _ => Ok(()),
84         }
85     }
86 
visit_post(&mut self, ast: &Ast) -> fmt::Result87     fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
88         use crate::ast::Class;
89 
90         match *ast {
91             Ast::Empty(_) => Ok(()),
92             Ast::Flags(ref x) => self.fmt_set_flags(x),
93             Ast::Literal(ref x) => self.fmt_literal(x),
94             Ast::Dot(_) => self.wtr.write_str("."),
95             Ast::Assertion(ref x) => self.fmt_assertion(x),
96             Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
97             Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
98             Ast::Class(Class::Bracketed(ref x)) => {
99                 self.fmt_class_bracketed_post(x)
100             }
101             Ast::Repetition(ref x) => self.fmt_repetition(x),
102             Ast::Group(ref x) => self.fmt_group_post(x),
103             Ast::Alternation(_) => Ok(()),
104             Ast::Concat(_) => Ok(()),
105         }
106     }
107 
visit_alternation_in(&mut self) -> fmt::Result108     fn visit_alternation_in(&mut self) -> fmt::Result {
109         self.wtr.write_str("|")
110     }
111 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>112     fn visit_class_set_item_pre(
113         &mut self,
114         ast: &ast::ClassSetItem,
115     ) -> Result<(), Self::Err> {
116         match *ast {
117             ast::ClassSetItem::Bracketed(ref x) => {
118                 self.fmt_class_bracketed_pre(x)
119             }
120             _ => Ok(()),
121         }
122     }
123 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>124     fn visit_class_set_item_post(
125         &mut self,
126         ast: &ast::ClassSetItem,
127     ) -> Result<(), Self::Err> {
128         use crate::ast::ClassSetItem::*;
129 
130         match *ast {
131             Empty(_) => Ok(()),
132             Literal(ref x) => self.fmt_literal(x),
133             Range(ref x) => {
134                 self.fmt_literal(&x.start)?;
135                 self.wtr.write_str("-")?;
136                 self.fmt_literal(&x.end)?;
137                 Ok(())
138             }
139             Ascii(ref x) => self.fmt_class_ascii(x),
140             Unicode(ref x) => self.fmt_class_unicode(x),
141             Perl(ref x) => self.fmt_class_perl(x),
142             Bracketed(ref x) => self.fmt_class_bracketed_post(x),
143             Union(_) => Ok(()),
144         }
145     }
146 
visit_class_set_binary_op_in( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>147     fn visit_class_set_binary_op_in(
148         &mut self,
149         ast: &ast::ClassSetBinaryOp,
150     ) -> Result<(), Self::Err> {
151         self.fmt_class_set_binary_op_kind(&ast.kind)
152     }
153 }
154 
155 impl<W: fmt::Write> Writer<W> {
fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result156     fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
157         use crate::ast::GroupKind::*;
158         match ast.kind {
159             CaptureIndex(_) => self.wtr.write_str("("),
160             CaptureName(ref x) => {
161                 self.wtr.write_str("(?P<")?;
162                 self.wtr.write_str(&x.name)?;
163                 self.wtr.write_str(">")?;
164                 Ok(())
165             }
166             NonCapturing(ref flags) => {
167                 self.wtr.write_str("(?")?;
168                 self.fmt_flags(flags)?;
169                 self.wtr.write_str(":")?;
170                 Ok(())
171             }
172         }
173     }
174 
fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result175     fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
176         self.wtr.write_str(")")
177     }
178 
fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result179     fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
180         use crate::ast::RepetitionKind::*;
181         match ast.op.kind {
182             ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
183             ZeroOrOne => self.wtr.write_str("??"),
184             ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
185             ZeroOrMore => self.wtr.write_str("*?"),
186             OneOrMore if ast.greedy => self.wtr.write_str("+"),
187             OneOrMore => self.wtr.write_str("+?"),
188             Range(ref x) => {
189                 self.fmt_repetition_range(x)?;
190                 if !ast.greedy {
191                     self.wtr.write_str("?")?;
192                 }
193                 Ok(())
194             }
195         }
196     }
197 
fmt_repetition_range( &mut self, ast: &ast::RepetitionRange, ) -> fmt::Result198     fn fmt_repetition_range(
199         &mut self,
200         ast: &ast::RepetitionRange,
201     ) -> fmt::Result {
202         use crate::ast::RepetitionRange::*;
203         match *ast {
204             Exactly(x) => write!(self.wtr, "{{{}}}", x),
205             AtLeast(x) => write!(self.wtr, "{{{},}}", x),
206             Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
207         }
208     }
209 
fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result210     fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
211         use crate::ast::LiteralKind::*;
212 
213         match ast.kind {
214             Verbatim => self.wtr.write_char(ast.c),
215             Punctuation => write!(self.wtr, r"\{}", ast.c),
216             Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
217             HexFixed(ast::HexLiteralKind::X) => {
218                 write!(self.wtr, r"\x{:02X}", ast.c as u32)
219             }
220             HexFixed(ast::HexLiteralKind::UnicodeShort) => {
221                 write!(self.wtr, r"\u{:04X}", ast.c as u32)
222             }
223             HexFixed(ast::HexLiteralKind::UnicodeLong) => {
224                 write!(self.wtr, r"\U{:08X}", ast.c as u32)
225             }
226             HexBrace(ast::HexLiteralKind::X) => {
227                 write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
228             }
229             HexBrace(ast::HexLiteralKind::UnicodeShort) => {
230                 write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
231             }
232             HexBrace(ast::HexLiteralKind::UnicodeLong) => {
233                 write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
234             }
235             Special(ast::SpecialLiteralKind::Bell) => {
236                 self.wtr.write_str(r"\a")
237             }
238             Special(ast::SpecialLiteralKind::FormFeed) => {
239                 self.wtr.write_str(r"\f")
240             }
241             Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
242             Special(ast::SpecialLiteralKind::LineFeed) => {
243                 self.wtr.write_str(r"\n")
244             }
245             Special(ast::SpecialLiteralKind::CarriageReturn) => {
246                 self.wtr.write_str(r"\r")
247             }
248             Special(ast::SpecialLiteralKind::VerticalTab) => {
249                 self.wtr.write_str(r"\v")
250             }
251             Special(ast::SpecialLiteralKind::Space) => {
252                 self.wtr.write_str(r"\ ")
253             }
254         }
255     }
256 
fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result257     fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
258         use crate::ast::AssertionKind::*;
259         match ast.kind {
260             StartLine => self.wtr.write_str("^"),
261             EndLine => self.wtr.write_str("$"),
262             StartText => self.wtr.write_str(r"\A"),
263             EndText => self.wtr.write_str(r"\z"),
264             WordBoundary => self.wtr.write_str(r"\b"),
265             NotWordBoundary => self.wtr.write_str(r"\B"),
266         }
267     }
268 
fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result269     fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
270         self.wtr.write_str("(?")?;
271         self.fmt_flags(&ast.flags)?;
272         self.wtr.write_str(")")?;
273         Ok(())
274     }
275 
fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result276     fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
277         use crate::ast::{Flag, FlagsItemKind};
278 
279         for item in &ast.items {
280             match item.kind {
281                 FlagsItemKind::Negation => self.wtr.write_str("-"),
282                 FlagsItemKind::Flag(ref flag) => match *flag {
283                     Flag::CaseInsensitive => self.wtr.write_str("i"),
284                     Flag::MultiLine => self.wtr.write_str("m"),
285                     Flag::DotMatchesNewLine => self.wtr.write_str("s"),
286                     Flag::SwapGreed => self.wtr.write_str("U"),
287                     Flag::Unicode => self.wtr.write_str("u"),
288                     Flag::IgnoreWhitespace => self.wtr.write_str("x"),
289                 },
290             }?;
291         }
292         Ok(())
293     }
294 
fmt_class_bracketed_pre( &mut self, ast: &ast::ClassBracketed, ) -> fmt::Result295     fn fmt_class_bracketed_pre(
296         &mut self,
297         ast: &ast::ClassBracketed,
298     ) -> fmt::Result {
299         if ast.negated {
300             self.wtr.write_str("[^")
301         } else {
302             self.wtr.write_str("[")
303         }
304     }
305 
fmt_class_bracketed_post( &mut self, _ast: &ast::ClassBracketed, ) -> fmt::Result306     fn fmt_class_bracketed_post(
307         &mut self,
308         _ast: &ast::ClassBracketed,
309     ) -> fmt::Result {
310         self.wtr.write_str("]")
311     }
312 
fmt_class_set_binary_op_kind( &mut self, ast: &ast::ClassSetBinaryOpKind, ) -> fmt::Result313     fn fmt_class_set_binary_op_kind(
314         &mut self,
315         ast: &ast::ClassSetBinaryOpKind,
316     ) -> fmt::Result {
317         use crate::ast::ClassSetBinaryOpKind::*;
318         match *ast {
319             Intersection => self.wtr.write_str("&&"),
320             Difference => self.wtr.write_str("--"),
321             SymmetricDifference => self.wtr.write_str("~~"),
322         }
323     }
324 
fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result325     fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
326         use crate::ast::ClassPerlKind::*;
327         match ast.kind {
328             Digit if ast.negated => self.wtr.write_str(r"\D"),
329             Digit => self.wtr.write_str(r"\d"),
330             Space if ast.negated => self.wtr.write_str(r"\S"),
331             Space => self.wtr.write_str(r"\s"),
332             Word if ast.negated => self.wtr.write_str(r"\W"),
333             Word => self.wtr.write_str(r"\w"),
334         }
335     }
336 
fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result337     fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
338         use crate::ast::ClassAsciiKind::*;
339         match ast.kind {
340             Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
341             Alnum => self.wtr.write_str("[:alnum:]"),
342             Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
343             Alpha => self.wtr.write_str("[:alpha:]"),
344             Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
345             Ascii => self.wtr.write_str("[:ascii:]"),
346             Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
347             Blank => self.wtr.write_str("[:blank:]"),
348             Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
349             Cntrl => self.wtr.write_str("[:cntrl:]"),
350             Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
351             Digit => self.wtr.write_str("[:digit:]"),
352             Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
353             Graph => self.wtr.write_str("[:graph:]"),
354             Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
355             Lower => self.wtr.write_str("[:lower:]"),
356             Print if ast.negated => self.wtr.write_str("[:^print:]"),
357             Print => self.wtr.write_str("[:print:]"),
358             Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
359             Punct => self.wtr.write_str("[:punct:]"),
360             Space if ast.negated => self.wtr.write_str("[:^space:]"),
361             Space => self.wtr.write_str("[:space:]"),
362             Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
363             Upper => self.wtr.write_str("[:upper:]"),
364             Word if ast.negated => self.wtr.write_str("[:^word:]"),
365             Word => self.wtr.write_str("[:word:]"),
366             Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
367             Xdigit => self.wtr.write_str("[:xdigit:]"),
368         }
369     }
370 
fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result371     fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
372         use crate::ast::ClassUnicodeKind::*;
373         use crate::ast::ClassUnicodeOpKind::*;
374 
375         if ast.negated {
376             self.wtr.write_str(r"\P")?;
377         } else {
378             self.wtr.write_str(r"\p")?;
379         }
380         match ast.kind {
381             OneLetter(c) => self.wtr.write_char(c),
382             Named(ref x) => write!(self.wtr, "{{{}}}", x),
383             NamedValue { op: Equal, ref name, ref value } => {
384                 write!(self.wtr, "{{{}={}}}", name, value)
385             }
386             NamedValue { op: Colon, ref name, ref value } => {
387                 write!(self.wtr, "{{{}:{}}}", name, value)
388             }
389             NamedValue { op: NotEqual, ref name, ref value } => {
390                 write!(self.wtr, "{{{}!={}}}", name, value)
391             }
392         }
393     }
394 }
395 
#[cfg(test)]
mod tests {
    use super::Printer;
    use crate::ast::parse::ParserBuilder;

    /// Parses `given` with a default parser, prints the resulting AST, and
    /// asserts that the printed text equals `given`.
    fn roundtrip(given: &str) {
        roundtrip_with(|b| b, given);
    }

    /// Like `roundtrip`, but lets the caller configure the parser builder
    /// first (for example to enable octal escapes).
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let ast = builder.build().parse(given).unwrap();

        let mut printer = Printer::new();
        let mut dst = String::new();
        printer.print(&ast, &mut dst).unwrap();
        assert_eq!(given, dst, "printed pattern differs from the input");
    }

    #[test]
    fn print_literal() {
        roundtrip("a");
        roundtrip(r"\[");
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip(r"\x61");
        roundtrip(r"\x7F");
        roundtrip(r"\u0061");
        roundtrip(r"\U00000061");
        roundtrip(r"\x{61}");
        roundtrip(r"\x{7F}");
        roundtrip(r"\u{61}");
        roundtrip(r"\U{61}");

        roundtrip(r"\a");
        roundtrip(r"\f");
        roundtrip(r"\t");
        roundtrip(r"\n");
        roundtrip(r"\r");
        roundtrip(r"\v");
        roundtrip(r"(?x)\ ");
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip("ab");
        roundtrip("abcde");
        roundtrip("a(bcd)ef");
    }

    #[test]
    fn print_alternation() {
        roundtrip("a|b");
        roundtrip("a|b|c|d|e");
        roundtrip("|a|b|c|d|e");
        roundtrip("|a|b|c|d|e|");
        roundtrip("a(b|c|d)|e|f");
    }

    #[test]
    fn print_assertion() {
        roundtrip(r"^");
        roundtrip(r"$");
        roundtrip(r"\A");
        roundtrip(r"\z");
        roundtrip(r"\b");
        roundtrip(r"\B");
    }

    #[test]
    fn print_repetition() {
        roundtrip("a?");
        roundtrip("a??");
        roundtrip("a*");
        roundtrip("a*?");
        roundtrip("a+");
        roundtrip("a+?");
        roundtrip("a{5}");
        roundtrip("a{5}?");
        roundtrip("a{5,}");
        roundtrip("a{5,}?");
        roundtrip("a{5,10}");
        roundtrip("a{5,10}?");
    }

    #[test]
    fn print_flags() {
        roundtrip("(?i)");
        roundtrip("(?-i)");
        roundtrip("(?s-i)");
        roundtrip("(?-si)");
        roundtrip("(?siUmux)");
    }

    #[test]
    fn print_group() {
        roundtrip("(?i:a)");
        roundtrip("(?P<foo>a)");
        roundtrip("(a)");
    }

    #[test]
    fn print_class() {
        // Bracketed classes, including nesting and set operations.
        roundtrip(r"[abc]");
        roundtrip(r"[a-z]");
        roundtrip(r"[^a-z]");
        roundtrip(r"[a-z0-9]");
        roundtrip(r"[-a-z0-9]");
        roundtrip(r"[a-z0-9---]");
        roundtrip(r"[a-z&&m-n]");
        roundtrip(r"[[a-z&&m-n]]");
        roundtrip(r"[a-z--m-n]");
        roundtrip(r"[a-z~~m-n]");
        roundtrip(r"[a-z[0-9]]");
        roundtrip(r"[a-z[^0-9]]");

        // Perl classes.
        roundtrip(r"\d");
        roundtrip(r"\D");
        roundtrip(r"\s");
        roundtrip(r"\S");
        roundtrip(r"\w");
        roundtrip(r"\W");

        // ASCII classes, plain and negated.
        roundtrip(r"[[:alnum:]]");
        roundtrip(r"[[:^alnum:]]");
        roundtrip(r"[[:alpha:]]");
        roundtrip(r"[[:^alpha:]]");
        roundtrip(r"[[:ascii:]]");
        roundtrip(r"[[:^ascii:]]");
        roundtrip(r"[[:blank:]]");
        roundtrip(r"[[:^blank:]]");
        roundtrip(r"[[:cntrl:]]");
        roundtrip(r"[[:^cntrl:]]");
        roundtrip(r"[[:digit:]]");
        roundtrip(r"[[:^digit:]]");
        roundtrip(r"[[:graph:]]");
        roundtrip(r"[[:^graph:]]");
        roundtrip(r"[[:lower:]]");
        roundtrip(r"[[:^lower:]]");
        roundtrip(r"[[:print:]]");
        roundtrip(r"[[:^print:]]");
        roundtrip(r"[[:punct:]]");
        roundtrip(r"[[:^punct:]]");
        roundtrip(r"[[:space:]]");
        roundtrip(r"[[:^space:]]");
        roundtrip(r"[[:upper:]]");
        roundtrip(r"[[:^upper:]]");
        roundtrip(r"[[:word:]]");
        roundtrip(r"[[:^word:]]");
        roundtrip(r"[[:xdigit:]]");
        roundtrip(r"[[:^xdigit:]]");

        // Unicode classes.
        roundtrip(r"\pL");
        roundtrip(r"\PL");
        roundtrip(r"\p{L}");
        roundtrip(r"\P{L}");
        roundtrip(r"\p{X=Y}");
        roundtrip(r"\P{X=Y}");
        roundtrip(r"\p{X:Y}");
        roundtrip(r"\P{X:Y}");
        roundtrip(r"\p{X!=Y}");
        roundtrip(r"\P{X!=Y}");
    }
}
569