1 /*! 2 This module provides a regular expression printer for `Ast`. 3 */ 4 5 use std::fmt; 6 7 use crate::ast::visitor::{self, Visitor}; 8 use crate::ast::{self, Ast}; 9 10 /// A builder for constructing a printer. 11 /// 12 /// Note that since a printer doesn't have any configuration knobs, this type 13 /// remains unexported. 14 #[derive(Clone, Debug)] 15 struct PrinterBuilder { 16 _priv: (), 17 } 18 19 impl Default for PrinterBuilder { default() -> PrinterBuilder20 fn default() -> PrinterBuilder { 21 PrinterBuilder::new() 22 } 23 } 24 25 impl PrinterBuilder { new() -> PrinterBuilder26 fn new() -> PrinterBuilder { 27 PrinterBuilder { _priv: () } 28 } 29 build(&self) -> Printer30 fn build(&self) -> Printer { 31 Printer { _priv: () } 32 } 33 } 34 35 /// A printer for a regular expression abstract syntax tree. 36 /// 37 /// A printer converts an abstract syntax tree (AST) to a regular expression 38 /// pattern string. This particular printer uses constant stack space and heap 39 /// space proportional to the size of the AST. 40 /// 41 /// This printer will not necessarily preserve the original formatting of the 42 /// regular expression pattern string. For example, all whitespace and comments 43 /// are ignored. 44 #[derive(Debug)] 45 pub struct Printer { 46 _priv: (), 47 } 48 49 impl Printer { 50 /// Create a new printer. new() -> Printer51 pub fn new() -> Printer { 52 PrinterBuilder::new().build() 53 } 54 55 /// Print the given `Ast` to the given writer. The writer must implement 56 /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used 57 /// here are a `fmt::Formatter` (which is available in `fmt::Display` 58 /// implementations) or a `&mut String`. print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result59 pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result { 60 visitor::visit(ast, Writer { wtr }) 61 } 62 } 63 64 #[derive(Debug)] 65 struct Writer<W> { 66 wtr: W, 67 } 68 69 impl<W: fmt::Write> Visitor for Writer<W> { 70 type Output = (); 71 type Err = fmt::Error; 72 finish(self) -> fmt::Result73 fn finish(self) -> fmt::Result { 74 Ok(()) 75 } 76 visit_pre(&mut self, ast: &Ast) -> fmt::Result77 fn visit_pre(&mut self, ast: &Ast) -> fmt::Result { 78 match *ast { 79 Ast::Group(ref x) => self.fmt_group_pre(x), 80 Ast::Class(ast::Class::Bracketed(ref x)) => { 81 self.fmt_class_bracketed_pre(x) 82 } 83 _ => Ok(()), 84 } 85 } 86 visit_post(&mut self, ast: &Ast) -> fmt::Result87 fn visit_post(&mut self, ast: &Ast) -> fmt::Result { 88 use crate::ast::Class; 89 90 match *ast { 91 Ast::Empty(_) => Ok(()), 92 Ast::Flags(ref x) => self.fmt_set_flags(x), 93 Ast::Literal(ref x) => self.fmt_literal(x), 94 Ast::Dot(_) => self.wtr.write_str("."), 95 Ast::Assertion(ref x) => self.fmt_assertion(x), 96 Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x), 97 Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x), 98 Ast::Class(Class::Bracketed(ref x)) => { 99 self.fmt_class_bracketed_post(x) 100 } 101 Ast::Repetition(ref x) => self.fmt_repetition(x), 102 Ast::Group(ref x) => self.fmt_group_post(x), 103 Ast::Alternation(_) => Ok(()), 104 Ast::Concat(_) => Ok(()), 105 } 106 } 107 visit_alternation_in(&mut self) -> fmt::Result108 fn visit_alternation_in(&mut self) -> fmt::Result { 109 self.wtr.write_str("|") 110 } 111 visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>112 fn visit_class_set_item_pre( 113 &mut self, 114 ast: &ast::ClassSetItem, 115 ) -> Result<(), Self::Err> { 116 match *ast { 117 ast::ClassSetItem::Bracketed(ref x) => { 118 self.fmt_class_bracketed_pre(x) 119 } 120 _ => Ok(()), 121 } 122 } 123 visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<(), Self::Err>124 fn visit_class_set_item_post( 125 &mut self, 126 ast: &ast::ClassSetItem, 127 ) -> Result<(), Self::Err> { 128 use crate::ast::ClassSetItem::*; 129 130 match *ast { 131 Empty(_) => Ok(()), 132 Literal(ref x) => self.fmt_literal(x), 133 Range(ref x) => { 134 self.fmt_literal(&x.start)?; 135 self.wtr.write_str("-")?; 136 self.fmt_literal(&x.end)?; 137 Ok(()) 138 } 139 Ascii(ref x) => self.fmt_class_ascii(x), 140 Unicode(ref x) => self.fmt_class_unicode(x), 141 Perl(ref x) => self.fmt_class_perl(x), 142 Bracketed(ref x) => self.fmt_class_bracketed_post(x), 143 Union(_) => Ok(()), 144 } 145 } 146 visit_class_set_binary_op_in( &mut self, ast: &ast::ClassSetBinaryOp, ) -> Result<(), Self::Err>147 fn visit_class_set_binary_op_in( 148 &mut self, 149 ast: &ast::ClassSetBinaryOp, 150 ) -> Result<(), Self::Err> { 151 self.fmt_class_set_binary_op_kind(&ast.kind) 152 } 153 } 154 155 impl<W: fmt::Write> Writer<W> { fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result156 fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result { 157 use crate::ast::GroupKind::*; 158 match ast.kind { 159 CaptureIndex(_) => self.wtr.write_str("("), 160 CaptureName(ref x) => { 161 self.wtr.write_str("(?P<")?; 162 self.wtr.write_str(&x.name)?; 163 self.wtr.write_str(">")?; 164 Ok(()) 165 } 166 NonCapturing(ref flags) => { 167 self.wtr.write_str("(?")?; 168 self.fmt_flags(flags)?; 169 self.wtr.write_str(":")?; 170 Ok(()) 171 } 172 } 173 } 174 fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result175 fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result { 176 self.wtr.write_str(")") 177 } 178 fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result179 fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result { 180 use crate::ast::RepetitionKind::*; 181 match ast.op.kind { 182 ZeroOrOne if ast.greedy => self.wtr.write_str("?"), 183 ZeroOrOne => self.wtr.write_str("??"), 184 ZeroOrMore if ast.greedy => self.wtr.write_str("*"), 185 ZeroOrMore => self.wtr.write_str("*?"), 186 OneOrMore if ast.greedy => self.wtr.write_str("+"), 187 OneOrMore => self.wtr.write_str("+?"), 188 Range(ref x) => { 189 self.fmt_repetition_range(x)?; 190 if !ast.greedy { 191 self.wtr.write_str("?")?; 192 } 193 Ok(()) 194 } 195 } 196 } 197 fmt_repetition_range( &mut self, ast: &ast::RepetitionRange, ) -> fmt::Result198 fn fmt_repetition_range( 199 &mut self, 200 ast: &ast::RepetitionRange, 201 ) -> fmt::Result { 202 use crate::ast::RepetitionRange::*; 203 match *ast { 204 Exactly(x) => write!(self.wtr, "{{{}}}", x), 205 AtLeast(x) => write!(self.wtr, "{{{},}}", x), 206 Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y), 207 } 208 } 209 fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result210 fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result { 211 use crate::ast::LiteralKind::*; 212 213 match ast.kind { 214 Verbatim => self.wtr.write_char(ast.c), 215 Punctuation => write!(self.wtr, r"\{}", ast.c), 216 Octal => write!(self.wtr, r"\{:o}", ast.c as u32), 217 HexFixed(ast::HexLiteralKind::X) => { 218 write!(self.wtr, r"\x{:02X}", ast.c as u32) 219 } 220 HexFixed(ast::HexLiteralKind::UnicodeShort) => { 221 write!(self.wtr, r"\u{:04X}", ast.c as u32) 222 } 223 HexFixed(ast::HexLiteralKind::UnicodeLong) => { 224 write!(self.wtr, r"\U{:08X}", ast.c as u32) 225 } 226 HexBrace(ast::HexLiteralKind::X) => { 227 write!(self.wtr, r"\x{{{:X}}}", ast.c as u32) 228 } 229 HexBrace(ast::HexLiteralKind::UnicodeShort) => { 230 write!(self.wtr, r"\u{{{:X}}}", ast.c as u32) 231 } 232 HexBrace(ast::HexLiteralKind::UnicodeLong) => { 233 write!(self.wtr, r"\U{{{:X}}}", ast.c as u32) 234 } 235 Special(ast::SpecialLiteralKind::Bell) => { 236 self.wtr.write_str(r"\a") 237 } 238 Special(ast::SpecialLiteralKind::FormFeed) => { 239 self.wtr.write_str(r"\f") 240 } 241 Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"), 242 Special(ast::SpecialLiteralKind::LineFeed) => { 243 self.wtr.write_str(r"\n") 244 } 245 Special(ast::SpecialLiteralKind::CarriageReturn) => { 246 self.wtr.write_str(r"\r") 247 } 248 Special(ast::SpecialLiteralKind::VerticalTab) => { 249 self.wtr.write_str(r"\v") 250 } 251 Special(ast::SpecialLiteralKind::Space) => { 252 self.wtr.write_str(r"\ ") 253 } 254 } 255 } 256 fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result257 fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result { 258 use crate::ast::AssertionKind::*; 259 match ast.kind { 260 StartLine => self.wtr.write_str("^"), 261 EndLine => self.wtr.write_str("$"), 262 StartText => self.wtr.write_str(r"\A"), 263 EndText => self.wtr.write_str(r"\z"), 264 WordBoundary => self.wtr.write_str(r"\b"), 265 NotWordBoundary => self.wtr.write_str(r"\B"), 266 } 267 } 268 fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result269 fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result { 270 self.wtr.write_str("(?")?; 271 self.fmt_flags(&ast.flags)?; 272 self.wtr.write_str(")")?; 273 Ok(()) 274 } 275 fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result276 fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result { 277 use crate::ast::{Flag, FlagsItemKind}; 278 279 for item in &ast.items { 280 match item.kind { 281 FlagsItemKind::Negation => self.wtr.write_str("-"), 282 FlagsItemKind::Flag(ref flag) => match *flag { 283 Flag::CaseInsensitive => self.wtr.write_str("i"), 284 Flag::MultiLine => self.wtr.write_str("m"), 285 Flag::DotMatchesNewLine => self.wtr.write_str("s"), 286 Flag::SwapGreed => self.wtr.write_str("U"), 287 Flag::Unicode => self.wtr.write_str("u"), 288 Flag::IgnoreWhitespace => self.wtr.write_str("x"), 289 }, 290 }?; 291 } 292 Ok(()) 293 } 294 fmt_class_bracketed_pre( &mut self, ast: &ast::ClassBracketed, ) -> fmt::Result295 fn fmt_class_bracketed_pre( 296 &mut self, 297 ast: &ast::ClassBracketed, 298 ) -> fmt::Result { 299 if ast.negated { 300 self.wtr.write_str("[^") 301 } else { 302 self.wtr.write_str("[") 303 } 304 } 305 fmt_class_bracketed_post( &mut self, _ast: &ast::ClassBracketed, ) -> fmt::Result306 fn fmt_class_bracketed_post( 307 &mut self, 308 _ast: &ast::ClassBracketed, 309 ) -> fmt::Result { 310 self.wtr.write_str("]") 311 } 312 fmt_class_set_binary_op_kind( &mut self, ast: &ast::ClassSetBinaryOpKind, ) -> fmt::Result313 fn fmt_class_set_binary_op_kind( 314 &mut self, 315 ast: &ast::ClassSetBinaryOpKind, 316 ) -> fmt::Result { 317 use crate::ast::ClassSetBinaryOpKind::*; 318 match *ast { 319 Intersection => self.wtr.write_str("&&"), 320 Difference => self.wtr.write_str("--"), 321 SymmetricDifference => self.wtr.write_str("~~"), 322 } 323 } 324 fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result325 fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result { 326 use crate::ast::ClassPerlKind::*; 327 match ast.kind { 328 Digit if ast.negated => self.wtr.write_str(r"\D"), 329 Digit => self.wtr.write_str(r"\d"), 330 Space if ast.negated => self.wtr.write_str(r"\S"), 331 Space => self.wtr.write_str(r"\s"), 332 Word if ast.negated => self.wtr.write_str(r"\W"), 333 Word => self.wtr.write_str(r"\w"), 334 } 335 } 336 fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result337 fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result { 338 use crate::ast::ClassAsciiKind::*; 339 match ast.kind { 340 Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"), 341 Alnum => self.wtr.write_str("[:alnum:]"), 342 Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"), 343 Alpha => self.wtr.write_str("[:alpha:]"), 344 Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"), 345 Ascii => self.wtr.write_str("[:ascii:]"), 346 Blank if ast.negated => self.wtr.write_str("[:^blank:]"), 347 Blank => self.wtr.write_str("[:blank:]"), 348 Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"), 349 Cntrl => self.wtr.write_str("[:cntrl:]"), 350 Digit if ast.negated => self.wtr.write_str("[:^digit:]"), 351 Digit => self.wtr.write_str("[:digit:]"), 352 Graph if ast.negated => self.wtr.write_str("[:^graph:]"), 353 Graph => self.wtr.write_str("[:graph:]"), 354 Lower if ast.negated => self.wtr.write_str("[:^lower:]"), 355 Lower => self.wtr.write_str("[:lower:]"), 356 Print if ast.negated => self.wtr.write_str("[:^print:]"), 357 Print => self.wtr.write_str("[:print:]"), 358 Punct if ast.negated => self.wtr.write_str("[:^punct:]"), 359 Punct => self.wtr.write_str("[:punct:]"), 360 Space if ast.negated => self.wtr.write_str("[:^space:]"), 361 Space => self.wtr.write_str("[:space:]"), 362 Upper if ast.negated => self.wtr.write_str("[:^upper:]"), 363 Upper => self.wtr.write_str("[:upper:]"), 364 Word if ast.negated => self.wtr.write_str("[:^word:]"), 365 Word => self.wtr.write_str("[:word:]"), 366 Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"), 367 Xdigit => self.wtr.write_str("[:xdigit:]"), 368 } 369 } 370 fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result371 fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result { 372 use crate::ast::ClassUnicodeKind::*; 373 use crate::ast::ClassUnicodeOpKind::*; 374 375 if ast.negated { 376 self.wtr.write_str(r"\P")?; 377 } else { 378 self.wtr.write_str(r"\p")?; 379 } 380 match ast.kind { 381 OneLetter(c) => self.wtr.write_char(c), 382 Named(ref x) => write!(self.wtr, "{{{}}}", x), 383 NamedValue { op: Equal, ref name, ref value } => { 384 write!(self.wtr, "{{{}={}}}", name, value) 385 } 386 NamedValue { op: Colon, ref name, ref value } => { 387 write!(self.wtr, "{{{}:{}}}", name, value) 388 } 389 NamedValue { op: NotEqual, ref name, ref value } => { 390 write!(self.wtr, "{{{}!={}}}", name, value) 391 } 392 } 393 } 394 } 395 396 #[cfg(test)] 397 mod tests { 398 use super::Printer; 399 use crate::ast::parse::ParserBuilder; 400 roundtrip(given: &str)401 fn roundtrip(given: &str) { 402 roundtrip_with(|b| b, given); 403 } 404 roundtrip_with<F>(mut f: F, given: &str) where F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,405 fn roundtrip_with<F>(mut f: F, given: &str) 406 where 407 F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder, 408 { 409 let mut builder = ParserBuilder::new(); 410 f(&mut builder); 411 let ast = builder.build().parse(given).unwrap(); 412 413 let mut printer = Printer::new(); 414 let mut dst = String::new(); 415 printer.print(&ast, &mut dst).unwrap(); 416 assert_eq!(given, dst); 417 } 418 419 #[test] print_literal()420 fn print_literal() { 421 roundtrip("a"); 422 roundtrip(r"\["); 423 roundtrip_with(|b| b.octal(true), r"\141"); 424 roundtrip(r"\x61"); 425 roundtrip(r"\x7F"); 426 roundtrip(r"\u0061"); 427 roundtrip(r"\U00000061"); 428 roundtrip(r"\x{61}"); 429 roundtrip(r"\x{7F}"); 430 roundtrip(r"\u{61}"); 431 roundtrip(r"\U{61}"); 432 433 roundtrip(r"\a"); 434 roundtrip(r"\f"); 435 roundtrip(r"\t"); 436 roundtrip(r"\n"); 437 roundtrip(r"\r"); 438 roundtrip(r"\v"); 439 roundtrip(r"(?x)\ "); 440 } 441 442 #[test] print_dot()443 fn print_dot() { 444 roundtrip("."); 445 } 446 447 #[test] print_concat()448 fn print_concat() { 449 roundtrip("ab"); 450 roundtrip("abcde"); 451 roundtrip("a(bcd)ef"); 452 } 453 454 #[test] print_alternation()455 fn print_alternation() { 456 roundtrip("a|b"); 457 roundtrip("a|b|c|d|e"); 458 roundtrip("|a|b|c|d|e"); 459 roundtrip("|a|b|c|d|e|"); 460 roundtrip("a(b|c|d)|e|f"); 461 } 462 463 #[test] print_assertion()464 fn print_assertion() { 465 roundtrip(r"^"); 466 roundtrip(r"$"); 467 roundtrip(r"\A"); 468 roundtrip(r"\z"); 469 roundtrip(r"\b"); 470 roundtrip(r"\B"); 471 } 472 473 #[test] print_repetition()474 fn print_repetition() { 475 roundtrip("a?"); 476 roundtrip("a??"); 477 roundtrip("a*"); 478 roundtrip("a*?"); 479 roundtrip("a+"); 480 roundtrip("a+?"); 481 roundtrip("a{5}"); 482 roundtrip("a{5}?"); 483 roundtrip("a{5,}"); 484 roundtrip("a{5,}?"); 485 roundtrip("a{5,10}"); 486 roundtrip("a{5,10}?"); 487 } 488 489 #[test] print_flags()490 fn print_flags() { 491 roundtrip("(?i)"); 492 roundtrip("(?-i)"); 493 roundtrip("(?s-i)"); 494 roundtrip("(?-si)"); 495 roundtrip("(?siUmux)"); 496 } 497 498 #[test] print_group()499 fn print_group() { 500 roundtrip("(?i:a)"); 501 roundtrip("(?P<foo>a)"); 502 roundtrip("(a)"); 503 } 504 505 #[test] print_class()506 fn print_class() { 507 roundtrip(r"[abc]"); 508 roundtrip(r"[a-z]"); 509 roundtrip(r"[^a-z]"); 510 roundtrip(r"[a-z0-9]"); 511 roundtrip(r"[-a-z0-9]"); 512 roundtrip(r"[-a-z0-9]"); 513 roundtrip(r"[a-z0-9---]"); 514 roundtrip(r"[a-z&&m-n]"); 515 roundtrip(r"[[a-z&&m-n]]"); 516 roundtrip(r"[a-z--m-n]"); 517 roundtrip(r"[a-z~~m-n]"); 518 roundtrip(r"[a-z[0-9]]"); 519 roundtrip(r"[a-z[^0-9]]"); 520 521 roundtrip(r"\d"); 522 roundtrip(r"\D"); 523 roundtrip(r"\s"); 524 roundtrip(r"\S"); 525 roundtrip(r"\w"); 526 roundtrip(r"\W"); 527 528 roundtrip(r"[[:alnum:]]"); 529 roundtrip(r"[[:^alnum:]]"); 530 roundtrip(r"[[:alpha:]]"); 531 roundtrip(r"[[:^alpha:]]"); 532 roundtrip(r"[[:ascii:]]"); 533 roundtrip(r"[[:^ascii:]]"); 534 roundtrip(r"[[:blank:]]"); 535 roundtrip(r"[[:^blank:]]"); 536 roundtrip(r"[[:cntrl:]]"); 537 roundtrip(r"[[:^cntrl:]]"); 538 roundtrip(r"[[:digit:]]"); 539 roundtrip(r"[[:^digit:]]"); 540 roundtrip(r"[[:graph:]]"); 541 roundtrip(r"[[:^graph:]]"); 542 roundtrip(r"[[:lower:]]"); 543 roundtrip(r"[[:^lower:]]"); 544 roundtrip(r"[[:print:]]"); 545 roundtrip(r"[[:^print:]]"); 546 roundtrip(r"[[:punct:]]"); 547 roundtrip(r"[[:^punct:]]"); 548 roundtrip(r"[[:space:]]"); 549 roundtrip(r"[[:^space:]]"); 550 roundtrip(r"[[:upper:]]"); 551 roundtrip(r"[[:^upper:]]"); 552 roundtrip(r"[[:word:]]"); 553 roundtrip(r"[[:^word:]]"); 554 roundtrip(r"[[:xdigit:]]"); 555 roundtrip(r"[[:^xdigit:]]"); 556 557 roundtrip(r"\pL"); 558 roundtrip(r"\PL"); 559 roundtrip(r"\p{L}"); 560 roundtrip(r"\P{L}"); 561 roundtrip(r"\p{X=Y}"); 562 roundtrip(r"\P{X=Y}"); 563 roundtrip(r"\p{X:Y}"); 564 roundtrip(r"\P{X:Y}"); 565 roundtrip(r"\p{X!=Y}"); 566 roundtrip(r"\P{X!=Y}"); 567 } 568 } 569