• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2 Defines a translator that converts an `Ast` to an `Hir`.
3 */
4 
5 use std::cell::{Cell, RefCell};
6 use std::result;
7 
8 use ast::{self, Ast, Span, Visitor};
9 use hir::{self, Error, ErrorKind, Hir};
10 use unicode::{self, ClassQuery};
11 
12 type Result<T> = result::Result<T, Error>;
13 
/// A builder for constructing an AST->HIR translator.
///
/// The builder records the default flag settings and the UTF-8 policy that
/// every translator produced by `build` starts with.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether built translators may produce HIR that can match invalid
    /// UTF-8. Disabled by default.
    allow_invalid_utf8: bool,
    /// The flag settings that a built translator starts out with.
    flags: Flags,
}
20 
21 impl Default for TranslatorBuilder {
default() -> TranslatorBuilder22     fn default() -> TranslatorBuilder {
23         TranslatorBuilder::new()
24     }
25 }
26 
27 impl TranslatorBuilder {
28     /// Create a new translator builder with a default c onfiguration.
new() -> TranslatorBuilder29     pub fn new() -> TranslatorBuilder {
30         TranslatorBuilder {
31             allow_invalid_utf8: false,
32             flags: Flags::default(),
33         }
34     }
35 
36     /// Build a translator using the current configuration.
build(&self) -> Translator37     pub fn build(&self) -> Translator {
38         Translator {
39             stack: RefCell::new(vec![]),
40             flags: Cell::new(self.flags),
41             allow_invalid_utf8: self.allow_invalid_utf8,
42         }
43     }
44 
45     /// When enabled, translation will permit the construction of a regular
46     /// expression that may match invalid UTF-8.
47     ///
48     /// When disabled (the default), the translator is guaranteed to produce
49     /// an expression that will only ever match valid UTF-8 (otherwise, the
50     /// translator will return an error).
51     ///
52     /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53     /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54     /// the parser to return an error. Namely, a negated ASCII word boundary
55     /// can result in matching positions that aren't valid UTF-8 boundaries.
allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder56     pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57         self.allow_invalid_utf8 = yes;
58         self
59     }
60 
61     /// Enable or disable the case insensitive flag (`i`) by default.
case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder62     pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63         self.flags.case_insensitive = if yes { Some(true) } else { None };
64         self
65     }
66 
67     /// Enable or disable the multi-line matching flag (`m`) by default.
multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder68     pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69         self.flags.multi_line = if yes { Some(true) } else { None };
70         self
71     }
72 
73     /// Enable or disable the "dot matches any character" flag (`s`) by
74     /// default.
dot_matches_new_line( &mut self, yes: bool, ) -> &mut TranslatorBuilder75     pub fn dot_matches_new_line(
76         &mut self,
77         yes: bool,
78     ) -> &mut TranslatorBuilder {
79         self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80         self
81     }
82 
83     /// Enable or disable the "swap greed" flag (`U`) by default.
swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder84     pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85         self.flags.swap_greed = if yes { Some(true) } else { None };
86         self
87     }
88 
89     /// Enable or disable the Unicode flag (`u`) by default.
unicode(&mut self, yes: bool) -> &mut TranslatorBuilder90     pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91         self.flags.unicode = if yes { None } else { Some(false) };
92         self
93     }
94 }
95 
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
113 
114 impl Translator {
115     /// Create a new translator using the default configuration.
new() -> Translator116     pub fn new() -> Translator {
117         TranslatorBuilder::new().build()
118     }
119 
120     /// Translate the given abstract syntax tree (AST) into a high level
121     /// intermediate representation (HIR).
122     ///
123     /// If there was a problem doing the translation, then an HIR-specific
124     /// error is returned.
125     ///
126     /// The original pattern string used to produce the `Ast` *must* also be
127     /// provided. The translator does not use the pattern string during any
128     /// correct translation, but is used for error reporting.
translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir>129     pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130         ast::visit(ast, TranslatorI::new(self, pattern))
131     }
132 }
133 
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a non-empty concatenation is observed. After
    /// visiting every sub-expression in the concatenation, the translator's
    /// stack is popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever a non-empty alternation is observed. After
    /// visiting every sub-expression in the alternation, the translator's
    /// stack is popped until it sees an Alternation frame.
    Alternation,
}
185 
186 impl HirFrame {
187     /// Assert that the current stack frame is an Hir expression and return it.
unwrap_expr(self) -> Hir188     fn unwrap_expr(self) -> Hir {
189         match self {
190             HirFrame::Expr(expr) => expr,
191             _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192         }
193     }
194 
195     /// Assert that the current stack frame is a Unicode class expression and
196     /// return it.
unwrap_class_unicode(self) -> hir::ClassUnicode197     fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198         match self {
199             HirFrame::ClassUnicode(cls) => cls,
200             _ => panic!(
201                 "tried to unwrap Unicode class \
202                  from HirFrame, got: {:?}",
203                 self
204             ),
205         }
206     }
207 
208     /// Assert that the current stack frame is a byte class expression and
209     /// return it.
unwrap_class_bytes(self) -> hir::ClassBytes210     fn unwrap_class_bytes(self) -> hir::ClassBytes {
211         match self {
212             HirFrame::ClassBytes(cls) => cls,
213             _ => panic!(
214                 "tried to unwrap byte class \
215                  from HirFrame, got: {:?}",
216                 self
217             ),
218         }
219     }
220 
221     /// Assert that the current stack frame is a group indicator and return
222     /// its corresponding flags (the flags that were active at the time the
223     /// group was entered).
unwrap_group(self) -> Flags224     fn unwrap_group(self) -> Flags {
225         match self {
226             HirFrame::Group { old_flags } => old_flags,
227             _ => {
228                 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229             }
230         }
231     }
232 }
233 
234 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235     type Output = Hir;
236     type Err = Error;
237 
finish(self) -> Result<Hir>238     fn finish(self) -> Result<Hir> {
239         // ... otherwise, we should have exactly one HIR on the stack.
240         assert_eq!(self.trans().stack.borrow().len(), 1);
241         Ok(self.pop().unwrap().unwrap_expr())
242     }
243 
visit_pre(&mut self, ast: &Ast) -> Result<()>244     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245         match *ast {
246             Ast::Class(ast::Class::Bracketed(_)) => {
247                 if self.flags().unicode() {
248                     let cls = hir::ClassUnicode::empty();
249                     self.push(HirFrame::ClassUnicode(cls));
250                 } else {
251                     let cls = hir::ClassBytes::empty();
252                     self.push(HirFrame::ClassBytes(cls));
253                 }
254             }
255             Ast::Group(ref x) => {
256                 let old_flags = x
257                     .flags()
258                     .map(|ast| self.set_flags(ast))
259                     .unwrap_or_else(|| self.flags());
260                 self.push(HirFrame::Group { old_flags });
261             }
262             Ast::Concat(ref x) if x.asts.is_empty() => {}
263             Ast::Concat(_) => {
264                 self.push(HirFrame::Concat);
265             }
266             Ast::Alternation(ref x) if x.asts.is_empty() => {}
267             Ast::Alternation(_) => {
268                 self.push(HirFrame::Alternation);
269             }
270             _ => {}
271         }
272         Ok(())
273     }
274 
visit_post(&mut self, ast: &Ast) -> Result<()>275     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276         match *ast {
277             Ast::Empty(_) => {
278                 self.push(HirFrame::Expr(Hir::empty()));
279             }
280             Ast::Flags(ref x) => {
281                 self.set_flags(&x.flags);
282                 // Flags in the AST are generally considered directives and
283                 // not actual sub-expressions. However, they can be used in
284                 // the concrete syntax like `((?i))`, and we need some kind of
285                 // indication of an expression there, and Empty is the correct
286                 // choice.
287                 //
288                 // There can also be things like `(?i)+`, but we rule those out
289                 // in the parser. In the future, we might allow them for
290                 // consistency sake.
291                 self.push(HirFrame::Expr(Hir::empty()));
292             }
293             Ast::Literal(ref x) => {
294                 self.push(HirFrame::Expr(self.hir_literal(x)?));
295             }
296             Ast::Dot(span) => {
297                 self.push(HirFrame::Expr(self.hir_dot(span)?));
298             }
299             Ast::Assertion(ref x) => {
300                 self.push(HirFrame::Expr(self.hir_assertion(x)?));
301             }
302             Ast::Class(ast::Class::Perl(ref x)) => {
303                 if self.flags().unicode() {
304                     let cls = self.hir_perl_unicode_class(x)?;
305                     let hcls = hir::Class::Unicode(cls);
306                     self.push(HirFrame::Expr(Hir::class(hcls)));
307                 } else {
308                     let cls = self.hir_perl_byte_class(x);
309                     let hcls = hir::Class::Bytes(cls);
310                     self.push(HirFrame::Expr(Hir::class(hcls)));
311                 }
312             }
313             Ast::Class(ast::Class::Unicode(ref x)) => {
314                 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315                 self.push(HirFrame::Expr(Hir::class(cls)));
316             }
317             Ast::Class(ast::Class::Bracketed(ref ast)) => {
318                 if self.flags().unicode() {
319                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
320                     self.unicode_fold_and_negate(
321                         &ast.span,
322                         ast.negated,
323                         &mut cls,
324                     )?;
325                     if cls.ranges().is_empty() {
326                         return Err(self.error(
327                             ast.span,
328                             ErrorKind::EmptyClassNotAllowed,
329                         ));
330                     }
331                     let expr = Hir::class(hir::Class::Unicode(cls));
332                     self.push(HirFrame::Expr(expr));
333                 } else {
334                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
335                     self.bytes_fold_and_negate(
336                         &ast.span,
337                         ast.negated,
338                         &mut cls,
339                     )?;
340                     if cls.ranges().is_empty() {
341                         return Err(self.error(
342                             ast.span,
343                             ErrorKind::EmptyClassNotAllowed,
344                         ));
345                     }
346 
347                     let expr = Hir::class(hir::Class::Bytes(cls));
348                     self.push(HirFrame::Expr(expr));
349                 }
350             }
351             Ast::Repetition(ref x) => {
352                 let expr = self.pop().unwrap().unwrap_expr();
353                 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354             }
355             Ast::Group(ref x) => {
356                 let expr = self.pop().unwrap().unwrap_expr();
357                 let old_flags = self.pop().unwrap().unwrap_group();
358                 self.trans().flags.set(old_flags);
359                 self.push(HirFrame::Expr(self.hir_group(x, expr)));
360             }
361             Ast::Concat(_) => {
362                 let mut exprs = vec![];
363                 while let Some(HirFrame::Expr(expr)) = self.pop() {
364                     if !expr.kind().is_empty() {
365                         exprs.push(expr);
366                     }
367                 }
368                 exprs.reverse();
369                 self.push(HirFrame::Expr(Hir::concat(exprs)));
370             }
371             Ast::Alternation(_) => {
372                 let mut exprs = vec![];
373                 while let Some(HirFrame::Expr(expr)) = self.pop() {
374                     exprs.push(expr);
375                 }
376                 exprs.reverse();
377                 self.push(HirFrame::Expr(Hir::alternation(exprs)));
378             }
379         }
380         Ok(())
381     }
382 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>383     fn visit_class_set_item_pre(
384         &mut self,
385         ast: &ast::ClassSetItem,
386     ) -> Result<()> {
387         match *ast {
388             ast::ClassSetItem::Bracketed(_) => {
389                 if self.flags().unicode() {
390                     let cls = hir::ClassUnicode::empty();
391                     self.push(HirFrame::ClassUnicode(cls));
392                 } else {
393                     let cls = hir::ClassBytes::empty();
394                     self.push(HirFrame::ClassBytes(cls));
395                 }
396             }
397             // We needn't handle the Union case here since the visitor will
398             // do it for us.
399             _ => {}
400         }
401         Ok(())
402     }
403 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>404     fn visit_class_set_item_post(
405         &mut self,
406         ast: &ast::ClassSetItem,
407     ) -> Result<()> {
408         match *ast {
409             ast::ClassSetItem::Empty(_) => {}
410             ast::ClassSetItem::Literal(ref x) => {
411                 if self.flags().unicode() {
412                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
413                     cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414                     self.push(HirFrame::ClassUnicode(cls));
415                 } else {
416                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
417                     let byte = self.class_literal_byte(x)?;
418                     cls.push(hir::ClassBytesRange::new(byte, byte));
419                     self.push(HirFrame::ClassBytes(cls));
420                 }
421             }
422             ast::ClassSetItem::Range(ref x) => {
423                 if self.flags().unicode() {
424                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
425                     cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426                     self.push(HirFrame::ClassUnicode(cls));
427                 } else {
428                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
429                     let start = self.class_literal_byte(&x.start)?;
430                     let end = self.class_literal_byte(&x.end)?;
431                     cls.push(hir::ClassBytesRange::new(start, end));
432                     self.push(HirFrame::ClassBytes(cls));
433                 }
434             }
435             ast::ClassSetItem::Ascii(ref x) => {
436                 if self.flags().unicode() {
437                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
438                     for &(s, e) in ascii_class(&x.kind) {
439                         cls.push(hir::ClassUnicodeRange::new(s, e));
440                     }
441                     self.unicode_fold_and_negate(
442                         &x.span, x.negated, &mut cls,
443                     )?;
444                     self.push(HirFrame::ClassUnicode(cls));
445                 } else {
446                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
447                     for &(s, e) in ascii_class(&x.kind) {
448                         cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
449                     }
450                     self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
451                     self.push(HirFrame::ClassBytes(cls));
452                 }
453             }
454             ast::ClassSetItem::Unicode(ref x) => {
455                 let xcls = self.hir_unicode_class(x)?;
456                 let mut cls = self.pop().unwrap().unwrap_class_unicode();
457                 cls.union(&xcls);
458                 self.push(HirFrame::ClassUnicode(cls));
459             }
460             ast::ClassSetItem::Perl(ref x) => {
461                 if self.flags().unicode() {
462                     let xcls = self.hir_perl_unicode_class(x)?;
463                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
464                     cls.union(&xcls);
465                     self.push(HirFrame::ClassUnicode(cls));
466                 } else {
467                     let xcls = self.hir_perl_byte_class(x);
468                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
469                     cls.union(&xcls);
470                     self.push(HirFrame::ClassBytes(cls));
471                 }
472             }
473             ast::ClassSetItem::Bracketed(ref ast) => {
474                 if self.flags().unicode() {
475                     let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
476                     self.unicode_fold_and_negate(
477                         &ast.span,
478                         ast.negated,
479                         &mut cls1,
480                     )?;
481 
482                     let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
483                     cls2.union(&cls1);
484                     self.push(HirFrame::ClassUnicode(cls2));
485                 } else {
486                     let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
487                     self.bytes_fold_and_negate(
488                         &ast.span,
489                         ast.negated,
490                         &mut cls1,
491                     )?;
492 
493                     let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
494                     cls2.union(&cls1);
495                     self.push(HirFrame::ClassBytes(cls2));
496                 }
497             }
498             // This is handled automatically by the visitor.
499             ast::ClassSetItem::Union(_) => {}
500         }
501         Ok(())
502     }
503 
visit_class_set_binary_op_pre( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>504     fn visit_class_set_binary_op_pre(
505         &mut self,
506         _op: &ast::ClassSetBinaryOp,
507     ) -> Result<()> {
508         if self.flags().unicode() {
509             let cls = hir::ClassUnicode::empty();
510             self.push(HirFrame::ClassUnicode(cls));
511         } else {
512             let cls = hir::ClassBytes::empty();
513             self.push(HirFrame::ClassBytes(cls));
514         }
515         Ok(())
516     }
517 
visit_class_set_binary_op_in( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>518     fn visit_class_set_binary_op_in(
519         &mut self,
520         _op: &ast::ClassSetBinaryOp,
521     ) -> Result<()> {
522         if self.flags().unicode() {
523             let cls = hir::ClassUnicode::empty();
524             self.push(HirFrame::ClassUnicode(cls));
525         } else {
526             let cls = hir::ClassBytes::empty();
527             self.push(HirFrame::ClassBytes(cls));
528         }
529         Ok(())
530     }
531 
visit_class_set_binary_op_post( &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()>532     fn visit_class_set_binary_op_post(
533         &mut self,
534         op: &ast::ClassSetBinaryOp,
535     ) -> Result<()> {
536         use ast::ClassSetBinaryOpKind::*;
537 
538         if self.flags().unicode() {
539             let mut rhs = self.pop().unwrap().unwrap_class_unicode();
540             let mut lhs = self.pop().unwrap().unwrap_class_unicode();
541             let mut cls = self.pop().unwrap().unwrap_class_unicode();
542             if self.flags().case_insensitive() {
543                 rhs.try_case_fold_simple().map_err(|_| {
544                     self.error(
545                         op.rhs.span().clone(),
546                         ErrorKind::UnicodeCaseUnavailable,
547                     )
548                 })?;
549                 lhs.try_case_fold_simple().map_err(|_| {
550                     self.error(
551                         op.lhs.span().clone(),
552                         ErrorKind::UnicodeCaseUnavailable,
553                     )
554                 })?;
555             }
556             match op.kind {
557                 Intersection => lhs.intersect(&rhs),
558                 Difference => lhs.difference(&rhs),
559                 SymmetricDifference => lhs.symmetric_difference(&rhs),
560             }
561             cls.union(&lhs);
562             self.push(HirFrame::ClassUnicode(cls));
563         } else {
564             let mut rhs = self.pop().unwrap().unwrap_class_bytes();
565             let mut lhs = self.pop().unwrap().unwrap_class_bytes();
566             let mut cls = self.pop().unwrap().unwrap_class_bytes();
567             if self.flags().case_insensitive() {
568                 rhs.case_fold_simple();
569                 lhs.case_fold_simple();
570             }
571             match op.kind {
572                 Intersection => lhs.intersect(&rhs),
573                 Difference => lhs.difference(&rhs),
574                 SymmetricDifference => lhs.symmetric_difference(&rhs),
575             }
576             cls.union(&lhs);
577             self.push(HirFrame::ClassBytes(cls));
578         }
579         Ok(())
580     }
581 }
582 
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose stack and flags are read and updated during
    /// the traversal.
    trans: &'t Translator,
    /// The original pattern string; copied into every error that is
    /// created.
    pattern: &'p str,
}
594 
595 impl<'t, 'p> TranslatorI<'t, 'p> {
596     /// Build a new internal translator.
new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p>597     fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
598         TranslatorI { trans: trans, pattern: pattern }
599     }
600 
601     /// Return a reference to the underlying translator.
trans(&self) -> &Translator602     fn trans(&self) -> &Translator {
603         &self.trans
604     }
605 
606     /// Push the given frame on to the call stack.
push(&self, frame: HirFrame)607     fn push(&self, frame: HirFrame) {
608         self.trans().stack.borrow_mut().push(frame);
609     }
610 
611     /// Pop the top of the call stack. If the call stack is empty, return None.
pop(&self) -> Option<HirFrame>612     fn pop(&self) -> Option<HirFrame> {
613         self.trans().stack.borrow_mut().pop()
614     }
615 
616     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ErrorKind) -> Error617     fn error(&self, span: Span, kind: ErrorKind) -> Error {
618         Error { kind: kind, pattern: self.pattern.to_string(), span: span }
619     }
620 
621     /// Return a copy of the active flags.
flags(&self) -> Flags622     fn flags(&self) -> Flags {
623         self.trans().flags.get()
624     }
625 
626     /// Set the flags of this translator from the flags set in the given AST.
627     /// Then, return the old flags.
set_flags(&self, ast_flags: &ast::Flags) -> Flags628     fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
629         let old_flags = self.flags();
630         let mut new_flags = Flags::from_ast(ast_flags);
631         new_flags.merge(&old_flags);
632         self.trans().flags.set(new_flags);
633         old_flags
634     }
635 
hir_literal(&self, lit: &ast::Literal) -> Result<Hir>636     fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
637         let ch = match self.literal_to_char(lit)? {
638             byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
639             hir::Literal::Unicode(ch) => ch,
640         };
641         if self.flags().case_insensitive() {
642             self.hir_from_char_case_insensitive(lit.span, ch)
643         } else {
644             self.hir_from_char(lit.span, ch)
645         }
646     }
647 
648     /// Convert an Ast literal to its scalar representation.
649     ///
650     /// When Unicode mode is enabled, then this always succeeds and returns a
651     /// `char` (Unicode scalar value).
652     ///
653     /// When Unicode mode is disabled, then a raw byte is returned. If that
654     /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
655     /// an error.
literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal>656     fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
657         if self.flags().unicode() {
658             return Ok(hir::Literal::Unicode(lit.c));
659         }
660         let byte = match lit.byte() {
661             None => return Ok(hir::Literal::Unicode(lit.c)),
662             Some(byte) => byte,
663         };
664         if byte <= 0x7F {
665             return Ok(hir::Literal::Unicode(byte as char));
666         }
667         if !self.trans().allow_invalid_utf8 {
668             return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
669         }
670         Ok(hir::Literal::Byte(byte))
671     }
672 
hir_from_char(&self, span: Span, c: char) -> Result<Hir>673     fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
674         if !self.flags().unicode() && c.len_utf8() > 1 {
675             return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
676         }
677         Ok(Hir::literal(hir::Literal::Unicode(c)))
678     }
679 
hir_from_char_case_insensitive( &self, span: Span, c: char, ) -> Result<Hir>680     fn hir_from_char_case_insensitive(
681         &self,
682         span: Span,
683         c: char,
684     ) -> Result<Hir> {
685         if self.flags().unicode() {
686             // If case folding won't do anything, then don't bother trying.
687             let map =
688                 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
689                     self.error(span, ErrorKind::UnicodeCaseUnavailable)
690                 })?;
691             if !map {
692                 return self.hir_from_char(span, c);
693             }
694             let mut cls =
695                 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
696                     c, c,
697                 )]);
698             cls.try_case_fold_simple().map_err(|_| {
699                 self.error(span, ErrorKind::UnicodeCaseUnavailable)
700             })?;
701             Ok(Hir::class(hir::Class::Unicode(cls)))
702         } else {
703             if c.len_utf8() > 1 {
704                 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
705             }
706             // If case folding won't do anything, then don't bother trying.
707             match c {
708                 'A'..='Z' | 'a'..='z' => {}
709                 _ => return self.hir_from_char(span, c),
710             }
711             let mut cls =
712                 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
713                     c as u8, c as u8,
714                 )]);
715             cls.case_fold_simple();
716             Ok(Hir::class(hir::Class::Bytes(cls)))
717         }
718     }
719 
hir_dot(&self, span: Span) -> Result<Hir>720     fn hir_dot(&self, span: Span) -> Result<Hir> {
721         let unicode = self.flags().unicode();
722         if !unicode && !self.trans().allow_invalid_utf8 {
723             return Err(self.error(span, ErrorKind::InvalidUtf8));
724         }
725         Ok(if self.flags().dot_matches_new_line() {
726             Hir::any(!unicode)
727         } else {
728             Hir::dot(!unicode)
729         })
730     }
731 
hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir>732     fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
733         let unicode = self.flags().unicode();
734         let multi_line = self.flags().multi_line();
735         Ok(match asst.kind {
736             ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
737                 hir::Anchor::StartLine
738             } else {
739                 hir::Anchor::StartText
740             }),
741             ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
742                 hir::Anchor::EndLine
743             } else {
744                 hir::Anchor::EndText
745             }),
746             ast::AssertionKind::StartText => {
747                 Hir::anchor(hir::Anchor::StartText)
748             }
749             ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
750             ast::AssertionKind::WordBoundary => {
751                 Hir::word_boundary(if unicode {
752                     hir::WordBoundary::Unicode
753                 } else {
754                     hir::WordBoundary::Ascii
755                 })
756             }
757             ast::AssertionKind::NotWordBoundary => {
758                 Hir::word_boundary(if unicode {
759                     hir::WordBoundary::UnicodeNegate
760                 } else {
761                     // It is possible for negated ASCII word boundaries to
762                     // match at invalid UTF-8 boundaries, even when searching
763                     // valid UTF-8.
764                     if !self.trans().allow_invalid_utf8 {
765                         return Err(
766                             self.error(asst.span, ErrorKind::InvalidUtf8)
767                         );
768                     }
769                     hir::WordBoundary::AsciiNegate
770                 })
771             }
772         })
773     }
774 
hir_group(&self, group: &ast::Group, expr: Hir) -> Hir775     fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
776         let kind = match group.kind {
777             ast::GroupKind::CaptureIndex(idx) => {
778                 hir::GroupKind::CaptureIndex(idx)
779             }
780             ast::GroupKind::CaptureName(ref capname) => {
781                 hir::GroupKind::CaptureName {
782                     name: capname.name.clone(),
783                     index: capname.index,
784                 }
785             }
786             ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
787         };
788         Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
789     }
790 
hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir791     fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
792         let kind = match rep.op.kind {
793             ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
794             ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
795             ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
796             ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
797                 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
798             }
799             ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
800                 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
801             }
802             ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
803                 m,
804                 n,
805             )) => {
806                 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
807             }
808         };
809         let greedy =
810             if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
811         Hir::repetition(hir::Repetition {
812             kind: kind,
813             greedy: greedy,
814             hir: Box::new(expr),
815         })
816     }
817 
hir_unicode_class( &self, ast_class: &ast::ClassUnicode, ) -> Result<hir::ClassUnicode>818     fn hir_unicode_class(
819         &self,
820         ast_class: &ast::ClassUnicode,
821     ) -> Result<hir::ClassUnicode> {
822         use ast::ClassUnicodeKind::*;
823 
824         if !self.flags().unicode() {
825             return Err(
826                 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
827             );
828         }
829         let query = match ast_class.kind {
830             OneLetter(name) => ClassQuery::OneLetter(name),
831             Named(ref name) => ClassQuery::Binary(name),
832             NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
833                 property_name: name,
834                 property_value: value,
835             },
836         };
837         let mut result = self.convert_unicode_class_error(
838             &ast_class.span,
839             unicode::class(query),
840         );
841         if let Ok(ref mut class) = result {
842             self.unicode_fold_and_negate(
843                 &ast_class.span,
844                 ast_class.negated,
845                 class,
846             )?;
847             if class.ranges().is_empty() {
848                 let err = self
849                     .error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
850                 return Err(err);
851             }
852         }
853         result
854     }
855 
hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, ) -> Result<hir::ClassUnicode>856     fn hir_perl_unicode_class(
857         &self,
858         ast_class: &ast::ClassPerl,
859     ) -> Result<hir::ClassUnicode> {
860         use ast::ClassPerlKind::*;
861 
862         assert!(self.flags().unicode());
863         let result = match ast_class.kind {
864             Digit => unicode::perl_digit(),
865             Space => unicode::perl_space(),
866             Word => unicode::perl_word(),
867         };
868         let mut class =
869             self.convert_unicode_class_error(&ast_class.span, result)?;
870         // We needn't apply case folding here because the Perl Unicode classes
871         // are already closed under Unicode simple case folding.
872         if ast_class.negated {
873             class.negate();
874         }
875         Ok(class)
876     }
877 
hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes878     fn hir_perl_byte_class(
879         &self,
880         ast_class: &ast::ClassPerl,
881     ) -> hir::ClassBytes {
882         use ast::ClassPerlKind::*;
883 
884         assert!(!self.flags().unicode());
885         let mut class = match ast_class.kind {
886             Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
887             Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
888             Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
889         };
890         // We needn't apply case folding here because the Perl ASCII classes
891         // are already closed (under ASCII case folding).
892         if ast_class.negated {
893             class.negate();
894         }
895         class
896     }
897 
898     /// Converts the given Unicode specific error to an HIR translation error.
899     ///
900     /// The span given should approximate the position at which an error would
901     /// occur.
convert_unicode_class_error( &self, span: &Span, result: unicode::Result<hir::ClassUnicode>, ) -> Result<hir::ClassUnicode>902     fn convert_unicode_class_error(
903         &self,
904         span: &Span,
905         result: unicode::Result<hir::ClassUnicode>,
906     ) -> Result<hir::ClassUnicode> {
907         result.map_err(|err| {
908             let sp = span.clone();
909             match err {
910                 unicode::Error::PropertyNotFound => {
911                     self.error(sp, ErrorKind::UnicodePropertyNotFound)
912                 }
913                 unicode::Error::PropertyValueNotFound => {
914                     self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
915                 }
916                 unicode::Error::PerlClassNotFound => {
917                     self.error(sp, ErrorKind::UnicodePerlClassNotFound)
918                 }
919             }
920         })
921     }
922 
unicode_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassUnicode, ) -> Result<()>923     fn unicode_fold_and_negate(
924         &self,
925         span: &Span,
926         negated: bool,
927         class: &mut hir::ClassUnicode,
928     ) -> Result<()> {
929         // Note that we must apply case folding before negation!
930         // Consider `(?i)[^x]`. If we applied negation field, then
931         // the result would be the character class that matched any
932         // Unicode scalar value.
933         if self.flags().case_insensitive() {
934             class.try_case_fold_simple().map_err(|_| {
935                 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
936             })?;
937         }
938         if negated {
939             class.negate();
940         }
941         Ok(())
942     }
943 
bytes_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassBytes, ) -> Result<()>944     fn bytes_fold_and_negate(
945         &self,
946         span: &Span,
947         negated: bool,
948         class: &mut hir::ClassBytes,
949     ) -> Result<()> {
950         // Note that we must apply case folding before negation!
951         // Consider `(?i)[^x]`. If we applied negation field, then
952         // the result would be the character class that matched any
953         // Unicode scalar value.
954         if self.flags().case_insensitive() {
955             class.case_fold_simple();
956         }
957         if negated {
958             class.negate();
959         }
960         if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
961             return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
962         }
963         Ok(())
964     }
965 
966     /// Return a scalar byte value suitable for use as a literal in a byte
967     /// character class.
class_literal_byte(&self, ast: &ast::Literal) -> Result<u8>968     fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
969         match self.literal_to_char(ast)? {
970             hir::Literal::Byte(byte) => Ok(byte),
971             hir::Literal::Unicode(ch) => {
972                 if ch <= 0x7F as char {
973                     Ok(ch as u8)
974                 } else {
975                     // We can't feasibly support Unicode in
976                     // byte oriented classes. Byte classes don't
977                     // do Unicode case folding.
978                     Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
979                 }
980             }
981         }
982     }
983 }
984 
/// The translator's view of a regular expression's flags at one point
/// during translation.
///
/// Every flag is tri-state: `None` means the flag has not been mentioned,
/// `Some(false)` means it was explicitly disabled and `Some(true)` means
/// it was explicitly enabled.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    case_insensitive: Option<bool>,
    multi_line: Option<bool>,
    dot_matches_new_line: Option<bool>,
    swap_greed: Option<bool>,
    unicode: Option<bool>,
    // There is deliberately no `ignore_whitespace` field: that flag is
    // handled entirely in the parser.
}
1000 
1001 impl Flags {
from_ast(ast: &ast::Flags) -> Flags1002     fn from_ast(ast: &ast::Flags) -> Flags {
1003         let mut flags = Flags::default();
1004         let mut enable = true;
1005         for item in &ast.items {
1006             match item.kind {
1007                 ast::FlagsItemKind::Negation => {
1008                     enable = false;
1009                 }
1010                 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1011                     flags.case_insensitive = Some(enable);
1012                 }
1013                 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1014                     flags.multi_line = Some(enable);
1015                 }
1016                 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1017                     flags.dot_matches_new_line = Some(enable);
1018                 }
1019                 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1020                     flags.swap_greed = Some(enable);
1021                 }
1022                 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1023                     flags.unicode = Some(enable);
1024                 }
1025                 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1026             }
1027         }
1028         flags
1029     }
1030 
merge(&mut self, previous: &Flags)1031     fn merge(&mut self, previous: &Flags) {
1032         if self.case_insensitive.is_none() {
1033             self.case_insensitive = previous.case_insensitive;
1034         }
1035         if self.multi_line.is_none() {
1036             self.multi_line = previous.multi_line;
1037         }
1038         if self.dot_matches_new_line.is_none() {
1039             self.dot_matches_new_line = previous.dot_matches_new_line;
1040         }
1041         if self.swap_greed.is_none() {
1042             self.swap_greed = previous.swap_greed;
1043         }
1044         if self.unicode.is_none() {
1045             self.unicode = previous.unicode;
1046         }
1047     }
1048 
case_insensitive(&self) -> bool1049     fn case_insensitive(&self) -> bool {
1050         self.case_insensitive.unwrap_or(false)
1051     }
1052 
multi_line(&self) -> bool1053     fn multi_line(&self) -> bool {
1054         self.multi_line.unwrap_or(false)
1055     }
1056 
dot_matches_new_line(&self) -> bool1057     fn dot_matches_new_line(&self) -> bool {
1058         self.dot_matches_new_line.unwrap_or(false)
1059     }
1060 
swap_greed(&self) -> bool1061     fn swap_greed(&self) -> bool {
1062         self.swap_greed.unwrap_or(false)
1063     }
1064 
unicode(&self) -> bool1065     fn unicode(&self) -> bool {
1066         self.unicode.unwrap_or(true)
1067     }
1068 }
1069 
hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes1070 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1071     let ranges: Vec<_> = ascii_class(kind)
1072         .iter()
1073         .cloned()
1074         .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1075         .collect();
1076     hir::ClassBytes::new(ranges)
1077 }
1078 
ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)]1079 fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1080     use ast::ClassAsciiKind::*;
1081     match *kind {
1082         Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1083         Alpha => &[('A', 'Z'), ('a', 'z')],
1084         Ascii => &[('\x00', '\x7F')],
1085         Blank => &[('\t', '\t'), (' ', ' ')],
1086         Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1087         Digit => &[('0', '9')],
1088         Graph => &[('!', '~')],
1089         Lower => &[('a', 'z')],
1090         Print => &[(' ', '~')],
1091         Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1092         Space => &[
1093             ('\t', '\t'),
1094             ('\n', '\n'),
1095             ('\x0B', '\x0B'),
1096             ('\x0C', '\x0C'),
1097             ('\r', '\r'),
1098             (' ', ' '),
1099         ],
1100         Upper => &[('A', 'Z')],
1101         Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1102         Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1103     }
1104 }
1105 
1106 #[cfg(test)]
1107 mod tests {
1108     use ast::parse::ParserBuilder;
1109     use ast::{self, Ast, Position, Span};
1110     use hir::{self, Hir, HirKind};
1111     use unicode::{self, ClassQuery};
1112 
1113     use super::{ascii_class, TranslatorBuilder};
1114 
1115     // We create these errors to compare with real hir::Errors in the tests.
1116     // We define equality between TestError and hir::Error to disregard the
1117     // pattern string in hir::Error, which is annoying to provide in tests.
1118     #[derive(Clone, Debug)]
1119     struct TestError {
1120         span: Span,
1121         kind: hir::ErrorKind,
1122     }
1123 
1124     impl PartialEq<hir::Error> for TestError {
eq(&self, other: &hir::Error) -> bool1125         fn eq(&self, other: &hir::Error) -> bool {
1126             self.span == other.span && self.kind == other.kind
1127         }
1128     }
1129 
1130     impl PartialEq<TestError> for hir::Error {
eq(&self, other: &TestError) -> bool1131         fn eq(&self, other: &TestError) -> bool {
1132             self.span == other.span && self.kind == other.kind
1133         }
1134     }
1135 
parse(pattern: &str) -> Ast1136     fn parse(pattern: &str) -> Ast {
1137         ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1138     }
1139 
t(pattern: &str) -> Hir1140     fn t(pattern: &str) -> Hir {
1141         TranslatorBuilder::new()
1142             .allow_invalid_utf8(false)
1143             .build()
1144             .translate(pattern, &parse(pattern))
1145             .unwrap()
1146     }
1147 
t_err(pattern: &str) -> hir::Error1148     fn t_err(pattern: &str) -> hir::Error {
1149         TranslatorBuilder::new()
1150             .allow_invalid_utf8(false)
1151             .build()
1152             .translate(pattern, &parse(pattern))
1153             .unwrap_err()
1154     }
1155 
t_bytes(pattern: &str) -> Hir1156     fn t_bytes(pattern: &str) -> Hir {
1157         TranslatorBuilder::new()
1158             .allow_invalid_utf8(true)
1159             .build()
1160             .translate(pattern, &parse(pattern))
1161             .unwrap()
1162     }
1163 
hir_lit(s: &str) -> Hir1164     fn hir_lit(s: &str) -> Hir {
1165         match s.len() {
1166             0 => Hir::empty(),
1167             _ => {
1168                 let lits = s
1169                     .chars()
1170                     .map(hir::Literal::Unicode)
1171                     .map(Hir::literal)
1172                     .collect();
1173                 Hir::concat(lits)
1174             }
1175         }
1176     }
1177 
hir_blit(s: &[u8]) -> Hir1178     fn hir_blit(s: &[u8]) -> Hir {
1179         match s.len() {
1180             0 => Hir::empty(),
1181             1 => Hir::literal(hir::Literal::Byte(s[0])),
1182             _ => {
1183                 let lits = s
1184                     .iter()
1185                     .cloned()
1186                     .map(hir::Literal::Byte)
1187                     .map(Hir::literal)
1188                     .collect();
1189                 Hir::concat(lits)
1190             }
1191         }
1192     }
1193 
hir_group(i: u32, expr: Hir) -> Hir1194     fn hir_group(i: u32, expr: Hir) -> Hir {
1195         Hir::group(hir::Group {
1196             kind: hir::GroupKind::CaptureIndex(i),
1197             hir: Box::new(expr),
1198         })
1199     }
1200 
hir_group_name(i: u32, name: &str, expr: Hir) -> Hir1201     fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1202         Hir::group(hir::Group {
1203             kind: hir::GroupKind::CaptureName {
1204                 name: name.to_string(),
1205                 index: i,
1206             },
1207             hir: Box::new(expr),
1208         })
1209     }
1210 
hir_group_nocap(expr: Hir) -> Hir1211     fn hir_group_nocap(expr: Hir) -> Hir {
1212         Hir::group(hir::Group {
1213             kind: hir::GroupKind::NonCapturing,
1214             hir: Box::new(expr),
1215         })
1216     }
1217 
hir_quest(greedy: bool, expr: Hir) -> Hir1218     fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1219         Hir::repetition(hir::Repetition {
1220             kind: hir::RepetitionKind::ZeroOrOne,
1221             greedy: greedy,
1222             hir: Box::new(expr),
1223         })
1224     }
1225 
hir_star(greedy: bool, expr: Hir) -> Hir1226     fn hir_star(greedy: bool, expr: Hir) -> Hir {
1227         Hir::repetition(hir::Repetition {
1228             kind: hir::RepetitionKind::ZeroOrMore,
1229             greedy: greedy,
1230             hir: Box::new(expr),
1231         })
1232     }
1233 
hir_plus(greedy: bool, expr: Hir) -> Hir1234     fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1235         Hir::repetition(hir::Repetition {
1236             kind: hir::RepetitionKind::OneOrMore,
1237             greedy: greedy,
1238             hir: Box::new(expr),
1239         })
1240     }
1241 
hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir1242     fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1243         Hir::repetition(hir::Repetition {
1244             kind: hir::RepetitionKind::Range(range),
1245             greedy: greedy,
1246             hir: Box::new(expr),
1247         })
1248     }
1249 
hir_alt(alts: Vec<Hir>) -> Hir1250     fn hir_alt(alts: Vec<Hir>) -> Hir {
1251         Hir::alternation(alts)
1252     }
1253 
hir_cat(exprs: Vec<Hir>) -> Hir1254     fn hir_cat(exprs: Vec<Hir>) -> Hir {
1255         Hir::concat(exprs)
1256     }
1257 
1258     #[allow(dead_code)]
hir_uclass_query(query: ClassQuery) -> Hir1259     fn hir_uclass_query(query: ClassQuery) -> Hir {
1260         Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1261     }
1262 
1263     #[allow(dead_code)]
hir_uclass_perl_word() -> Hir1264     fn hir_uclass_perl_word() -> Hir {
1265         Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1266     }
1267 
hir_uclass(ranges: &[(char, char)]) -> Hir1268     fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1269         let ranges: Vec<hir::ClassUnicodeRange> = ranges
1270             .iter()
1271             .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1272             .collect();
1273         Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1274     }
1275 
hir_bclass(ranges: &[(u8, u8)]) -> Hir1276     fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1277         let ranges: Vec<hir::ClassBytesRange> = ranges
1278             .iter()
1279             .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1280             .collect();
1281         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1282     }
1283 
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir1284     fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1285         let ranges: Vec<hir::ClassBytesRange> = ranges
1286             .iter()
1287             .map(|&(s, e)| {
1288                 assert!(s as u32 <= 0x7F);
1289                 assert!(e as u32 <= 0x7F);
1290                 hir::ClassBytesRange::new(s as u8, e as u8)
1291             })
1292             .collect();
1293         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1294     }
1295 
hir_case_fold(expr: Hir) -> Hir1296     fn hir_case_fold(expr: Hir) -> Hir {
1297         match expr.into_kind() {
1298             HirKind::Class(mut cls) => {
1299                 cls.case_fold_simple();
1300                 Hir::class(cls)
1301             }
1302             _ => panic!("cannot case fold non-class Hir expr"),
1303         }
1304     }
1305 
hir_negate(expr: Hir) -> Hir1306     fn hir_negate(expr: Hir) -> Hir {
1307         match expr.into_kind() {
1308             HirKind::Class(mut cls) => {
1309                 cls.negate();
1310                 Hir::class(cls)
1311             }
1312             _ => panic!("cannot negate non-class Hir expr"),
1313         }
1314     }
1315 
1316     #[allow(dead_code)]
hir_union(expr1: Hir, expr2: Hir) -> Hir1317     fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1318         use hir::Class::{Bytes, Unicode};
1319 
1320         match (expr1.into_kind(), expr2.into_kind()) {
1321             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1322                 c1.union(&c2);
1323                 Hir::class(hir::Class::Unicode(c1))
1324             }
1325             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1326                 c1.union(&c2);
1327                 Hir::class(hir::Class::Bytes(c1))
1328             }
1329             _ => panic!("cannot union non-class Hir exprs"),
1330         }
1331     }
1332 
1333     #[allow(dead_code)]
hir_difference(expr1: Hir, expr2: Hir) -> Hir1334     fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1335         use hir::Class::{Bytes, Unicode};
1336 
1337         match (expr1.into_kind(), expr2.into_kind()) {
1338             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1339                 c1.difference(&c2);
1340                 Hir::class(hir::Class::Unicode(c1))
1341             }
1342             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1343                 c1.difference(&c2);
1344                 Hir::class(hir::Class::Bytes(c1))
1345             }
1346             _ => panic!("cannot difference non-class Hir exprs"),
1347         }
1348     }
1349 
hir_anchor(anchor: hir::Anchor) -> Hir1350     fn hir_anchor(anchor: hir::Anchor) -> Hir {
1351         Hir::anchor(anchor)
1352     }
1353 
hir_word(wb: hir::WordBoundary) -> Hir1354     fn hir_word(wb: hir::WordBoundary) -> Hir {
1355         Hir::word_boundary(wb)
1356     }
1357 
/// Empty patterns, empty groups and empty alternation branches all
/// translate to `Hir::empty()` in the corresponding position.
#[test]
fn empty() {
    let nothing = Hir::empty;

    // The empty pattern and a flags-only pattern.
    for pattern in &["", "(?i)"] {
        assert_eq!(t(pattern), nothing());
    }

    // Groups around nothing.
    assert_eq!(t("()"), hir_group(1, nothing()));
    assert_eq!(t("(?:)"), hir_group_nocap(nothing()));
    assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", nothing()));

    // Alternations with empty branches at the top level.
    assert_eq!(t("|"), hir_alt(vec![nothing(), nothing()]));
    let two_groups =
        hir_alt(vec![hir_group(1, nothing()), hir_group(2, nothing())]);
    assert_eq!(t("()|()"), two_groups);

    // Alternations with empty branches inside a capturing group.
    let grouped = |branches: Vec<Hir>| hir_group(1, hir_alt(branches));
    assert_eq!(t("(|b)"), grouped(vec![nothing(), hir_lit("b")]));
    assert_eq!(t("(a|)"), grouped(vec![hir_lit("a"), nothing()]));
    assert_eq!(
        t("(a||c)"),
        grouped(vec![hir_lit("a"), nothing(), hir_lit("c")])
    );
    assert_eq!(t("(||)"), grouped(vec![nothing(), nothing(), nothing()]));
}
1396 
/// Literals translate to Unicode literals, or to byte literals for
/// non-ASCII bytes when invalid UTF-8 is allowed; disallowed literals
/// report an error with the literal's span.
#[test]
fn literal() {
    // (pattern, expected literal text) with invalid UTF-8 disallowed.
    let text_cases =
        [("a", "a"), ("(?-u)a", "a"), ("☃", "☃"), ("abcd", "abcd")];
    for &(pattern, expected) in text_cases.iter() {
        assert_eq!(t(pattern), hir_lit(expected));
    }

    // Different spellings of `a` with invalid UTF-8 allowed.
    let spellings_of_a = ["(?-u)a", "(?-u)\x61", r"(?-u)\x61"];
    for pattern in spellings_of_a.iter() {
        assert_eq!(t_bytes(pattern), hir_lit("a"));
    }
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    let snowman_without_unicode = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
    };
    assert_eq!(t_err("(?-u)☃"), snowman_without_unicode);

    let high_byte_without_invalid_utf8 = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
    };
    assert_eq!(t_err(r"(?-u)\xFF"), high_byte_without_invalid_utf8);
}
1430 
/// Case insensitive literals translate to character classes holding the
/// case variants of the literal; characters without case variants stay
/// literals.
///
/// Assertions that need Unicode case folding are gated on the
/// `unicode-case` feature. Assertions that run with Unicode mode disabled
/// (`-u`) produce byte classes and are not gated.
#[test]
fn literal_case_insensitive() {
    // Unicode mode: classes are sets of Unicode scalar values.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)"),
        hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_uclass(&[('B', 'B'), ('b', 'b')]),
            hir_lit("@"),
            hir_uclass(&[('C', 'C'), ('c', 'c')]),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );

    // Unicode mode disabled: classes are sets of bytes.
    assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
    // This assertion is intentionally not gated on `unicode-case`: with
    // `-u` in effect the case insensitive literal is folded as a byte
    // class, which does not consult the Unicode case folding tables.
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
            hir_lit("@"),
            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
        ])
    );

    // The same, with invalid UTF-8 allowed.
    assert_eq!(
        t_bytes("(?i-u)a"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes("(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes(r"(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    // A non-ASCII byte has no ASCII case variants and stays a literal.
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    // A multi-byte character is rejected when Unicode mode is disabled.
    assert_eq!(
        t_err("(?i-u)β"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 8),
            ),
        }
    );
}
1510 
/// `.` translates to a class excluding `\n` unless the `s` flag is set,
/// over Unicode scalar values in Unicode mode and over bytes otherwise.
#[test]
fn dot() {
    let max_scalar = '\u{10FFFF}';
    assert_eq!(
        t("."),
        hir_uclass(&[('\0', '\t'), ('\x0B', max_scalar)])
    );
    assert_eq!(t("(?s)."), hir_uclass(&[('\0', max_scalar)]));

    let max_byte = b'\xFF';
    assert_eq!(
        t_bytes("(?-u)."),
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', max_byte)])
    );
    assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', max_byte)]));

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    let invalid_utf8 = |start: Position, end: Position| TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(start, end),
    };
    assert_eq!(
        t_err("(?-u)."),
        invalid_utf8(Position::new(5, 1, 6), Position::new(6, 1, 7))
    );
    assert_eq!(
        t_err("(?s-u)."),
        invalid_utf8(Position::new(6, 1, 7), Position::new(7, 1, 8))
    );
}
1546 
1547     #[test]
assertions()1548     fn assertions() {
1549         assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
1550         assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
1551         assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
1552         assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
1553         assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
1554         assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
1555         assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
1556         assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
1557 
1558         assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
1559         assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
1560         assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
1561         assert_eq!(
1562             t_bytes(r"(?-u)\B"),
1563             hir_word(hir::WordBoundary::AsciiNegate)
1564         );
1565 
1566         assert_eq!(
1567             t_err(r"(?-u)\B"),
1568             TestError {
1569                 kind: hir::ErrorKind::InvalidUtf8,
1570                 span: Span::new(
1571                     Position::new(5, 1, 6),
1572                     Position::new(7, 1, 8)
1573                 ),
1574             }
1575         );
1576     }
1577 
1578     #[test]
group()1579     fn group() {
1580         assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
1581         assert_eq!(
1582             t("(a)(b)"),
1583             hir_cat(vec![
1584                 hir_group(1, hir_lit("a")),
1585                 hir_group(2, hir_lit("b")),
1586             ])
1587         );
1588         assert_eq!(
1589             t("(a)|(b)"),
1590             hir_alt(vec![
1591                 hir_group(1, hir_lit("a")),
1592                 hir_group(2, hir_lit("b")),
1593             ])
1594         );
1595         assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
1596         assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
1597         assert_eq!(
1598             t("(?P<foo>a)(?P<bar>b)"),
1599             hir_cat(vec![
1600                 hir_group_name(1, "foo", hir_lit("a")),
1601                 hir_group_name(2, "bar", hir_lit("b")),
1602             ])
1603         );
1604         assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1605         assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
1606         assert_eq!(
1607             t("(?:a)(b)"),
1608             hir_cat(vec![
1609                 hir_group_nocap(hir_lit("a")),
1610                 hir_group(1, hir_lit("b")),
1611             ])
1612         );
1613         assert_eq!(
1614             t("(a)(?:b)(c)"),
1615             hir_cat(vec![
1616                 hir_group(1, hir_lit("a")),
1617                 hir_group_nocap(hir_lit("b")),
1618                 hir_group(2, hir_lit("c")),
1619             ])
1620         );
1621         assert_eq!(
1622             t("(a)(?P<foo>b)(c)"),
1623             hir_cat(vec![
1624                 hir_group(1, hir_lit("a")),
1625                 hir_group_name(2, "foo", hir_lit("b")),
1626                 hir_group(3, hir_lit("c")),
1627             ])
1628         );
1629         assert_eq!(t("()"), hir_group(1, Hir::empty()));
1630         assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
1631         assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
1632         assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
1633     }
1634 
1635     #[test]
flags()1636     fn flags() {
1637         #[cfg(feature = "unicode-case")]
1638         assert_eq!(
1639             t("(?i:a)a"),
1640             hir_cat(vec![
1641                 hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
1642                 hir_lit("a"),
1643             ])
1644         );
1645         assert_eq!(
1646             t("(?i-u:a)β"),
1647             hir_cat(vec![
1648                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1649                 hir_lit("β"),
1650             ])
1651         );
1652         assert_eq!(
1653             t("(?:(?i-u)a)b"),
1654             hir_cat(vec![
1655                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1656                 hir_lit("b"),
1657             ])
1658         );
1659         assert_eq!(
1660             t("((?i-u)a)b"),
1661             hir_cat(vec![
1662                 hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1663                 hir_lit("b"),
1664             ])
1665         );
1666         #[cfg(feature = "unicode-case")]
1667         assert_eq!(
1668             t("(?i)(?-i:a)a"),
1669             hir_cat(vec![
1670                 hir_group_nocap(hir_lit("a")),
1671                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1672             ])
1673         );
1674         #[cfg(feature = "unicode-case")]
1675         assert_eq!(
1676             t("(?im)a^"),
1677             hir_cat(vec![
1678                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1679                 hir_anchor(hir::Anchor::StartLine),
1680             ])
1681         );
1682         #[cfg(feature = "unicode-case")]
1683         assert_eq!(
1684             t("(?im)a^(?i-m)a^"),
1685             hir_cat(vec![
1686                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1687                 hir_anchor(hir::Anchor::StartLine),
1688                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1689                 hir_anchor(hir::Anchor::StartText),
1690             ])
1691         );
1692         assert_eq!(
1693             t("(?U)a*a*?(?-U)a*a*?"),
1694             hir_cat(vec![
1695                 hir_star(false, hir_lit("a")),
1696                 hir_star(true, hir_lit("a")),
1697                 hir_star(true, hir_lit("a")),
1698                 hir_star(false, hir_lit("a")),
1699             ])
1700         );
1701         #[cfg(feature = "unicode-case")]
1702         assert_eq!(
1703             t("(?:a(?i)a)a"),
1704             hir_cat(vec![
1705                 hir_group_nocap(hir_cat(vec![
1706                     hir_lit("a"),
1707                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1708                 ])),
1709                 hir_lit("a"),
1710             ])
1711         );
1712         #[cfg(feature = "unicode-case")]
1713         assert_eq!(
1714             t("(?i)(?:a(?-i)a)a"),
1715             hir_cat(vec![
1716                 hir_group_nocap(hir_cat(vec![
1717                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1718                     hir_lit("a"),
1719                 ])),
1720                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1721             ])
1722         );
1723     }
1724 
1725     #[test]
escape()1726     fn escape() {
1727         assert_eq!(
1728             t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1729             hir_lit(r"\.+*?()|[]{}^$#")
1730         );
1731     }
1732 
1733     #[test]
repetition()1734     fn repetition() {
1735         assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
1736         assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
1737         assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
1738         assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
1739         assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
1740         assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
1741 
1742         assert_eq!(
1743             t("a{1}"),
1744             hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1745         );
1746         assert_eq!(
1747             t("a{1,}"),
1748             hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1749         );
1750         assert_eq!(
1751             t("a{1,2}"),
1752             hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
1753         );
1754         assert_eq!(
1755             t("a{1}?"),
1756             hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1757         );
1758         assert_eq!(
1759             t("a{1,}?"),
1760             hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1761         );
1762         assert_eq!(
1763             t("a{1,2}?"),
1764             hir_range(
1765                 false,
1766                 hir::RepetitionRange::Bounded(1, 2),
1767                 hir_lit("a"),
1768             )
1769         );
1770 
1771         assert_eq!(
1772             t("ab?"),
1773             hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1774         );
1775         assert_eq!(
1776             t("(ab)?"),
1777             hir_quest(
1778                 true,
1779                 hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1780             )
1781         );
1782         assert_eq!(
1783             t("a|b?"),
1784             hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1785         );
1786     }
1787 
1788     #[test]
cat_alt()1789     fn cat_alt() {
1790         assert_eq!(
1791             t("(ab)"),
1792             hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1793         );
1794         assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
1795         assert_eq!(
1796             t("a|b|c"),
1797             hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1798         );
1799         assert_eq!(
1800             t("ab|bc|cd"),
1801             hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1802         );
1803         assert_eq!(
1804             t("(a|b)"),
1805             hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
1806         );
1807         assert_eq!(
1808             t("(a|b|c)"),
1809             hir_group(
1810                 1,
1811                 hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1812             )
1813         );
1814         assert_eq!(
1815             t("(ab|bc|cd)"),
1816             hir_group(
1817                 1,
1818                 hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1819             )
1820         );
1821         assert_eq!(
1822             t("(ab|(bc|(cd)))"),
1823             hir_group(
1824                 1,
1825                 hir_alt(vec![
1826                     hir_lit("ab"),
1827                     hir_group(
1828                         2,
1829                         hir_alt(vec![
1830                             hir_lit("bc"),
1831                             hir_group(3, hir_lit("cd")),
1832                         ])
1833                     ),
1834                 ])
1835             )
1836         );
1837     }
1838 
1839     #[test]
class_ascii()1840     fn class_ascii() {
1841         assert_eq!(
1842             t("[[:alnum:]]"),
1843             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
1844         );
1845         assert_eq!(
1846             t("[[:alpha:]]"),
1847             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
1848         );
1849         assert_eq!(
1850             t("[[:ascii:]]"),
1851             hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
1852         );
1853         assert_eq!(
1854             t("[[:blank:]]"),
1855             hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
1856         );
1857         assert_eq!(
1858             t("[[:cntrl:]]"),
1859             hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
1860         );
1861         assert_eq!(
1862             t("[[:digit:]]"),
1863             hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
1864         );
1865         assert_eq!(
1866             t("[[:graph:]]"),
1867             hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
1868         );
1869         assert_eq!(
1870             t("[[:lower:]]"),
1871             hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
1872         );
1873         assert_eq!(
1874             t("[[:print:]]"),
1875             hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
1876         );
1877         assert_eq!(
1878             t("[[:punct:]]"),
1879             hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
1880         );
1881         assert_eq!(
1882             t("[[:space:]]"),
1883             hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
1884         );
1885         assert_eq!(
1886             t("[[:upper:]]"),
1887             hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
1888         );
1889         assert_eq!(
1890             t("[[:word:]]"),
1891             hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
1892         );
1893         assert_eq!(
1894             t("[[:xdigit:]]"),
1895             hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
1896         );
1897 
1898         assert_eq!(
1899             t("[[:^lower:]]"),
1900             hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
1901         );
1902         #[cfg(feature = "unicode-case")]
1903         assert_eq!(
1904             t("(?i)[[:lower:]]"),
1905             hir_uclass(&[
1906                 ('A', 'Z'),
1907                 ('a', 'z'),
1908                 ('\u{17F}', '\u{17F}'),
1909                 ('\u{212A}', '\u{212A}'),
1910             ])
1911         );
1912 
1913         assert_eq!(
1914             t("(?-u)[[:lower:]]"),
1915             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
1916         );
1917         assert_eq!(
1918             t("(?i-u)[[:lower:]]"),
1919             hir_case_fold(hir_bclass_from_char(ascii_class(
1920                 &ast::ClassAsciiKind::Lower
1921             )))
1922         );
1923 
1924         assert_eq!(
1925             t_err("(?-u)[[:^lower:]]"),
1926             TestError {
1927                 kind: hir::ErrorKind::InvalidUtf8,
1928                 span: Span::new(
1929                     Position::new(6, 1, 7),
1930                     Position::new(16, 1, 17)
1931                 ),
1932             }
1933         );
1934         assert_eq!(
1935             t_err("(?i-u)[[:^lower:]]"),
1936             TestError {
1937                 kind: hir::ErrorKind::InvalidUtf8,
1938                 span: Span::new(
1939                     Position::new(7, 1, 8),
1940                     Position::new(17, 1, 18)
1941                 ),
1942             }
1943         );
1944     }
1945 
1946     #[test]
1947     #[cfg(feature = "unicode-perl")]
class_perl()1948     fn class_perl() {
1949         // Unicode
1950         assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
1951         assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
1952         assert_eq!(t(r"\w"), hir_uclass_perl_word());
1953         #[cfg(feature = "unicode-case")]
1954         assert_eq!(
1955             t(r"(?i)\d"),
1956             hir_uclass_query(ClassQuery::Binary("digit"))
1957         );
1958         #[cfg(feature = "unicode-case")]
1959         assert_eq!(
1960             t(r"(?i)\s"),
1961             hir_uclass_query(ClassQuery::Binary("space"))
1962         );
1963         #[cfg(feature = "unicode-case")]
1964         assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
1965 
1966         // Unicode, negated
1967         assert_eq!(
1968             t(r"\D"),
1969             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1970         );
1971         assert_eq!(
1972             t(r"\S"),
1973             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1974         );
1975         assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
1976         #[cfg(feature = "unicode-case")]
1977         assert_eq!(
1978             t(r"(?i)\D"),
1979             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
1980         );
1981         #[cfg(feature = "unicode-case")]
1982         assert_eq!(
1983             t(r"(?i)\S"),
1984             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
1985         );
1986         #[cfg(feature = "unicode-case")]
1987         assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
1988 
1989         // ASCII only
1990         assert_eq!(
1991             t(r"(?-u)\d"),
1992             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
1993         );
1994         assert_eq!(
1995             t(r"(?-u)\s"),
1996             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
1997         );
1998         assert_eq!(
1999             t(r"(?-u)\w"),
2000             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2001         );
2002         assert_eq!(
2003             t(r"(?i-u)\d"),
2004             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2005         );
2006         assert_eq!(
2007             t(r"(?i-u)\s"),
2008             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2009         );
2010         assert_eq!(
2011             t(r"(?i-u)\w"),
2012             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2013         );
2014 
2015         // ASCII only, negated
2016         assert_eq!(
2017             t(r"(?-u)\D"),
2018             hir_negate(hir_bclass_from_char(ascii_class(
2019                 &ast::ClassAsciiKind::Digit
2020             )))
2021         );
2022         assert_eq!(
2023             t(r"(?-u)\S"),
2024             hir_negate(hir_bclass_from_char(ascii_class(
2025                 &ast::ClassAsciiKind::Space
2026             )))
2027         );
2028         assert_eq!(
2029             t(r"(?-u)\W"),
2030             hir_negate(hir_bclass_from_char(ascii_class(
2031                 &ast::ClassAsciiKind::Word
2032             )))
2033         );
2034         assert_eq!(
2035             t(r"(?i-u)\D"),
2036             hir_negate(hir_bclass_from_char(ascii_class(
2037                 &ast::ClassAsciiKind::Digit
2038             )))
2039         );
2040         assert_eq!(
2041             t(r"(?i-u)\S"),
2042             hir_negate(hir_bclass_from_char(ascii_class(
2043                 &ast::ClassAsciiKind::Space
2044             )))
2045         );
2046         assert_eq!(
2047             t(r"(?i-u)\W"),
2048             hir_negate(hir_bclass_from_char(ascii_class(
2049                 &ast::ClassAsciiKind::Word
2050             )))
2051         );
2052     }
2053 
2054     #[test]
2055     #[cfg(not(feature = "unicode-perl"))]
class_perl_word_disabled()2056     fn class_perl_word_disabled() {
2057         assert_eq!(
2058             t_err(r"\w"),
2059             TestError {
2060                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2061                 span: Span::new(
2062                     Position::new(0, 1, 1),
2063                     Position::new(2, 1, 3)
2064                 ),
2065             }
2066         );
2067     }
2068 
2069     #[test]
2070     #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
class_perl_space_disabled()2071     fn class_perl_space_disabled() {
2072         assert_eq!(
2073             t_err(r"\s"),
2074             TestError {
2075                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2076                 span: Span::new(
2077                     Position::new(0, 1, 1),
2078                     Position::new(2, 1, 3)
2079                 ),
2080             }
2081         );
2082     }
2083 
2084     #[test]
2085     #[cfg(all(
2086         not(feature = "unicode-perl"),
2087         not(feature = "unicode-gencat")
2088     ))]
class_perl_digit_disabled()2089     fn class_perl_digit_disabled() {
2090         assert_eq!(
2091             t_err(r"\d"),
2092             TestError {
2093                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2094                 span: Span::new(
2095                     Position::new(0, 1, 1),
2096                     Position::new(2, 1, 3)
2097                 ),
2098             }
2099         );
2100     }
2101 
2102     #[test]
2103     #[cfg(feature = "unicode-gencat")]
class_unicode_gencat()2104     fn class_unicode_gencat() {
2105         assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2106         assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2107         assert_eq!(
2108             t(r"\p{Separator}"),
2109             hir_uclass_query(ClassQuery::Binary("Z"))
2110         );
2111         assert_eq!(
2112             t(r"\p{se      PaRa ToR}"),
2113             hir_uclass_query(ClassQuery::Binary("Z"))
2114         );
2115         assert_eq!(
2116             t(r"\p{gc:Separator}"),
2117             hir_uclass_query(ClassQuery::Binary("Z"))
2118         );
2119         assert_eq!(
2120             t(r"\p{gc=Separator}"),
2121             hir_uclass_query(ClassQuery::Binary("Z"))
2122         );
2123         assert_eq!(
2124             t(r"\p{Other}"),
2125             hir_uclass_query(ClassQuery::Binary("Other"))
2126         );
2127         assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2128 
2129         assert_eq!(
2130             t(r"\PZ"),
2131             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2132         );
2133         assert_eq!(
2134             t(r"\P{separator}"),
2135             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2136         );
2137         assert_eq!(
2138             t(r"\P{gc!=separator}"),
2139             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2140         );
2141 
2142         assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2143         assert_eq!(
2144             t(r"\p{assigned}"),
2145             hir_uclass_query(ClassQuery::Binary("Assigned"))
2146         );
2147         assert_eq!(
2148             t(r"\p{ascii}"),
2149             hir_uclass_query(ClassQuery::Binary("ASCII"))
2150         );
2151         assert_eq!(
2152             t(r"\p{gc:any}"),
2153             hir_uclass_query(ClassQuery::Binary("Any"))
2154         );
2155         assert_eq!(
2156             t(r"\p{gc:assigned}"),
2157             hir_uclass_query(ClassQuery::Binary("Assigned"))
2158         );
2159         assert_eq!(
2160             t(r"\p{gc:ascii}"),
2161             hir_uclass_query(ClassQuery::Binary("ASCII"))
2162         );
2163 
2164         assert_eq!(
2165             t_err(r"(?-u)\pZ"),
2166             TestError {
2167                 kind: hir::ErrorKind::UnicodeNotAllowed,
2168                 span: Span::new(
2169                     Position::new(5, 1, 6),
2170                     Position::new(8, 1, 9)
2171                 ),
2172             }
2173         );
2174         assert_eq!(
2175             t_err(r"(?-u)\p{Separator}"),
2176             TestError {
2177                 kind: hir::ErrorKind::UnicodeNotAllowed,
2178                 span: Span::new(
2179                     Position::new(5, 1, 6),
2180                     Position::new(18, 1, 19)
2181                 ),
2182             }
2183         );
2184         assert_eq!(
2185             t_err(r"\pE"),
2186             TestError {
2187                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2188                 span: Span::new(
2189                     Position::new(0, 1, 1),
2190                     Position::new(3, 1, 4)
2191                 ),
2192             }
2193         );
2194         assert_eq!(
2195             t_err(r"\p{Foo}"),
2196             TestError {
2197                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2198                 span: Span::new(
2199                     Position::new(0, 1, 1),
2200                     Position::new(7, 1, 8)
2201                 ),
2202             }
2203         );
2204         assert_eq!(
2205             t_err(r"\p{gc:Foo}"),
2206             TestError {
2207                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2208                 span: Span::new(
2209                     Position::new(0, 1, 1),
2210                     Position::new(10, 1, 11)
2211                 ),
2212             }
2213         );
2214     }
2215 
2216     #[test]
2217     #[cfg(not(feature = "unicode-gencat"))]
class_unicode_gencat_disabled()2218     fn class_unicode_gencat_disabled() {
2219         assert_eq!(
2220             t_err(r"\p{Separator}"),
2221             TestError {
2222                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2223                 span: Span::new(
2224                     Position::new(0, 1, 1),
2225                     Position::new(13, 1, 14)
2226                 ),
2227             }
2228         );
2229 
2230         assert_eq!(
2231             t_err(r"\p{Any}"),
2232             TestError {
2233                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2234                 span: Span::new(
2235                     Position::new(0, 1, 1),
2236                     Position::new(7, 1, 8)
2237                 ),
2238             }
2239         );
2240     }
2241 
2242     #[test]
2243     #[cfg(feature = "unicode-script")]
class_unicode_script()2244     fn class_unicode_script() {
2245         assert_eq!(
2246             t(r"\p{Greek}"),
2247             hir_uclass_query(ClassQuery::Binary("Greek"))
2248         );
2249         #[cfg(feature = "unicode-case")]
2250         assert_eq!(
2251             t(r"(?i)\p{Greek}"),
2252             hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2253         );
2254         #[cfg(feature = "unicode-case")]
2255         assert_eq!(
2256             t(r"(?i)\P{Greek}"),
2257             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2258                 "Greek"
2259             ))))
2260         );
2261 
2262         assert_eq!(
2263             t_err(r"\p{sc:Foo}"),
2264             TestError {
2265                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2266                 span: Span::new(
2267                     Position::new(0, 1, 1),
2268                     Position::new(10, 1, 11)
2269                 ),
2270             }
2271         );
2272         assert_eq!(
2273             t_err(r"\p{scx:Foo}"),
2274             TestError {
2275                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2276                 span: Span::new(
2277                     Position::new(0, 1, 1),
2278                     Position::new(11, 1, 12)
2279                 ),
2280             }
2281         );
2282     }
2283 
2284     #[test]
2285     #[cfg(not(feature = "unicode-script"))]
class_unicode_script_disabled()2286     fn class_unicode_script_disabled() {
2287         assert_eq!(
2288             t_err(r"\p{Greek}"),
2289             TestError {
2290                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2291                 span: Span::new(
2292                     Position::new(0, 1, 1),
2293                     Position::new(9, 1, 10)
2294                 ),
2295             }
2296         );
2297 
2298         assert_eq!(
2299             t_err(r"\p{scx:Greek}"),
2300             TestError {
2301                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2302                 span: Span::new(
2303                     Position::new(0, 1, 1),
2304                     Position::new(13, 1, 14)
2305                 ),
2306             }
2307         );
2308     }
2309 
2310     #[test]
2311     #[cfg(feature = "unicode-age")]
class_unicode_age()2312     fn class_unicode_age() {
2313         assert_eq!(
2314             t_err(r"\p{age:Foo}"),
2315             TestError {
2316                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2317                 span: Span::new(
2318                     Position::new(0, 1, 1),
2319                     Position::new(11, 1, 12)
2320                 ),
2321             }
2322         );
2323     }
2324 
2325     #[test]
2326     #[cfg(feature = "unicode-gencat")]
class_unicode_any_empty()2327     fn class_unicode_any_empty() {
2328         assert_eq!(
2329             t_err(r"\P{any}"),
2330             TestError {
2331                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2332                 span: Span::new(
2333                     Position::new(0, 1, 1),
2334                     Position::new(7, 1, 8)
2335                 ),
2336             }
2337         );
2338     }
2339 
2340     #[test]
2341     #[cfg(not(feature = "unicode-age"))]
class_unicode_age_disabled()2342     fn class_unicode_age_disabled() {
2343         assert_eq!(
2344             t_err(r"\p{age:3.0}"),
2345             TestError {
2346                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2347                 span: Span::new(
2348                     Position::new(0, 1, 1),
2349                     Position::new(11, 1, 12)
2350                 ),
2351             }
2352         );
2353     }
2354 
2355     #[test]
class_bracketed()2356     fn class_bracketed() {
2357         assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2358         assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2359         assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2360         assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2361         assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2362         assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2363         assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2364         assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2365         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2366         assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2367         #[cfg(feature = "unicode-gencat")]
2368         assert_eq!(
2369             t(r"[\pZ]"),
2370             hir_uclass_query(ClassQuery::Binary("separator"))
2371         );
2372         #[cfg(feature = "unicode-gencat")]
2373         assert_eq!(
2374             t(r"[\p{separator}]"),
2375             hir_uclass_query(ClassQuery::Binary("separator"))
2376         );
2377         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2378         assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2379         #[cfg(feature = "unicode-gencat")]
2380         assert_eq!(
2381             t(r"[^\PZ]"),
2382             hir_uclass_query(ClassQuery::Binary("separator"))
2383         );
2384         #[cfg(feature = "unicode-gencat")]
2385         assert_eq!(
2386             t(r"[^\P{separator}]"),
2387             hir_uclass_query(ClassQuery::Binary("separator"))
2388         );
2389         #[cfg(all(
2390             feature = "unicode-case",
2391             any(feature = "unicode-perl", feature = "unicode-gencat")
2392         ))]
2393         assert_eq!(
2394             t(r"(?i)[^\D]"),
2395             hir_uclass_query(ClassQuery::Binary("digit"))
2396         );
2397         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2398         assert_eq!(
2399             t(r"(?i)[^\P{greek}]"),
2400             hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2401         );
2402 
2403         assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2404         assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2405         assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2406 
2407         #[cfg(feature = "unicode-case")]
2408         assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2409         #[cfg(feature = "unicode-case")]
2410         assert_eq!(
2411             t("(?i)[k]"),
2412             hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2413         );
2414         #[cfg(feature = "unicode-case")]
2415         assert_eq!(
2416             t("(?i)[β]"),
2417             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2418         );
2419         assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2420 
2421         assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2422         assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2423         assert_eq!(
2424             t_bytes("(?-u)[^a]"),
2425             hir_negate(hir_bclass(&[(b'a', b'a')]))
2426         );
2427         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2428         assert_eq!(
2429             t(r"[^\d]"),
2430             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2431         );
2432         #[cfg(feature = "unicode-gencat")]
2433         assert_eq!(
2434             t(r"[^\pZ]"),
2435             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2436         );
2437         #[cfg(feature = "unicode-gencat")]
2438         assert_eq!(
2439             t(r"[^\p{separator}]"),
2440             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2441         );
2442         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2443         assert_eq!(
2444             t(r"(?i)[^\p{greek}]"),
2445             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2446                 "greek"
2447             ))))
2448         );
2449         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2450         assert_eq!(
2451             t(r"(?i)[\P{greek}]"),
2452             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2453                 "greek"
2454             ))))
2455         );
2456 
2457         // Test some weird cases.
2458         assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2459 
2460         assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2461         assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2462         assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2463         assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2464         assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2465 
2466         assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2467         assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2468         assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2469         assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2470         assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2471 
2472         assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2473         assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2474         assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2475         assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2476         assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2477 
2478         assert_eq!(
2479             t_err("(?-u)[^a]"),
2480             TestError {
2481                 kind: hir::ErrorKind::InvalidUtf8,
2482                 span: Span::new(
2483                     Position::new(5, 1, 6),
2484                     Position::new(9, 1, 10)
2485                 ),
2486             }
2487         );
2488         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2489         assert_eq!(
2490             t_err(r"[^\s\S]"),
2491             TestError {
2492                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2493                 span: Span::new(
2494                     Position::new(0, 1, 1),
2495                     Position::new(7, 1, 8)
2496                 ),
2497             }
2498         );
2499         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2500         assert_eq!(
2501             t_err(r"(?-u)[^\s\S]"),
2502             TestError {
2503                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2504                 span: Span::new(
2505                     Position::new(5, 1, 6),
2506                     Position::new(12, 1, 13)
2507                 ),
2508             }
2509         );
2510     }
2511 
    // Bracketed classes that list several items. Each pattern is translated
    // with `t` and compared against an expected HIR assembled from the test
    // helpers (`hir_uclass`, `hir_uclass_query`, `hir_union`, ...).
    //
    // Assertions that involve Unicode property queries are only compiled when
    // the Cargo features named in their `cfg` attribute are enabled.
    #[test]
    fn class_bracketed_union() {
        // Two literal ranges. The expected ranges are listed `A-Z` first even
        // though the pattern writes `a-z` first.
        assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
        // Literals on both sides of a property class: the expected value
        // joins the single range `a-b` with the `separator` query.
        #[cfg(feature = "unicode-gencat")]
        assert_eq!(
            t(r"[a\pZb]"),
            hir_union(
                hir_uclass(&[('a', 'b')]),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        );
        // Two property classes side by side.
        #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
        assert_eq!(
            t(r"[\pZ\p{Greek}]"),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        );
        // Three property classes, one of them a `name:value` query.
        #[cfg(all(
            feature = "unicode-age",
            feature = "unicode-gencat",
            feature = "unicode-script"
        ))]
        assert_eq!(
            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
            hir_union(
                hir_uclass_query(ClassQuery::ByValue {
                    property_name: "age",
                    property_value: "3.0",
                }),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            )
        );
        // The same items spread over nested bracketed classes, plus Cyrillic:
        // the expected value is still a plain tree of `hir_union` calls.
        #[cfg(all(
            feature = "unicode-age",
            feature = "unicode-gencat",
            feature = "unicode-script"
        ))]
        assert_eq!(
            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
            hir_union(
                hir_uclass_query(ClassQuery::ByValue {
                    property_name: "age",
                    property_value: "3.0",
                }),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
                    hir_union(
                        hir_uclass_query(ClassQuery::Binary("greek")),
                        hir_uclass_query(ClassQuery::Binary("separator"))
                    )
                )
            )
        );

        // With the `i` flag the whole expected union is wrapped in
        // `hir_case_fold`.
        #[cfg(all(
            feature = "unicode-age",
            feature = "unicode-case",
            feature = "unicode-gencat",
            feature = "unicode-script"
        ))]
        assert_eq!(
            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
            hir_case_fold(hir_union(
                hir_uclass_query(ClassQuery::ByValue {
                    property_name: "age",
                    property_value: "3.0",
                }),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            ))
        );
        // With a leading `^` the expected union is wrapped in `hir_negate`.
        #[cfg(all(
            feature = "unicode-age",
            feature = "unicode-gencat",
            feature = "unicode-script"
        ))]
        assert_eq!(
            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
            hir_negate(hir_union(
                hir_uclass_query(ClassQuery::ByValue {
                    property_name: "age",
                    property_value: "3.0",
                }),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            ))
        );
        // With both `(?i)` and `^`, the expected value applies
        // `hir_case_fold` first and `hir_negate` on top of it.
        #[cfg(all(
            feature = "unicode-age",
            feature = "unicode-case",
            feature = "unicode-gencat",
            feature = "unicode-script"
        ))]
        assert_eq!(
            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
            hir_negate(hir_case_fold(hir_union(
                hir_uclass_query(ClassQuery::ByValue {
                    property_name: "age",
                    property_value: "3.0",
                }),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            )))
        );
    }
2628 
    // Bracketed classes that contain a nested, negated bracketed class.
    // Case-insensitive variants are only compiled with the `unicode-case`
    // feature.
    #[test]
    fn class_bracketed_nested() {
        // Outer items joined with a nested `[^c]`: the expected class is the
        // negation of `c` alone when the outer items do not include `c`.
        assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
        assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
        // When the outer range does include `c`, the expected value is the
        // negation of an empty class.
        assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

        // Negating the outer class as well leaves just `c`.
        assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
        assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));

        // Case-insensitive forms: `c` is case folded before being negated.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[a[^c]]"),
            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[a-b[^c]]"),
            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
        );

        // Case-insensitive and doubly negated: both cases of `c` remain.
        #[cfg(feature = "unicode-case")]
        assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[^a-b[^c]]"),
            hir_uclass(&[('C', 'C'), ('c', 'c')])
        );

        // Negating a class that already covers everything is reported as
        // `EmptyClassNotAllowed`. The expected span runs from position 0 to
        // 10, i.e. the length of the whole bracketed class.
        // NOTE(review): `Position::new` presumably takes (offset, line,
        // column) -- confirm against the `ast` module.
        assert_eq!(
            t_err(r"[^a-c[^c]]"),
            TestError {
                kind: hir::ErrorKind::EmptyClassNotAllowed,
                span: Span::new(
                    Position::new(0, 1, 1),
                    Position::new(10, 1, 11)
                ),
            }
        );
        // Same error with a `(?i)` prefix: the expected span is shifted by
        // the four characters of the flag group.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t_err(r"(?i)[^a-c[^c]]"),
            TestError {
                kind: hir::ErrorKind::EmptyClassNotAllowed,
                span: Span::new(
                    Position::new(4, 1, 5),
                    Position::new(14, 1, 15)
                ),
            }
        );
    }
2679 
    // The `&&` (intersection) operator inside bracketed classes, checked in
    // four flag combinations: default, `(?-u)`, `(?i)` and `(?i-u)`, followed
    // by escaping and precedence corner cases.
    #[test]
    fn class_bracketed_intersect() {
        // Default flags: expected values are Unicode classes.
        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

        // `(?-u)`: the same patterns are expected to yield byte classes.
        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));

        // `(?i)`: the expected Unicode class is case folded. These need the
        // `unicode-case` feature.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[abc&&b-c]"),
            hir_case_fold(hir_uclass(&[('b', 'c')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[abc&&[b-c]]"),
            hir_case_fold(hir_uclass(&[('b', 'c')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[[abc]&&[b-c]]"),
            hir_case_fold(hir_uclass(&[('b', 'c')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[a-z&&b-y&&c-x]"),
            hir_case_fold(hir_uclass(&[('c', 'x')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[c-da-b&&a-d]"),
            hir_case_fold(hir_uclass(&[('a', 'd')]))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)[a-d&&c-da-b]"),
            hir_case_fold(hir_uclass(&[('a', 'd')]))
        );

        // `(?i-u)`: case folded byte classes; no feature gate on these.
        assert_eq!(
            t("(?i-u)[abc&&b-c]"),
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
        );
        assert_eq!(
            t("(?i-u)[abc&&[b-c]]"),
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
        );
        assert_eq!(
            t("(?i-u)[[abc]&&[b-c]]"),
            hir_case_fold(hir_bclass(&[(b'b', b'c')]))
        );
        assert_eq!(
            t("(?i-u)[a-z&&b-y&&c-x]"),
            hir_case_fold(hir_bclass(&[(b'c', b'x')]))
        );
        assert_eq!(
            t("(?i-u)[c-da-b&&a-d]"),
            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
        );
        assert_eq!(
            t("(?i-u)[a-d&&c-da-b]"),
            hir_case_fold(hir_bclass(&[(b'a', b'd')]))
        );

        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
        // `^` is also allowed to be unescaped after `&&`.
        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
        // `]` needs to be escaped after `&&` since it's not at start of class.
        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
        // A literal `-` on both sides of `&&`.
        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
        // Literal `&` next to the operator, escaped and unescaped.
        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
        // Test precedence.
        // `z` does not appear in the expected class, so `[^c-g]z` is grouped
        // as one operand before being intersected with `a-w`.
        assert_eq!(
            t(r"[a-w&&[^c-g]z]"),
            hir_uclass(&[('a', 'b'), ('h', 'w')])
        );
    }
2769 
    // Interaction of `&&` with negation (`^`), in Unicode mode via `t` and in
    // `(?-u)` mode via `t_bytes`.
    #[test]
    fn class_bracketed_intersect_negate() {
        // Unicode mode. `\w&&\d` is expected to equal the `digit` query, so
        // negated forms are expected to be its negation.
        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t(r"[^\w&&\d]"),
            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
        );
        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t(r"[^[\w&&\d]]"),
            hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
        );
        // Two negations cancel out.
        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t(r"[^[^\w&&\d]]"),
            hir_uclass_query(ClassQuery::Binary("digit"))
        );
        // Intersecting two negated classes: the expected value is the
        // negation of the Perl word class.
        #[cfg(feature = "unicode-perl")]
        assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));

        // `(?-u)` mode: the same shapes, with expected values built from the
        // ASCII class tables through `hir_bclass_from_char(ascii_class(..))`.
        // NOTE(review): this first case is gated on `unicode-perl` while its
        // `(?-u)` siblings below are not -- confirm whether the gate is
        // actually required here.
        #[cfg(feature = "unicode-perl")]
        assert_eq!(
            t_bytes(r"(?-u)[^\w&&\d]"),
            hir_negate(hir_bclass_from_char(ascii_class(
                &ast::ClassAsciiKind::Digit
            )))
        );
        assert_eq!(
            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
            hir_negate(hir_bclass(&[(b'a', b'c')]))
        );
        assert_eq!(
            t_bytes(r"(?-u)[^[\w&&\d]]"),
            hir_negate(hir_bclass_from_char(ascii_class(
                &ast::ClassAsciiKind::Digit
            )))
        );
        assert_eq!(
            t_bytes(r"(?-u)[^[^\w&&\d]]"),
            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
        );
        assert_eq!(
            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
            hir_negate(hir_bclass_from_char(ascii_class(
                &ast::ClassAsciiKind::Word
            )))
        );
    }
2819 
2820     #[test]
class_bracketed_difference()2821     fn class_bracketed_difference() {
2822         #[cfg(feature = "unicode-gencat")]
2823         assert_eq!(
2824             t(r"[\pL--[:ascii:]]"),
2825             hir_difference(
2826                 hir_uclass_query(ClassQuery::Binary("letter")),
2827                 hir_uclass(&[('\0', '\x7F')])
2828             )
2829         );
2830 
2831         assert_eq!(
2832             t(r"(?-u)[[:alpha:]--[:lower:]]"),
2833             hir_bclass(&[(b'A', b'Z')])
2834         );
2835     }
2836 
2837     #[test]
class_bracketed_symmetric_difference()2838     fn class_bracketed_symmetric_difference() {
2839         #[cfg(feature = "unicode-script")]
2840         assert_eq!(
2841             t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2842             hir_uclass(&[
2843                 ('\u{0342}', '\u{0342}'),
2844                 ('\u{0345}', '\u{0345}'),
2845                 ('\u{1DC0}', '\u{1DC1}'),
2846             ])
2847         );
2848         assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2849 
2850         assert_eq!(
2851             t(r"(?-u)[a-g~~c-j]"),
2852             hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2853         );
2854     }
2855 
    // Translation of patterns written with the `x` flag, where whitespace and
    // `# ...` comments appear in the middle of escapes, property classes and
    // counted repetitions. The multi-line raw strings below are part of the
    // test input; their line breaks and indentation are intentional.
    #[test]
    fn ignore_whitespace() {
        // `\12` followed by a space and `3`: expected literal is "\n3".
        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
        // Braced hex escape with spaces inside and around the braces.
        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
        // Same escape spread over several lines with trailing comments.
        assert_eq!(
            t(r"(?x)\x # comment
{ # comment
    53 # comment
} #comment"),
            hir_lit("S")
        );

        // Unbraced hex escape with whitespace before the digits ...
        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
        // ... with a comment and a line break before the digits ...
        assert_eq!(
            t(r"(?x)\x # comment
        53 # comment"),
            hir_lit("S")
        );
        // ... and with whitespace between the two digits.
        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));

        // Unicode property class split across lines; needs the general
        // category tables.
        #[cfg(feature = "unicode-gencat")]
        assert_eq!(
            t(r"(?x)\p # comment
{ # comment
    Separator # comment
} # comment"),
            hir_uclass_query(ClassQuery::Binary("separator"))
        );

        // Counted repetition `a{5,10}` split across lines.
        // NOTE(review): the leading `true` passed to `hir_range` is
        // presumably the greedy flag -- confirm against the helper.
        assert_eq!(
            t(r"(?x)a # comment
{ # comment
    5 # comment
    , # comment
    10 # comment
} # comment"),
            hir_range(
                true,
                hir::RepetitionRange::Bounded(5, 10),
                hir_lit("a")
            )
        );

        // An escaped space is kept as a literal; the unescaped space and the
        // comment after it are dropped.
        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
    }
2901 
2902     #[test]
analysis_is_always_utf8()2903     fn analysis_is_always_utf8() {
2904         // Positive examples.
2905         assert!(t_bytes(r"a").is_always_utf8());
2906         assert!(t_bytes(r"ab").is_always_utf8());
2907         assert!(t_bytes(r"(?-u)a").is_always_utf8());
2908         assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2909         assert!(t_bytes(r"\xFF").is_always_utf8());
2910         assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2911         assert!(t_bytes(r"[^a]").is_always_utf8());
2912         assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2913         assert!(t_bytes(r"\b").is_always_utf8());
2914         assert!(t_bytes(r"\B").is_always_utf8());
2915         assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2916 
2917         // Negative examples.
2918         assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2919         assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2920         assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2921         assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2922         assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2923     }
2924 
2925     #[test]
analysis_is_all_assertions()2926     fn analysis_is_all_assertions() {
2927         // Positive examples.
2928         assert!(t(r"\b").is_all_assertions());
2929         assert!(t(r"\B").is_all_assertions());
2930         assert!(t(r"^").is_all_assertions());
2931         assert!(t(r"$").is_all_assertions());
2932         assert!(t(r"\A").is_all_assertions());
2933         assert!(t(r"\z").is_all_assertions());
2934         assert!(t(r"$^\z\A\b\B").is_all_assertions());
2935         assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2936         assert!(t(r"^$|$^").is_all_assertions());
2937         assert!(t(r"((\b)+())*^").is_all_assertions());
2938 
2939         // Negative examples.
2940         assert!(!t(r"^a").is_all_assertions());
2941     }
2942 
2943     #[test]
analysis_is_anchored()2944     fn analysis_is_anchored() {
2945         // Positive examples.
2946         assert!(t(r"^").is_anchored_start());
2947         assert!(t(r"$").is_anchored_end());
2948         assert!(t(r"^").is_line_anchored_start());
2949         assert!(t(r"$").is_line_anchored_end());
2950 
2951         assert!(t(r"^^").is_anchored_start());
2952         assert!(t(r"$$").is_anchored_end());
2953         assert!(t(r"^^").is_line_anchored_start());
2954         assert!(t(r"$$").is_line_anchored_end());
2955 
2956         assert!(t(r"^$").is_anchored_start());
2957         assert!(t(r"^$").is_anchored_end());
2958         assert!(t(r"^$").is_line_anchored_start());
2959         assert!(t(r"^$").is_line_anchored_end());
2960 
2961         assert!(t(r"^foo").is_anchored_start());
2962         assert!(t(r"foo$").is_anchored_end());
2963         assert!(t(r"^foo").is_line_anchored_start());
2964         assert!(t(r"foo$").is_line_anchored_end());
2965 
2966         assert!(t(r"^foo|^bar").is_anchored_start());
2967         assert!(t(r"foo$|bar$").is_anchored_end());
2968         assert!(t(r"^foo|^bar").is_line_anchored_start());
2969         assert!(t(r"foo$|bar$").is_line_anchored_end());
2970 
2971         assert!(t(r"^(foo|bar)").is_anchored_start());
2972         assert!(t(r"(foo|bar)$").is_anchored_end());
2973         assert!(t(r"^(foo|bar)").is_line_anchored_start());
2974         assert!(t(r"(foo|bar)$").is_line_anchored_end());
2975 
2976         assert!(t(r"^+").is_anchored_start());
2977         assert!(t(r"$+").is_anchored_end());
2978         assert!(t(r"^+").is_line_anchored_start());
2979         assert!(t(r"$+").is_line_anchored_end());
2980         assert!(t(r"^++").is_anchored_start());
2981         assert!(t(r"$++").is_anchored_end());
2982         assert!(t(r"^++").is_line_anchored_start());
2983         assert!(t(r"$++").is_line_anchored_end());
2984         assert!(t(r"(^)+").is_anchored_start());
2985         assert!(t(r"($)+").is_anchored_end());
2986         assert!(t(r"(^)+").is_line_anchored_start());
2987         assert!(t(r"($)+").is_line_anchored_end());
2988 
2989         assert!(t(r"$^").is_anchored_start());
2990         assert!(t(r"$^").is_anchored_start());
2991         assert!(t(r"$^").is_line_anchored_end());
2992         assert!(t(r"$^").is_line_anchored_end());
2993         assert!(t(r"$^|^$").is_anchored_start());
2994         assert!(t(r"$^|^$").is_anchored_end());
2995         assert!(t(r"$^|^$").is_line_anchored_start());
2996         assert!(t(r"$^|^$").is_line_anchored_end());
2997 
2998         assert!(t(r"\b^").is_anchored_start());
2999         assert!(t(r"$\b").is_anchored_end());
3000         assert!(t(r"\b^").is_line_anchored_start());
3001         assert!(t(r"$\b").is_line_anchored_end());
3002         assert!(t(r"^(?m:^)").is_anchored_start());
3003         assert!(t(r"(?m:$)$").is_anchored_end());
3004         assert!(t(r"^(?m:^)").is_line_anchored_start());
3005         assert!(t(r"(?m:$)$").is_line_anchored_end());
3006         assert!(t(r"(?m:^)^").is_anchored_start());
3007         assert!(t(r"$(?m:$)").is_anchored_end());
3008         assert!(t(r"(?m:^)^").is_line_anchored_start());
3009         assert!(t(r"$(?m:$)").is_line_anchored_end());
3010 
3011         // Negative examples.
3012         assert!(!t(r"(?m)^").is_anchored_start());
3013         assert!(!t(r"(?m)$").is_anchored_end());
3014         assert!(!t(r"(?m:^$)|$^").is_anchored_start());
3015         assert!(!t(r"(?m:^$)|$^").is_anchored_end());
3016         assert!(!t(r"$^|(?m:^$)").is_anchored_start());
3017         assert!(!t(r"$^|(?m:^$)").is_anchored_end());
3018 
3019         assert!(!t(r"a^").is_anchored_start());
3020         assert!(!t(r"$a").is_anchored_start());
3021         assert!(!t(r"a^").is_line_anchored_start());
3022         assert!(!t(r"$a").is_line_anchored_start());
3023 
3024         assert!(!t(r"a^").is_anchored_end());
3025         assert!(!t(r"$a").is_anchored_end());
3026         assert!(!t(r"a^").is_line_anchored_end());
3027         assert!(!t(r"$a").is_line_anchored_end());
3028 
3029         assert!(!t(r"^foo|bar").is_anchored_start());
3030         assert!(!t(r"foo|bar$").is_anchored_end());
3031         assert!(!t(r"^foo|bar").is_line_anchored_start());
3032         assert!(!t(r"foo|bar$").is_line_anchored_end());
3033 
3034         assert!(!t(r"^*").is_anchored_start());
3035         assert!(!t(r"$*").is_anchored_end());
3036         assert!(!t(r"^*").is_line_anchored_start());
3037         assert!(!t(r"$*").is_line_anchored_end());
3038         assert!(!t(r"^*+").is_anchored_start());
3039         assert!(!t(r"$*+").is_anchored_end());
3040         assert!(!t(r"^*+").is_line_anchored_start());
3041         assert!(!t(r"$*+").is_line_anchored_end());
3042         assert!(!t(r"^+*").is_anchored_start());
3043         assert!(!t(r"$+*").is_anchored_end());
3044         assert!(!t(r"^+*").is_line_anchored_start());
3045         assert!(!t(r"$+*").is_line_anchored_end());
3046         assert!(!t(r"(^)*").is_anchored_start());
3047         assert!(!t(r"($)*").is_anchored_end());
3048         assert!(!t(r"(^)*").is_line_anchored_start());
3049         assert!(!t(r"($)*").is_line_anchored_end());
3050     }
3051 
3052     #[test]
analysis_is_line_anchored()3053     fn analysis_is_line_anchored() {
3054         assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3055         assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3056 
3057         assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3058         assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3059 
3060         assert!(t(r"(?m)^").is_line_anchored_start());
3061         assert!(t(r"(?m)$").is_line_anchored_end());
3062 
3063         assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3064         assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3065 
3066         assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3067         assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3068     }
3069 
3070     #[test]
analysis_is_any_anchored()3071     fn analysis_is_any_anchored() {
3072         // Positive examples.
3073         assert!(t(r"^").is_any_anchored_start());
3074         assert!(t(r"$").is_any_anchored_end());
3075         assert!(t(r"\A").is_any_anchored_start());
3076         assert!(t(r"\z").is_any_anchored_end());
3077 
3078         // Negative examples.
3079         assert!(!t(r"(?m)^").is_any_anchored_start());
3080         assert!(!t(r"(?m)$").is_any_anchored_end());
3081         assert!(!t(r"$").is_any_anchored_start());
3082         assert!(!t(r"^").is_any_anchored_end());
3083     }
3084 
3085     #[test]
analysis_is_match_empty()3086     fn analysis_is_match_empty() {
3087         // Positive examples.
3088         assert!(t(r"").is_match_empty());
3089         assert!(t(r"()").is_match_empty());
3090         assert!(t(r"()*").is_match_empty());
3091         assert!(t(r"()+").is_match_empty());
3092         assert!(t(r"()?").is_match_empty());
3093         assert!(t(r"a*").is_match_empty());
3094         assert!(t(r"a?").is_match_empty());
3095         assert!(t(r"a{0}").is_match_empty());
3096         assert!(t(r"a{0,}").is_match_empty());
3097         assert!(t(r"a{0,1}").is_match_empty());
3098         assert!(t(r"a{0,10}").is_match_empty());
3099         #[cfg(feature = "unicode-gencat")]
3100         assert!(t(r"\pL*").is_match_empty());
3101         assert!(t(r"a*|b").is_match_empty());
3102         assert!(t(r"b|a*").is_match_empty());
3103         assert!(t(r"a*a?(abcd)*").is_match_empty());
3104         assert!(t(r"^").is_match_empty());
3105         assert!(t(r"$").is_match_empty());
3106         assert!(t(r"(?m)^").is_match_empty());
3107         assert!(t(r"(?m)$").is_match_empty());
3108         assert!(t(r"\A").is_match_empty());
3109         assert!(t(r"\z").is_match_empty());
3110         assert!(t(r"\B").is_match_empty());
3111         assert!(t_bytes(r"(?-u)\B").is_match_empty());
3112 
3113         // Negative examples.
3114         assert!(!t(r"a+").is_match_empty());
3115         assert!(!t(r"a{1}").is_match_empty());
3116         assert!(!t(r"a{1,}").is_match_empty());
3117         assert!(!t(r"a{1,2}").is_match_empty());
3118         assert!(!t(r"a{1,10}").is_match_empty());
3119         assert!(!t(r"b|a").is_match_empty());
3120         assert!(!t(r"a*a+(abcd)*").is_match_empty());
3121         assert!(!t(r"\b").is_match_empty());
3122         assert!(!t(r"(?-u)\b").is_match_empty());
3123     }
3124 
3125     #[test]
analysis_is_literal()3126     fn analysis_is_literal() {
3127         // Positive examples.
3128         assert!(t(r"a").is_literal());
3129         assert!(t(r"ab").is_literal());
3130         assert!(t(r"abc").is_literal());
3131         assert!(t(r"(?m)abc").is_literal());
3132 
3133         // Negative examples.
3134         assert!(!t(r"").is_literal());
3135         assert!(!t(r"^").is_literal());
3136         assert!(!t(r"a|b").is_literal());
3137         assert!(!t(r"(a)").is_literal());
3138         assert!(!t(r"a+").is_literal());
3139         assert!(!t(r"foo(a)").is_literal());
3140         assert!(!t(r"(a)foo").is_literal());
3141         assert!(!t(r"[a]").is_literal());
3142     }
3143 
3144     #[test]
analysis_is_alternation_literal()3145     fn analysis_is_alternation_literal() {
3146         // Positive examples.
3147         assert!(t(r"a").is_alternation_literal());
3148         assert!(t(r"ab").is_alternation_literal());
3149         assert!(t(r"abc").is_alternation_literal());
3150         assert!(t(r"(?m)abc").is_alternation_literal());
3151         assert!(t(r"a|b").is_alternation_literal());
3152         assert!(t(r"a|b|c").is_alternation_literal());
3153         assert!(t(r"foo|bar").is_alternation_literal());
3154         assert!(t(r"foo|bar|baz").is_alternation_literal());
3155 
3156         // Negative examples.
3157         assert!(!t(r"").is_alternation_literal());
3158         assert!(!t(r"^").is_alternation_literal());
3159         assert!(!t(r"(a)").is_alternation_literal());
3160         assert!(!t(r"a+").is_alternation_literal());
3161         assert!(!t(r"foo(a)").is_alternation_literal());
3162         assert!(!t(r"(a)foo").is_alternation_literal());
3163         assert!(!t(r"[a]").is_alternation_literal());
3164         assert!(!t(r"[a]|b").is_alternation_literal());
3165         assert!(!t(r"a|[b]").is_alternation_literal());
3166         assert!(!t(r"(a)|b").is_alternation_literal());
3167         assert!(!t(r"a|(b)").is_alternation_literal());
3168     }
3169 }
3170