• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2 Defines a translator that converts an `Ast` to an `Hir`.
3 */
4 
5 use std::cell::{Cell, RefCell};
6 use std::result;
7 
8 use crate::ast::{self, Ast, Span, Visitor};
9 use crate::hir::{self, Error, ErrorKind, Hir};
10 use crate::unicode::{self, ClassQuery};
11 
12 type Result<T> = result::Result<T, Error>;
13 
14 /// A builder for constructing an AST->HIR translator.
15 #[derive(Clone, Debug)]
16 pub struct TranslatorBuilder {
17     allow_invalid_utf8: bool,
18     flags: Flags,
19 }
20 
21 impl Default for TranslatorBuilder {
default() -> TranslatorBuilder22     fn default() -> TranslatorBuilder {
23         TranslatorBuilder::new()
24     }
25 }
26 
27 impl TranslatorBuilder {
28     /// Create a new translator builder with a default c onfiguration.
new() -> TranslatorBuilder29     pub fn new() -> TranslatorBuilder {
30         TranslatorBuilder {
31             allow_invalid_utf8: false,
32             flags: Flags::default(),
33         }
34     }
35 
36     /// Build a translator using the current configuration.
build(&self) -> Translator37     pub fn build(&self) -> Translator {
38         Translator {
39             stack: RefCell::new(vec![]),
40             flags: Cell::new(self.flags),
41             allow_invalid_utf8: self.allow_invalid_utf8,
42         }
43     }
44 
45     /// When enabled, translation will permit the construction of a regular
46     /// expression that may match invalid UTF-8.
47     ///
48     /// When disabled (the default), the translator is guaranteed to produce
49     /// an expression that will only ever match valid UTF-8 (otherwise, the
50     /// translator will return an error).
51     ///
52     /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53     /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54     /// the parser to return an error. Namely, a negated ASCII word boundary
55     /// can result in matching positions that aren't valid UTF-8 boundaries.
allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder56     pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57         self.allow_invalid_utf8 = yes;
58         self
59     }
60 
61     /// Enable or disable the case insensitive flag (`i`) by default.
case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder62     pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63         self.flags.case_insensitive = if yes { Some(true) } else { None };
64         self
65     }
66 
67     /// Enable or disable the multi-line matching flag (`m`) by default.
multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder68     pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69         self.flags.multi_line = if yes { Some(true) } else { None };
70         self
71     }
72 
73     /// Enable or disable the "dot matches any character" flag (`s`) by
74     /// default.
dot_matches_new_line( &mut self, yes: bool, ) -> &mut TranslatorBuilder75     pub fn dot_matches_new_line(
76         &mut self,
77         yes: bool,
78     ) -> &mut TranslatorBuilder {
79         self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80         self
81     }
82 
83     /// Enable or disable the "swap greed" flag (`U`) by default.
swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder84     pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85         self.flags.swap_greed = if yes { Some(true) } else { None };
86         self
87     }
88 
89     /// Enable or disable the Unicode flag (`u`) by default.
unicode(&mut self, yes: bool) -> &mut TranslatorBuilder90     pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91         self.flags.unicode = if yes { None } else { Some(false) };
92         self
93     }
94 }
95 
96 /// A translator maps abstract syntax to a high level intermediate
97 /// representation.
98 ///
99 /// A translator may be benefit from reuse. That is, a translator can translate
100 /// many abstract syntax trees.
101 ///
102 /// A `Translator` can be configured in more detail via a
103 /// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
104 #[derive(Clone, Debug)]
105 pub struct Translator {
106     /// Our call stack, but on the heap.
107     stack: RefCell<Vec<HirFrame>>,
108     /// The current flag settings.
109     flags: Cell<Flags>,
110     /// Whether we're allowed to produce HIR that can match arbitrary bytes.
111     allow_invalid_utf8: bool,
112 }
113 
114 impl Translator {
115     /// Create a new translator using the default configuration.
new() -> Translator116     pub fn new() -> Translator {
117         TranslatorBuilder::new().build()
118     }
119 
120     /// Translate the given abstract syntax tree (AST) into a high level
121     /// intermediate representation (HIR).
122     ///
123     /// If there was a problem doing the translation, then an HIR-specific
124     /// error is returned.
125     ///
126     /// The original pattern string used to produce the `Ast` *must* also be
127     /// provided. The translator does not use the pattern string during any
128     /// correct translation, but is used for error reporting.
translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir>129     pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130         ast::visit(ast, TranslatorI::new(self, pattern))
131     }
132 }
133 
134 /// An HirFrame is a single stack frame, represented explicitly, which is
135 /// created for each item in the Ast that we traverse.
136 ///
137 /// Note that technically, this type doesn't represent our entire stack
138 /// frame. In particular, the Ast visitor represents any state associated with
139 /// traversing the Ast itself.
140 #[derive(Clone, Debug)]
141 enum HirFrame {
142     /// An arbitrary HIR expression. These get pushed whenever we hit a base
143     /// case in the Ast. They get popped after an inductive (i.e., recursive)
144     /// step is complete.
145     Expr(Hir),
146     /// A Unicode character class. This frame is mutated as we descend into
147     /// the Ast of a character class (which is itself its own mini recursive
148     /// structure).
149     ClassUnicode(hir::ClassUnicode),
150     /// A byte-oriented character class. This frame is mutated as we descend
151     /// into the Ast of a character class (which is itself its own mini
152     /// recursive structure).
153     ///
154     /// Byte character classes are created when Unicode mode (`u`) is disabled.
155     /// If `allow_invalid_utf8` is disabled (the default), then a byte
156     /// character is only permitted to match ASCII text.
157     ClassBytes(hir::ClassBytes),
158     /// This is pushed on to the stack upon first seeing any kind of group,
159     /// indicated by parentheses (including non-capturing groups). It is popped
160     /// upon leaving a group.
161     Group {
162         /// The old active flags when this group was opened.
163         ///
164         /// If this group sets flags, then the new active flags are set to the
165         /// result of merging the old flags with the flags introduced by this
166         /// group. If the group doesn't set any flags, then this is simply
167         /// equivalent to whatever flags were set when the group was opened.
168         ///
169         /// When this group is popped, the active flags should be restored to
170         /// the flags set here.
171         ///
172         /// The "active" flags correspond to whatever flags are set in the
173         /// Translator.
174         old_flags: Flags,
175     },
176     /// This is pushed whenever a concatenation is observed. After visiting
177     /// every sub-expression in the concatenation, the translator's stack is
178     /// popped until it sees a Concat frame.
179     Concat,
180     /// This is pushed whenever an alternation is observed. After visiting
181     /// every sub-expression in the alternation, the translator's stack is
182     /// popped until it sees an Alternation frame.
183     Alternation,
184 }
185 
186 impl HirFrame {
187     /// Assert that the current stack frame is an Hir expression and return it.
unwrap_expr(self) -> Hir188     fn unwrap_expr(self) -> Hir {
189         match self {
190             HirFrame::Expr(expr) => expr,
191             _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192         }
193     }
194 
195     /// Assert that the current stack frame is a Unicode class expression and
196     /// return it.
unwrap_class_unicode(self) -> hir::ClassUnicode197     fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198         match self {
199             HirFrame::ClassUnicode(cls) => cls,
200             _ => panic!(
201                 "tried to unwrap Unicode class \
202                  from HirFrame, got: {:?}",
203                 self
204             ),
205         }
206     }
207 
208     /// Assert that the current stack frame is a byte class expression and
209     /// return it.
unwrap_class_bytes(self) -> hir::ClassBytes210     fn unwrap_class_bytes(self) -> hir::ClassBytes {
211         match self {
212             HirFrame::ClassBytes(cls) => cls,
213             _ => panic!(
214                 "tried to unwrap byte class \
215                  from HirFrame, got: {:?}",
216                 self
217             ),
218         }
219     }
220 
221     /// Assert that the current stack frame is a group indicator and return
222     /// its corresponding flags (the flags that were active at the time the
223     /// group was entered).
unwrap_group(self) -> Flags224     fn unwrap_group(self) -> Flags {
225         match self {
226             HirFrame::Group { old_flags } => old_flags,
227             _ => {
228                 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229             }
230         }
231     }
232 }
233 
234 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235     type Output = Hir;
236     type Err = Error;
237 
finish(self) -> Result<Hir>238     fn finish(self) -> Result<Hir> {
239         // ... otherwise, we should have exactly one HIR on the stack.
240         assert_eq!(self.trans().stack.borrow().len(), 1);
241         Ok(self.pop().unwrap().unwrap_expr())
242     }
243 
visit_pre(&mut self, ast: &Ast) -> Result<()>244     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245         match *ast {
246             Ast::Class(ast::Class::Bracketed(_)) => {
247                 if self.flags().unicode() {
248                     let cls = hir::ClassUnicode::empty();
249                     self.push(HirFrame::ClassUnicode(cls));
250                 } else {
251                     let cls = hir::ClassBytes::empty();
252                     self.push(HirFrame::ClassBytes(cls));
253                 }
254             }
255             Ast::Group(ref x) => {
256                 let old_flags = x
257                     .flags()
258                     .map(|ast| self.set_flags(ast))
259                     .unwrap_or_else(|| self.flags());
260                 self.push(HirFrame::Group { old_flags });
261             }
262             Ast::Concat(ref x) if x.asts.is_empty() => {}
263             Ast::Concat(_) => {
264                 self.push(HirFrame::Concat);
265             }
266             Ast::Alternation(ref x) if x.asts.is_empty() => {}
267             Ast::Alternation(_) => {
268                 self.push(HirFrame::Alternation);
269             }
270             _ => {}
271         }
272         Ok(())
273     }
274 
visit_post(&mut self, ast: &Ast) -> Result<()>275     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276         match *ast {
277             Ast::Empty(_) => {
278                 self.push(HirFrame::Expr(Hir::empty()));
279             }
280             Ast::Flags(ref x) => {
281                 self.set_flags(&x.flags);
282                 // Flags in the AST are generally considered directives and
283                 // not actual sub-expressions. However, they can be used in
284                 // the concrete syntax like `((?i))`, and we need some kind of
285                 // indication of an expression there, and Empty is the correct
286                 // choice.
287                 //
288                 // There can also be things like `(?i)+`, but we rule those out
289                 // in the parser. In the future, we might allow them for
290                 // consistency sake.
291                 self.push(HirFrame::Expr(Hir::empty()));
292             }
293             Ast::Literal(ref x) => {
294                 self.push(HirFrame::Expr(self.hir_literal(x)?));
295             }
296             Ast::Dot(span) => {
297                 self.push(HirFrame::Expr(self.hir_dot(span)?));
298             }
299             Ast::Assertion(ref x) => {
300                 self.push(HirFrame::Expr(self.hir_assertion(x)?));
301             }
302             Ast::Class(ast::Class::Perl(ref x)) => {
303                 if self.flags().unicode() {
304                     let cls = self.hir_perl_unicode_class(x)?;
305                     let hcls = hir::Class::Unicode(cls);
306                     self.push(HirFrame::Expr(Hir::class(hcls)));
307                 } else {
308                     let cls = self.hir_perl_byte_class(x);
309                     let hcls = hir::Class::Bytes(cls);
310                     self.push(HirFrame::Expr(Hir::class(hcls)));
311                 }
312             }
313             Ast::Class(ast::Class::Unicode(ref x)) => {
314                 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315                 self.push(HirFrame::Expr(Hir::class(cls)));
316             }
317             Ast::Class(ast::Class::Bracketed(ref ast)) => {
318                 if self.flags().unicode() {
319                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
320                     self.unicode_fold_and_negate(
321                         &ast.span,
322                         ast.negated,
323                         &mut cls,
324                     )?;
325                     if cls.ranges().is_empty() {
326                         return Err(self.error(
327                             ast.span,
328                             ErrorKind::EmptyClassNotAllowed,
329                         ));
330                     }
331                     let expr = Hir::class(hir::Class::Unicode(cls));
332                     self.push(HirFrame::Expr(expr));
333                 } else {
334                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
335                     self.bytes_fold_and_negate(
336                         &ast.span,
337                         ast.negated,
338                         &mut cls,
339                     )?;
340                     if cls.ranges().is_empty() {
341                         return Err(self.error(
342                             ast.span,
343                             ErrorKind::EmptyClassNotAllowed,
344                         ));
345                     }
346 
347                     let expr = Hir::class(hir::Class::Bytes(cls));
348                     self.push(HirFrame::Expr(expr));
349                 }
350             }
351             Ast::Repetition(ref x) => {
352                 let expr = self.pop().unwrap().unwrap_expr();
353                 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354             }
355             Ast::Group(ref x) => {
356                 let expr = self.pop().unwrap().unwrap_expr();
357                 let old_flags = self.pop().unwrap().unwrap_group();
358                 self.trans().flags.set(old_flags);
359                 self.push(HirFrame::Expr(self.hir_group(x, expr)));
360             }
361             Ast::Concat(_) => {
362                 let mut exprs = vec![];
363                 while let Some(HirFrame::Expr(expr)) = self.pop() {
364                     if !expr.kind().is_empty() {
365                         exprs.push(expr);
366                     }
367                 }
368                 exprs.reverse();
369                 self.push(HirFrame::Expr(Hir::concat(exprs)));
370             }
371             Ast::Alternation(_) => {
372                 let mut exprs = vec![];
373                 while let Some(HirFrame::Expr(expr)) = self.pop() {
374                     exprs.push(expr);
375                 }
376                 exprs.reverse();
377                 self.push(HirFrame::Expr(Hir::alternation(exprs)));
378             }
379         }
380         Ok(())
381     }
382 
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>383     fn visit_class_set_item_pre(
384         &mut self,
385         ast: &ast::ClassSetItem,
386     ) -> Result<()> {
387         match *ast {
388             ast::ClassSetItem::Bracketed(_) => {
389                 if self.flags().unicode() {
390                     let cls = hir::ClassUnicode::empty();
391                     self.push(HirFrame::ClassUnicode(cls));
392                 } else {
393                     let cls = hir::ClassBytes::empty();
394                     self.push(HirFrame::ClassBytes(cls));
395                 }
396             }
397             // We needn't handle the Union case here since the visitor will
398             // do it for us.
399             _ => {}
400         }
401         Ok(())
402     }
403 
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>404     fn visit_class_set_item_post(
405         &mut self,
406         ast: &ast::ClassSetItem,
407     ) -> Result<()> {
408         match *ast {
409             ast::ClassSetItem::Empty(_) => {}
410             ast::ClassSetItem::Literal(ref x) => {
411                 if self.flags().unicode() {
412                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
413                     cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414                     self.push(HirFrame::ClassUnicode(cls));
415                 } else {
416                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
417                     let byte = self.class_literal_byte(x)?;
418                     cls.push(hir::ClassBytesRange::new(byte, byte));
419                     self.push(HirFrame::ClassBytes(cls));
420                 }
421             }
422             ast::ClassSetItem::Range(ref x) => {
423                 if self.flags().unicode() {
424                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
425                     cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426                     self.push(HirFrame::ClassUnicode(cls));
427                 } else {
428                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
429                     let start = self.class_literal_byte(&x.start)?;
430                     let end = self.class_literal_byte(&x.end)?;
431                     cls.push(hir::ClassBytesRange::new(start, end));
432                     self.push(HirFrame::ClassBytes(cls));
433                 }
434             }
435             ast::ClassSetItem::Ascii(ref x) => {
436                 if self.flags().unicode() {
437                     let xcls = self.hir_ascii_unicode_class(x)?;
438                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
439                     cls.union(&xcls);
440                     self.push(HirFrame::ClassUnicode(cls));
441                 } else {
442                     let xcls = self.hir_ascii_byte_class(x)?;
443                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
444                     cls.union(&xcls);
445                     self.push(HirFrame::ClassBytes(cls));
446                 }
447             }
448             ast::ClassSetItem::Unicode(ref x) => {
449                 let xcls = self.hir_unicode_class(x)?;
450                 let mut cls = self.pop().unwrap().unwrap_class_unicode();
451                 cls.union(&xcls);
452                 self.push(HirFrame::ClassUnicode(cls));
453             }
454             ast::ClassSetItem::Perl(ref x) => {
455                 if self.flags().unicode() {
456                     let xcls = self.hir_perl_unicode_class(x)?;
457                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
458                     cls.union(&xcls);
459                     self.push(HirFrame::ClassUnicode(cls));
460                 } else {
461                     let xcls = self.hir_perl_byte_class(x);
462                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
463                     cls.union(&xcls);
464                     self.push(HirFrame::ClassBytes(cls));
465                 }
466             }
467             ast::ClassSetItem::Bracketed(ref ast) => {
468                 if self.flags().unicode() {
469                     let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
470                     self.unicode_fold_and_negate(
471                         &ast.span,
472                         ast.negated,
473                         &mut cls1,
474                     )?;
475 
476                     let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
477                     cls2.union(&cls1);
478                     self.push(HirFrame::ClassUnicode(cls2));
479                 } else {
480                     let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
481                     self.bytes_fold_and_negate(
482                         &ast.span,
483                         ast.negated,
484                         &mut cls1,
485                     )?;
486 
487                     let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
488                     cls2.union(&cls1);
489                     self.push(HirFrame::ClassBytes(cls2));
490                 }
491             }
492             // This is handled automatically by the visitor.
493             ast::ClassSetItem::Union(_) => {}
494         }
495         Ok(())
496     }
497 
visit_class_set_binary_op_pre( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>498     fn visit_class_set_binary_op_pre(
499         &mut self,
500         _op: &ast::ClassSetBinaryOp,
501     ) -> Result<()> {
502         if self.flags().unicode() {
503             let cls = hir::ClassUnicode::empty();
504             self.push(HirFrame::ClassUnicode(cls));
505         } else {
506             let cls = hir::ClassBytes::empty();
507             self.push(HirFrame::ClassBytes(cls));
508         }
509         Ok(())
510     }
511 
visit_class_set_binary_op_in( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>512     fn visit_class_set_binary_op_in(
513         &mut self,
514         _op: &ast::ClassSetBinaryOp,
515     ) -> Result<()> {
516         if self.flags().unicode() {
517             let cls = hir::ClassUnicode::empty();
518             self.push(HirFrame::ClassUnicode(cls));
519         } else {
520             let cls = hir::ClassBytes::empty();
521             self.push(HirFrame::ClassBytes(cls));
522         }
523         Ok(())
524     }
525 
visit_class_set_binary_op_post( &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()>526     fn visit_class_set_binary_op_post(
527         &mut self,
528         op: &ast::ClassSetBinaryOp,
529     ) -> Result<()> {
530         use crate::ast::ClassSetBinaryOpKind::*;
531 
532         if self.flags().unicode() {
533             let mut rhs = self.pop().unwrap().unwrap_class_unicode();
534             let mut lhs = self.pop().unwrap().unwrap_class_unicode();
535             let mut cls = self.pop().unwrap().unwrap_class_unicode();
536             if self.flags().case_insensitive() {
537                 rhs.try_case_fold_simple().map_err(|_| {
538                     self.error(
539                         op.rhs.span().clone(),
540                         ErrorKind::UnicodeCaseUnavailable,
541                     )
542                 })?;
543                 lhs.try_case_fold_simple().map_err(|_| {
544                     self.error(
545                         op.lhs.span().clone(),
546                         ErrorKind::UnicodeCaseUnavailable,
547                     )
548                 })?;
549             }
550             match op.kind {
551                 Intersection => lhs.intersect(&rhs),
552                 Difference => lhs.difference(&rhs),
553                 SymmetricDifference => lhs.symmetric_difference(&rhs),
554             }
555             cls.union(&lhs);
556             self.push(HirFrame::ClassUnicode(cls));
557         } else {
558             let mut rhs = self.pop().unwrap().unwrap_class_bytes();
559             let mut lhs = self.pop().unwrap().unwrap_class_bytes();
560             let mut cls = self.pop().unwrap().unwrap_class_bytes();
561             if self.flags().case_insensitive() {
562                 rhs.case_fold_simple();
563                 lhs.case_fold_simple();
564             }
565             match op.kind {
566                 Intersection => lhs.intersect(&rhs),
567                 Difference => lhs.difference(&rhs),
568                 SymmetricDifference => lhs.symmetric_difference(&rhs),
569             }
570             cls.union(&lhs);
571             self.push(HirFrame::ClassBytes(cls));
572         }
573         Ok(())
574     }
575 }
576 
577 /// The internal implementation of a translator.
578 ///
579 /// This type is responsible for carrying around the original pattern string,
580 /// which is not tied to the internal state of a translator.
581 ///
582 /// A TranslatorI exists for the time it takes to translate a single Ast.
583 #[derive(Clone, Debug)]
584 struct TranslatorI<'t, 'p> {
585     trans: &'t Translator,
586     pattern: &'p str,
587 }
588 
589 impl<'t, 'p> TranslatorI<'t, 'p> {
590     /// Build a new internal translator.
new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p>591     fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
592         TranslatorI { trans, pattern }
593     }
594 
595     /// Return a reference to the underlying translator.
trans(&self) -> &Translator596     fn trans(&self) -> &Translator {
597         &self.trans
598     }
599 
600     /// Push the given frame on to the call stack.
push(&self, frame: HirFrame)601     fn push(&self, frame: HirFrame) {
602         self.trans().stack.borrow_mut().push(frame);
603     }
604 
605     /// Pop the top of the call stack. If the call stack is empty, return None.
pop(&self) -> Option<HirFrame>606     fn pop(&self) -> Option<HirFrame> {
607         self.trans().stack.borrow_mut().pop()
608     }
609 
610     /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ErrorKind) -> Error611     fn error(&self, span: Span, kind: ErrorKind) -> Error {
612         Error { kind, pattern: self.pattern.to_string(), span }
613     }
614 
615     /// Return a copy of the active flags.
flags(&self) -> Flags616     fn flags(&self) -> Flags {
617         self.trans().flags.get()
618     }
619 
620     /// Set the flags of this translator from the flags set in the given AST.
621     /// Then, return the old flags.
set_flags(&self, ast_flags: &ast::Flags) -> Flags622     fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
623         let old_flags = self.flags();
624         let mut new_flags = Flags::from_ast(ast_flags);
625         new_flags.merge(&old_flags);
626         self.trans().flags.set(new_flags);
627         old_flags
628     }
629 
hir_literal(&self, lit: &ast::Literal) -> Result<Hir>630     fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
631         let ch = match self.literal_to_char(lit)? {
632             byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
633             hir::Literal::Unicode(ch) => ch,
634         };
635         if self.flags().case_insensitive() {
636             self.hir_from_char_case_insensitive(lit.span, ch)
637         } else {
638             self.hir_from_char(lit.span, ch)
639         }
640     }
641 
642     /// Convert an Ast literal to its scalar representation.
643     ///
644     /// When Unicode mode is enabled, then this always succeeds and returns a
645     /// `char` (Unicode scalar value).
646     ///
647     /// When Unicode mode is disabled, then a raw byte is returned. If that
648     /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
649     /// an error.
literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal>650     fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
651         if self.flags().unicode() {
652             return Ok(hir::Literal::Unicode(lit.c));
653         }
654         let byte = match lit.byte() {
655             None => return Ok(hir::Literal::Unicode(lit.c)),
656             Some(byte) => byte,
657         };
658         if byte <= 0x7F {
659             return Ok(hir::Literal::Unicode(byte as char));
660         }
661         if !self.trans().allow_invalid_utf8 {
662             return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
663         }
664         Ok(hir::Literal::Byte(byte))
665     }
666 
hir_from_char(&self, span: Span, c: char) -> Result<Hir>667     fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
668         if !self.flags().unicode() && c.len_utf8() > 1 {
669             return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
670         }
671         Ok(Hir::literal(hir::Literal::Unicode(c)))
672     }
673 
hir_from_char_case_insensitive( &self, span: Span, c: char, ) -> Result<Hir>674     fn hir_from_char_case_insensitive(
675         &self,
676         span: Span,
677         c: char,
678     ) -> Result<Hir> {
679         if self.flags().unicode() {
680             // If case folding won't do anything, then don't bother trying.
681             let map =
682                 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
683                     self.error(span, ErrorKind::UnicodeCaseUnavailable)
684                 })?;
685             if !map {
686                 return self.hir_from_char(span, c);
687             }
688             let mut cls =
689                 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
690                     c, c,
691                 )]);
692             cls.try_case_fold_simple().map_err(|_| {
693                 self.error(span, ErrorKind::UnicodeCaseUnavailable)
694             })?;
695             Ok(Hir::class(hir::Class::Unicode(cls)))
696         } else {
697             if c.len_utf8() > 1 {
698                 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
699             }
700             // If case folding won't do anything, then don't bother trying.
701             match c {
702                 'A'..='Z' | 'a'..='z' => {}
703                 _ => return self.hir_from_char(span, c),
704             }
705             let mut cls =
706                 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
707                     c as u8, c as u8,
708                 )]);
709             cls.case_fold_simple();
710             Ok(Hir::class(hir::Class::Bytes(cls)))
711         }
712     }
713 
hir_dot(&self, span: Span) -> Result<Hir>714     fn hir_dot(&self, span: Span) -> Result<Hir> {
715         let unicode = self.flags().unicode();
716         if !unicode && !self.trans().allow_invalid_utf8 {
717             return Err(self.error(span, ErrorKind::InvalidUtf8));
718         }
719         Ok(if self.flags().dot_matches_new_line() {
720             Hir::any(!unicode)
721         } else {
722             Hir::dot(!unicode)
723         })
724     }
725 
hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir>726     fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
727         let unicode = self.flags().unicode();
728         let multi_line = self.flags().multi_line();
729         Ok(match asst.kind {
730             ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
731                 hir::Anchor::StartLine
732             } else {
733                 hir::Anchor::StartText
734             }),
735             ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
736                 hir::Anchor::EndLine
737             } else {
738                 hir::Anchor::EndText
739             }),
740             ast::AssertionKind::StartText => {
741                 Hir::anchor(hir::Anchor::StartText)
742             }
743             ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
744             ast::AssertionKind::WordBoundary => {
745                 Hir::word_boundary(if unicode {
746                     hir::WordBoundary::Unicode
747                 } else {
748                     hir::WordBoundary::Ascii
749                 })
750             }
751             ast::AssertionKind::NotWordBoundary => {
752                 Hir::word_boundary(if unicode {
753                     hir::WordBoundary::UnicodeNegate
754                 } else {
755                     // It is possible for negated ASCII word boundaries to
756                     // match at invalid UTF-8 boundaries, even when searching
757                     // valid UTF-8.
758                     if !self.trans().allow_invalid_utf8 {
759                         return Err(
760                             self.error(asst.span, ErrorKind::InvalidUtf8)
761                         );
762                     }
763                     hir::WordBoundary::AsciiNegate
764                 })
765             }
766         })
767     }
768 
hir_group(&self, group: &ast::Group, expr: Hir) -> Hir769     fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
770         let kind = match group.kind {
771             ast::GroupKind::CaptureIndex(idx) => {
772                 hir::GroupKind::CaptureIndex(idx)
773             }
774             ast::GroupKind::CaptureName(ref capname) => {
775                 hir::GroupKind::CaptureName {
776                     name: capname.name.clone(),
777                     index: capname.index,
778                 }
779             }
780             ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
781         };
782         Hir::group(hir::Group { kind, hir: Box::new(expr) })
783     }
784 
hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir785     fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
786         let kind = match rep.op.kind {
787             ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
788             ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
789             ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
790             ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
791                 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
792             }
793             ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
794                 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
795             }
796             ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
797                 m,
798                 n,
799             )) => {
800                 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
801             }
802         };
803         let greedy =
804             if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
805         Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) })
806     }
807 
hir_unicode_class( &self, ast_class: &ast::ClassUnicode, ) -> Result<hir::ClassUnicode>808     fn hir_unicode_class(
809         &self,
810         ast_class: &ast::ClassUnicode,
811     ) -> Result<hir::ClassUnicode> {
812         use crate::ast::ClassUnicodeKind::*;
813 
814         if !self.flags().unicode() {
815             return Err(
816                 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
817             );
818         }
819         let query = match ast_class.kind {
820             OneLetter(name) => ClassQuery::OneLetter(name),
821             Named(ref name) => ClassQuery::Binary(name),
822             NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
823                 property_name: name,
824                 property_value: value,
825             },
826         };
827         let mut result = self.convert_unicode_class_error(
828             &ast_class.span,
829             unicode::class(query),
830         );
831         if let Ok(ref mut class) = result {
832             self.unicode_fold_and_negate(
833                 &ast_class.span,
834                 ast_class.negated,
835                 class,
836             )?;
837             if class.ranges().is_empty() {
838                 let err = self
839                     .error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
840                 return Err(err);
841             }
842         }
843         result
844     }
845 
hir_ascii_unicode_class( &self, ast: &ast::ClassAscii, ) -> Result<hir::ClassUnicode>846     fn hir_ascii_unicode_class(
847         &self,
848         ast: &ast::ClassAscii,
849     ) -> Result<hir::ClassUnicode> {
850         let mut cls = hir::ClassUnicode::new(
851             ascii_class(&ast.kind)
852                 .iter()
853                 .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)),
854         );
855         self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
856         Ok(cls)
857     }
858 
hir_ascii_byte_class( &self, ast: &ast::ClassAscii, ) -> Result<hir::ClassBytes>859     fn hir_ascii_byte_class(
860         &self,
861         ast: &ast::ClassAscii,
862     ) -> Result<hir::ClassBytes> {
863         let mut cls = hir::ClassBytes::new(
864             ascii_class(&ast.kind)
865                 .iter()
866                 .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)),
867         );
868         self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
869         Ok(cls)
870     }
871 
hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, ) -> Result<hir::ClassUnicode>872     fn hir_perl_unicode_class(
873         &self,
874         ast_class: &ast::ClassPerl,
875     ) -> Result<hir::ClassUnicode> {
876         use crate::ast::ClassPerlKind::*;
877 
878         assert!(self.flags().unicode());
879         let result = match ast_class.kind {
880             Digit => unicode::perl_digit(),
881             Space => unicode::perl_space(),
882             Word => unicode::perl_word(),
883         };
884         let mut class =
885             self.convert_unicode_class_error(&ast_class.span, result)?;
886         // We needn't apply case folding here because the Perl Unicode classes
887         // are already closed under Unicode simple case folding.
888         if ast_class.negated {
889             class.negate();
890         }
891         Ok(class)
892     }
893 
hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes894     fn hir_perl_byte_class(
895         &self,
896         ast_class: &ast::ClassPerl,
897     ) -> hir::ClassBytes {
898         use crate::ast::ClassPerlKind::*;
899 
900         assert!(!self.flags().unicode());
901         let mut class = match ast_class.kind {
902             Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
903             Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
904             Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
905         };
906         // We needn't apply case folding here because the Perl ASCII classes
907         // are already closed (under ASCII case folding).
908         if ast_class.negated {
909             class.negate();
910         }
911         class
912     }
913 
914     /// Converts the given Unicode specific error to an HIR translation error.
915     ///
916     /// The span given should approximate the position at which an error would
917     /// occur.
convert_unicode_class_error( &self, span: &Span, result: unicode::Result<hir::ClassUnicode>, ) -> Result<hir::ClassUnicode>918     fn convert_unicode_class_error(
919         &self,
920         span: &Span,
921         result: unicode::Result<hir::ClassUnicode>,
922     ) -> Result<hir::ClassUnicode> {
923         result.map_err(|err| {
924             let sp = span.clone();
925             match err {
926                 unicode::Error::PropertyNotFound => {
927                     self.error(sp, ErrorKind::UnicodePropertyNotFound)
928                 }
929                 unicode::Error::PropertyValueNotFound => {
930                     self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
931                 }
932                 unicode::Error::PerlClassNotFound => {
933                     self.error(sp, ErrorKind::UnicodePerlClassNotFound)
934                 }
935             }
936         })
937     }
938 
unicode_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassUnicode, ) -> Result<()>939     fn unicode_fold_and_negate(
940         &self,
941         span: &Span,
942         negated: bool,
943         class: &mut hir::ClassUnicode,
944     ) -> Result<()> {
945         // Note that we must apply case folding before negation!
946         // Consider `(?i)[^x]`. If we applied negation field, then
947         // the result would be the character class that matched any
948         // Unicode scalar value.
949         if self.flags().case_insensitive() {
950             class.try_case_fold_simple().map_err(|_| {
951                 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
952             })?;
953         }
954         if negated {
955             class.negate();
956         }
957         Ok(())
958     }
959 
bytes_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassBytes, ) -> Result<()>960     fn bytes_fold_and_negate(
961         &self,
962         span: &Span,
963         negated: bool,
964         class: &mut hir::ClassBytes,
965     ) -> Result<()> {
966         // Note that we must apply case folding before negation!
967         // Consider `(?i)[^x]`. If we applied negation first, then
968         // the result would be the character class that matched any
969         // Unicode scalar value.
970         if self.flags().case_insensitive() {
971             class.case_fold_simple();
972         }
973         if negated {
974             class.negate();
975         }
976         if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
977             return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
978         }
979         Ok(())
980     }
981 
982     /// Return a scalar byte value suitable for use as a literal in a byte
983     /// character class.
class_literal_byte(&self, ast: &ast::Literal) -> Result<u8>984     fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
985         match self.literal_to_char(ast)? {
986             hir::Literal::Byte(byte) => Ok(byte),
987             hir::Literal::Unicode(ch) => {
988                 if ch <= 0x7F as char {
989                     Ok(ch as u8)
990                 } else {
991                     // We can't feasibly support Unicode in
992                     // byte oriented classes. Byte classes don't
993                     // do Unicode case folding.
994                     Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
995                 }
996             }
997         }
998     }
999 }
1000 
1001 /// A translator's representation of a regular expression's flags at any given
1002 /// moment in time.
1003 ///
1004 /// Each flag can be in one of three states: absent, present but disabled or
1005 /// present but enabled.
1006 #[derive(Clone, Copy, Debug, Default)]
1007 struct Flags {
1008     case_insensitive: Option<bool>,
1009     multi_line: Option<bool>,
1010     dot_matches_new_line: Option<bool>,
1011     swap_greed: Option<bool>,
1012     unicode: Option<bool>,
1013     // Note that `ignore_whitespace` is omitted here because it is handled
1014     // entirely in the parser.
1015 }
1016 
1017 impl Flags {
from_ast(ast: &ast::Flags) -> Flags1018     fn from_ast(ast: &ast::Flags) -> Flags {
1019         let mut flags = Flags::default();
1020         let mut enable = true;
1021         for item in &ast.items {
1022             match item.kind {
1023                 ast::FlagsItemKind::Negation => {
1024                     enable = false;
1025                 }
1026                 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1027                     flags.case_insensitive = Some(enable);
1028                 }
1029                 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1030                     flags.multi_line = Some(enable);
1031                 }
1032                 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1033                     flags.dot_matches_new_line = Some(enable);
1034                 }
1035                 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1036                     flags.swap_greed = Some(enable);
1037                 }
1038                 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1039                     flags.unicode = Some(enable);
1040                 }
1041                 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1042             }
1043         }
1044         flags
1045     }
1046 
merge(&mut self, previous: &Flags)1047     fn merge(&mut self, previous: &Flags) {
1048         if self.case_insensitive.is_none() {
1049             self.case_insensitive = previous.case_insensitive;
1050         }
1051         if self.multi_line.is_none() {
1052             self.multi_line = previous.multi_line;
1053         }
1054         if self.dot_matches_new_line.is_none() {
1055             self.dot_matches_new_line = previous.dot_matches_new_line;
1056         }
1057         if self.swap_greed.is_none() {
1058             self.swap_greed = previous.swap_greed;
1059         }
1060         if self.unicode.is_none() {
1061             self.unicode = previous.unicode;
1062         }
1063     }
1064 
case_insensitive(&self) -> bool1065     fn case_insensitive(&self) -> bool {
1066         self.case_insensitive.unwrap_or(false)
1067     }
1068 
multi_line(&self) -> bool1069     fn multi_line(&self) -> bool {
1070         self.multi_line.unwrap_or(false)
1071     }
1072 
dot_matches_new_line(&self) -> bool1073     fn dot_matches_new_line(&self) -> bool {
1074         self.dot_matches_new_line.unwrap_or(false)
1075     }
1076 
swap_greed(&self) -> bool1077     fn swap_greed(&self) -> bool {
1078         self.swap_greed.unwrap_or(false)
1079     }
1080 
unicode(&self) -> bool1081     fn unicode(&self) -> bool {
1082         self.unicode.unwrap_or(true)
1083     }
1084 }
1085 
hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes1086 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1087     let ranges: Vec<_> = ascii_class(kind)
1088         .iter()
1089         .cloned()
1090         .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1091         .collect();
1092     hir::ClassBytes::new(ranges)
1093 }
1094 
ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)]1095 fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1096     use crate::ast::ClassAsciiKind::*;
1097     match *kind {
1098         Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1099         Alpha => &[('A', 'Z'), ('a', 'z')],
1100         Ascii => &[('\x00', '\x7F')],
1101         Blank => &[('\t', '\t'), (' ', ' ')],
1102         Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
1103         Digit => &[('0', '9')],
1104         Graph => &[('!', '~')],
1105         Lower => &[('a', 'z')],
1106         Print => &[(' ', '~')],
1107         Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1108         Space => &[
1109             ('\t', '\t'),
1110             ('\n', '\n'),
1111             ('\x0B', '\x0B'),
1112             ('\x0C', '\x0C'),
1113             ('\r', '\r'),
1114             (' ', ' '),
1115         ],
1116         Upper => &[('A', 'Z')],
1117         Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1118         Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1119     }
1120 }
1121 
1122 #[cfg(test)]
1123 mod tests {
1124     use crate::ast::parse::ParserBuilder;
1125     use crate::ast::{self, Ast, Position, Span};
1126     use crate::hir::{self, Hir, HirKind};
1127     use crate::unicode::{self, ClassQuery};
1128 
1129     use super::{ascii_class, TranslatorBuilder};
1130 
1131     // We create these errors to compare with real hir::Errors in the tests.
1132     // We define equality between TestError and hir::Error to disregard the
1133     // pattern string in hir::Error, which is annoying to provide in tests.
1134     #[derive(Clone, Debug)]
1135     struct TestError {
1136         span: Span,
1137         kind: hir::ErrorKind,
1138     }
1139 
1140     impl PartialEq<hir::Error> for TestError {
eq(&self, other: &hir::Error) -> bool1141         fn eq(&self, other: &hir::Error) -> bool {
1142             self.span == other.span && self.kind == other.kind
1143         }
1144     }
1145 
1146     impl PartialEq<TestError> for hir::Error {
eq(&self, other: &TestError) -> bool1147         fn eq(&self, other: &TestError) -> bool {
1148             self.span == other.span && self.kind == other.kind
1149         }
1150     }
1151 
parse(pattern: &str) -> Ast1152     fn parse(pattern: &str) -> Ast {
1153         ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1154     }
1155 
t(pattern: &str) -> Hir1156     fn t(pattern: &str) -> Hir {
1157         TranslatorBuilder::new()
1158             .allow_invalid_utf8(false)
1159             .build()
1160             .translate(pattern, &parse(pattern))
1161             .unwrap()
1162     }
1163 
t_err(pattern: &str) -> hir::Error1164     fn t_err(pattern: &str) -> hir::Error {
1165         TranslatorBuilder::new()
1166             .allow_invalid_utf8(false)
1167             .build()
1168             .translate(pattern, &parse(pattern))
1169             .unwrap_err()
1170     }
1171 
t_bytes(pattern: &str) -> Hir1172     fn t_bytes(pattern: &str) -> Hir {
1173         TranslatorBuilder::new()
1174             .allow_invalid_utf8(true)
1175             .build()
1176             .translate(pattern, &parse(pattern))
1177             .unwrap()
1178     }
1179 
hir_lit(s: &str) -> Hir1180     fn hir_lit(s: &str) -> Hir {
1181         match s.len() {
1182             0 => Hir::empty(),
1183             _ => {
1184                 let lits = s
1185                     .chars()
1186                     .map(hir::Literal::Unicode)
1187                     .map(Hir::literal)
1188                     .collect();
1189                 Hir::concat(lits)
1190             }
1191         }
1192     }
1193 
hir_blit(s: &[u8]) -> Hir1194     fn hir_blit(s: &[u8]) -> Hir {
1195         match s.len() {
1196             0 => Hir::empty(),
1197             1 => Hir::literal(hir::Literal::Byte(s[0])),
1198             _ => {
1199                 let lits = s
1200                     .iter()
1201                     .cloned()
1202                     .map(hir::Literal::Byte)
1203                     .map(Hir::literal)
1204                     .collect();
1205                 Hir::concat(lits)
1206             }
1207         }
1208     }
1209 
hir_group(i: u32, expr: Hir) -> Hir1210     fn hir_group(i: u32, expr: Hir) -> Hir {
1211         Hir::group(hir::Group {
1212             kind: hir::GroupKind::CaptureIndex(i),
1213             hir: Box::new(expr),
1214         })
1215     }
1216 
hir_group_name(i: u32, name: &str, expr: Hir) -> Hir1217     fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1218         Hir::group(hir::Group {
1219             kind: hir::GroupKind::CaptureName {
1220                 name: name.to_string(),
1221                 index: i,
1222             },
1223             hir: Box::new(expr),
1224         })
1225     }
1226 
hir_group_nocap(expr: Hir) -> Hir1227     fn hir_group_nocap(expr: Hir) -> Hir {
1228         Hir::group(hir::Group {
1229             kind: hir::GroupKind::NonCapturing,
1230             hir: Box::new(expr),
1231         })
1232     }
1233 
hir_quest(greedy: bool, expr: Hir) -> Hir1234     fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1235         Hir::repetition(hir::Repetition {
1236             kind: hir::RepetitionKind::ZeroOrOne,
1237             greedy,
1238             hir: Box::new(expr),
1239         })
1240     }
1241 
hir_star(greedy: bool, expr: Hir) -> Hir1242     fn hir_star(greedy: bool, expr: Hir) -> Hir {
1243         Hir::repetition(hir::Repetition {
1244             kind: hir::RepetitionKind::ZeroOrMore,
1245             greedy,
1246             hir: Box::new(expr),
1247         })
1248     }
1249 
hir_plus(greedy: bool, expr: Hir) -> Hir1250     fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1251         Hir::repetition(hir::Repetition {
1252             kind: hir::RepetitionKind::OneOrMore,
1253             greedy,
1254             hir: Box::new(expr),
1255         })
1256     }
1257 
hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir1258     fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1259         Hir::repetition(hir::Repetition {
1260             kind: hir::RepetitionKind::Range(range),
1261             greedy,
1262             hir: Box::new(expr),
1263         })
1264     }
1265 
hir_alt(alts: Vec<Hir>) -> Hir1266     fn hir_alt(alts: Vec<Hir>) -> Hir {
1267         Hir::alternation(alts)
1268     }
1269 
hir_cat(exprs: Vec<Hir>) -> Hir1270     fn hir_cat(exprs: Vec<Hir>) -> Hir {
1271         Hir::concat(exprs)
1272     }
1273 
1274     #[allow(dead_code)]
hir_uclass_query(query: ClassQuery<'_>) -> Hir1275     fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1276         Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1277     }
1278 
1279     #[allow(dead_code)]
hir_uclass_perl_word() -> Hir1280     fn hir_uclass_perl_word() -> Hir {
1281         Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1282     }
1283 
hir_uclass(ranges: &[(char, char)]) -> Hir1284     fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1285         let ranges: Vec<hir::ClassUnicodeRange> = ranges
1286             .iter()
1287             .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1288             .collect();
1289         Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1290     }
1291 
hir_bclass(ranges: &[(u8, u8)]) -> Hir1292     fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1293         let ranges: Vec<hir::ClassBytesRange> = ranges
1294             .iter()
1295             .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1296             .collect();
1297         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1298     }
1299 
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir1300     fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1301         let ranges: Vec<hir::ClassBytesRange> = ranges
1302             .iter()
1303             .map(|&(s, e)| {
1304                 assert!(s as u32 <= 0x7F);
1305                 assert!(e as u32 <= 0x7F);
1306                 hir::ClassBytesRange::new(s as u8, e as u8)
1307             })
1308             .collect();
1309         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1310     }
1311 
hir_case_fold(expr: Hir) -> Hir1312     fn hir_case_fold(expr: Hir) -> Hir {
1313         match expr.into_kind() {
1314             HirKind::Class(mut cls) => {
1315                 cls.case_fold_simple();
1316                 Hir::class(cls)
1317             }
1318             _ => panic!("cannot case fold non-class Hir expr"),
1319         }
1320     }
1321 
hir_negate(expr: Hir) -> Hir1322     fn hir_negate(expr: Hir) -> Hir {
1323         match expr.into_kind() {
1324             HirKind::Class(mut cls) => {
1325                 cls.negate();
1326                 Hir::class(cls)
1327             }
1328             _ => panic!("cannot negate non-class Hir expr"),
1329         }
1330     }
1331 
1332     #[allow(dead_code)]
hir_union(expr1: Hir, expr2: Hir) -> Hir1333     fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1334         use crate::hir::Class::{Bytes, Unicode};
1335 
1336         match (expr1.into_kind(), expr2.into_kind()) {
1337             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1338                 c1.union(&c2);
1339                 Hir::class(hir::Class::Unicode(c1))
1340             }
1341             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1342                 c1.union(&c2);
1343                 Hir::class(hir::Class::Bytes(c1))
1344             }
1345             _ => panic!("cannot union non-class Hir exprs"),
1346         }
1347     }
1348 
1349     #[allow(dead_code)]
hir_difference(expr1: Hir, expr2: Hir) -> Hir1350     fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1351         use crate::hir::Class::{Bytes, Unicode};
1352 
1353         match (expr1.into_kind(), expr2.into_kind()) {
1354             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1355                 c1.difference(&c2);
1356                 Hir::class(hir::Class::Unicode(c1))
1357             }
1358             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1359                 c1.difference(&c2);
1360                 Hir::class(hir::Class::Bytes(c1))
1361             }
1362             _ => panic!("cannot difference non-class Hir exprs"),
1363         }
1364     }
1365 
hir_anchor(anchor: hir::Anchor) -> Hir1366     fn hir_anchor(anchor: hir::Anchor) -> Hir {
1367         Hir::anchor(anchor)
1368     }
1369 
hir_word(wb: hir::WordBoundary) -> Hir1370     fn hir_word(wb: hir::WordBoundary) -> Hir {
1371         Hir::word_boundary(wb)
1372     }
1373 
1374     #[test]
empty()1375     fn empty() {
1376         assert_eq!(t(""), Hir::empty());
1377         assert_eq!(t("(?i)"), Hir::empty());
1378         assert_eq!(t("()"), hir_group(1, Hir::empty()));
1379         assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1380         assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
1381         assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
1382         assert_eq!(
1383             t("()|()"),
1384             hir_alt(vec![
1385                 hir_group(1, Hir::empty()),
1386                 hir_group(2, Hir::empty()),
1387             ])
1388         );
1389         assert_eq!(
1390             t("(|b)"),
1391             hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
1392         );
1393         assert_eq!(
1394             t("(a|)"),
1395             hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
1396         );
1397         assert_eq!(
1398             t("(a||c)"),
1399             hir_group(
1400                 1,
1401                 hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
1402             )
1403         );
1404         assert_eq!(
1405             t("(||)"),
1406             hir_group(
1407                 1,
1408                 hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
1409             )
1410         );
1411     }
1412 
1413     #[test]
literal()1414     fn literal() {
1415         assert_eq!(t("a"), hir_lit("a"));
1416         assert_eq!(t("(?-u)a"), hir_lit("a"));
1417         assert_eq!(t("☃"), hir_lit("☃"));
1418         assert_eq!(t("abcd"), hir_lit("abcd"));
1419 
1420         assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
1421         assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
1422         assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
1423         assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
1424 
1425         assert_eq!(
1426             t_err("(?-u)☃"),
1427             TestError {
1428                 kind: hir::ErrorKind::UnicodeNotAllowed,
1429                 span: Span::new(
1430                     Position::new(5, 1, 6),
1431                     Position::new(8, 1, 7)
1432                 ),
1433             }
1434         );
1435         assert_eq!(
1436             t_err(r"(?-u)\xFF"),
1437             TestError {
1438                 kind: hir::ErrorKind::InvalidUtf8,
1439                 span: Span::new(
1440                     Position::new(5, 1, 6),
1441                     Position::new(9, 1, 10)
1442                 ),
1443             }
1444         );
1445     }
1446 
1447     #[test]
literal_case_insensitive()1448     fn literal_case_insensitive() {
1449         #[cfg(feature = "unicode-case")]
1450         assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
1451         #[cfg(feature = "unicode-case")]
1452         assert_eq!(
1453             t("(?i:a)"),
1454             hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
1455         );
1456         #[cfg(feature = "unicode-case")]
1457         assert_eq!(
1458             t("a(?i)a(?-i)a"),
1459             hir_cat(vec![
1460                 hir_lit("a"),
1461                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1462                 hir_lit("a"),
1463             ])
1464         );
1465         #[cfg(feature = "unicode-case")]
1466         assert_eq!(
1467             t("(?i)ab@c"),
1468             hir_cat(vec![
1469                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1470                 hir_uclass(&[('B', 'B'), ('b', 'b')]),
1471                 hir_lit("@"),
1472                 hir_uclass(&[('C', 'C'), ('c', 'c')]),
1473             ])
1474         );
1475         #[cfg(feature = "unicode-case")]
1476         assert_eq!(
1477             t("(?i)β"),
1478             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
1479         );
1480 
1481         assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
1482         #[cfg(feature = "unicode-case")]
1483         assert_eq!(
1484             t("(?-u)a(?i)a(?-i)a"),
1485             hir_cat(vec![
1486                 hir_lit("a"),
1487                 hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1488                 hir_lit("a"),
1489             ])
1490         );
1491         assert_eq!(
1492             t("(?i-u)ab@c"),
1493             hir_cat(vec![
1494                 hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
1495                 hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
1496                 hir_lit("@"),
1497                 hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
1498             ])
1499         );
1500 
1501         assert_eq!(
1502             t_bytes("(?i-u)a"),
1503             hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1504         );
1505         assert_eq!(
1506             t_bytes("(?i-u)\x61"),
1507             hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1508         );
1509         assert_eq!(
1510             t_bytes(r"(?i-u)\x61"),
1511             hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
1512         );
1513         assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
1514 
1515         assert_eq!(
1516             t_err("(?i-u)β"),
1517             TestError {
1518                 kind: hir::ErrorKind::UnicodeNotAllowed,
1519                 span: Span::new(
1520                     Position::new(6, 1, 7),
1521                     Position::new(8, 1, 8),
1522                 ),
1523             }
1524         );
1525     }
1526 
1527     #[test]
dot()1528     fn dot() {
1529         assert_eq!(
1530             t("."),
1531             hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
1532         );
1533         assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
1534         assert_eq!(
1535             t_bytes("(?-u)."),
1536             hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
1537         );
1538         assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
1539 
1540         // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
1541         assert_eq!(
1542             t_err("(?-u)."),
1543             TestError {
1544                 kind: hir::ErrorKind::InvalidUtf8,
1545                 span: Span::new(
1546                     Position::new(5, 1, 6),
1547                     Position::new(6, 1, 7)
1548                 ),
1549             }
1550         );
1551         assert_eq!(
1552             t_err("(?s-u)."),
1553             TestError {
1554                 kind: hir::ErrorKind::InvalidUtf8,
1555                 span: Span::new(
1556                     Position::new(6, 1, 7),
1557                     Position::new(7, 1, 8)
1558                 ),
1559             }
1560         );
1561     }
1562 
1563     #[test]
assertions()1564     fn assertions() {
1565         assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
1566         assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
1567         assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
1568         assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
1569         assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
1570         assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
1571         assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
1572         assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
1573 
1574         assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
1575         assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
1576         assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
1577         assert_eq!(
1578             t_bytes(r"(?-u)\B"),
1579             hir_word(hir::WordBoundary::AsciiNegate)
1580         );
1581 
1582         assert_eq!(
1583             t_err(r"(?-u)\B"),
1584             TestError {
1585                 kind: hir::ErrorKind::InvalidUtf8,
1586                 span: Span::new(
1587                     Position::new(5, 1, 6),
1588                     Position::new(7, 1, 8)
1589                 ),
1590             }
1591         );
1592     }
1593 
1594     #[test]
group()1595     fn group() {
1596         assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
1597         assert_eq!(
1598             t("(a)(b)"),
1599             hir_cat(vec![
1600                 hir_group(1, hir_lit("a")),
1601                 hir_group(2, hir_lit("b")),
1602             ])
1603         );
1604         assert_eq!(
1605             t("(a)|(b)"),
1606             hir_alt(vec![
1607                 hir_group(1, hir_lit("a")),
1608                 hir_group(2, hir_lit("b")),
1609             ])
1610         );
1611         assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
1612         assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
1613         assert_eq!(
1614             t("(?P<foo>a)(?P<bar>b)"),
1615             hir_cat(vec![
1616                 hir_group_name(1, "foo", hir_lit("a")),
1617                 hir_group_name(2, "bar", hir_lit("b")),
1618             ])
1619         );
1620         assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
1621         assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
1622         assert_eq!(
1623             t("(?:a)(b)"),
1624             hir_cat(vec![
1625                 hir_group_nocap(hir_lit("a")),
1626                 hir_group(1, hir_lit("b")),
1627             ])
1628         );
1629         assert_eq!(
1630             t("(a)(?:b)(c)"),
1631             hir_cat(vec![
1632                 hir_group(1, hir_lit("a")),
1633                 hir_group_nocap(hir_lit("b")),
1634                 hir_group(2, hir_lit("c")),
1635             ])
1636         );
1637         assert_eq!(
1638             t("(a)(?P<foo>b)(c)"),
1639             hir_cat(vec![
1640                 hir_group(1, hir_lit("a")),
1641                 hir_group_name(2, "foo", hir_lit("b")),
1642                 hir_group(3, hir_lit("c")),
1643             ])
1644         );
1645         assert_eq!(t("()"), hir_group(1, Hir::empty()));
1646         assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
1647         assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
1648         assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
1649     }
1650 
1651     #[test]
flags()1652     fn flags() {
1653         #[cfg(feature = "unicode-case")]
1654         assert_eq!(
1655             t("(?i:a)a"),
1656             hir_cat(vec![
1657                 hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
1658                 hir_lit("a"),
1659             ])
1660         );
1661         assert_eq!(
1662             t("(?i-u:a)β"),
1663             hir_cat(vec![
1664                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1665                 hir_lit("β"),
1666             ])
1667         );
1668         assert_eq!(
1669             t("(?:(?i-u)a)b"),
1670             hir_cat(vec![
1671                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1672                 hir_lit("b"),
1673             ])
1674         );
1675         assert_eq!(
1676             t("((?i-u)a)b"),
1677             hir_cat(vec![
1678                 hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1679                 hir_lit("b"),
1680             ])
1681         );
1682         #[cfg(feature = "unicode-case")]
1683         assert_eq!(
1684             t("(?i)(?-i:a)a"),
1685             hir_cat(vec![
1686                 hir_group_nocap(hir_lit("a")),
1687                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1688             ])
1689         );
1690         #[cfg(feature = "unicode-case")]
1691         assert_eq!(
1692             t("(?im)a^"),
1693             hir_cat(vec![
1694                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1695                 hir_anchor(hir::Anchor::StartLine),
1696             ])
1697         );
1698         #[cfg(feature = "unicode-case")]
1699         assert_eq!(
1700             t("(?im)a^(?i-m)a^"),
1701             hir_cat(vec![
1702                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1703                 hir_anchor(hir::Anchor::StartLine),
1704                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1705                 hir_anchor(hir::Anchor::StartText),
1706             ])
1707         );
1708         assert_eq!(
1709             t("(?U)a*a*?(?-U)a*a*?"),
1710             hir_cat(vec![
1711                 hir_star(false, hir_lit("a")),
1712                 hir_star(true, hir_lit("a")),
1713                 hir_star(true, hir_lit("a")),
1714                 hir_star(false, hir_lit("a")),
1715             ])
1716         );
1717         #[cfg(feature = "unicode-case")]
1718         assert_eq!(
1719             t("(?:a(?i)a)a"),
1720             hir_cat(vec![
1721                 hir_group_nocap(hir_cat(vec![
1722                     hir_lit("a"),
1723                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1724                 ])),
1725                 hir_lit("a"),
1726             ])
1727         );
1728         #[cfg(feature = "unicode-case")]
1729         assert_eq!(
1730             t("(?i)(?:a(?-i)a)a"),
1731             hir_cat(vec![
1732                 hir_group_nocap(hir_cat(vec![
1733                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1734                     hir_lit("a"),
1735                 ])),
1736                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1737             ])
1738         );
1739     }
1740 
1741     #[test]
escape()1742     fn escape() {
1743         assert_eq!(
1744             t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1745             hir_lit(r"\.+*?()|[]{}^$#")
1746         );
1747     }
1748 
1749     #[test]
repetition()1750     fn repetition() {
1751         assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
1752         assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
1753         assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
1754         assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
1755         assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
1756         assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
1757 
1758         assert_eq!(
1759             t("a{1}"),
1760             hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1761         );
1762         assert_eq!(
1763             t("a{1,}"),
1764             hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1765         );
1766         assert_eq!(
1767             t("a{1,2}"),
1768             hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
1769         );
1770         assert_eq!(
1771             t("a{1}?"),
1772             hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1773         );
1774         assert_eq!(
1775             t("a{1,}?"),
1776             hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1777         );
1778         assert_eq!(
1779             t("a{1,2}?"),
1780             hir_range(
1781                 false,
1782                 hir::RepetitionRange::Bounded(1, 2),
1783                 hir_lit("a"),
1784             )
1785         );
1786 
1787         assert_eq!(
1788             t("ab?"),
1789             hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1790         );
1791         assert_eq!(
1792             t("(ab)?"),
1793             hir_quest(
1794                 true,
1795                 hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1796             )
1797         );
1798         assert_eq!(
1799             t("a|b?"),
1800             hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1801         );
1802     }
1803 
1804     #[test]
cat_alt()1805     fn cat_alt() {
1806         assert_eq!(
1807             t("(ab)"),
1808             hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1809         );
1810         assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
1811         assert_eq!(
1812             t("a|b|c"),
1813             hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1814         );
1815         assert_eq!(
1816             t("ab|bc|cd"),
1817             hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1818         );
1819         assert_eq!(
1820             t("(a|b)"),
1821             hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
1822         );
1823         assert_eq!(
1824             t("(a|b|c)"),
1825             hir_group(
1826                 1,
1827                 hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1828             )
1829         );
1830         assert_eq!(
1831             t("(ab|bc|cd)"),
1832             hir_group(
1833                 1,
1834                 hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1835             )
1836         );
1837         assert_eq!(
1838             t("(ab|(bc|(cd)))"),
1839             hir_group(
1840                 1,
1841                 hir_alt(vec![
1842                     hir_lit("ab"),
1843                     hir_group(
1844                         2,
1845                         hir_alt(vec![
1846                             hir_lit("bc"),
1847                             hir_group(3, hir_lit("cd")),
1848                         ])
1849                     ),
1850                 ])
1851             )
1852         );
1853     }
1854 
1855     #[test]
class_ascii()1856     fn class_ascii() {
1857         assert_eq!(
1858             t("[[:alnum:]]"),
1859             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
1860         );
1861         assert_eq!(
1862             t("[[:alpha:]]"),
1863             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
1864         );
1865         assert_eq!(
1866             t("[[:ascii:]]"),
1867             hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
1868         );
1869         assert_eq!(
1870             t("[[:blank:]]"),
1871             hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
1872         );
1873         assert_eq!(
1874             t("[[:cntrl:]]"),
1875             hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
1876         );
1877         assert_eq!(
1878             t("[[:digit:]]"),
1879             hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
1880         );
1881         assert_eq!(
1882             t("[[:graph:]]"),
1883             hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
1884         );
1885         assert_eq!(
1886             t("[[:lower:]]"),
1887             hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
1888         );
1889         assert_eq!(
1890             t("[[:print:]]"),
1891             hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
1892         );
1893         assert_eq!(
1894             t("[[:punct:]]"),
1895             hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
1896         );
1897         assert_eq!(
1898             t("[[:space:]]"),
1899             hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
1900         );
1901         assert_eq!(
1902             t("[[:upper:]]"),
1903             hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
1904         );
1905         assert_eq!(
1906             t("[[:word:]]"),
1907             hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
1908         );
1909         assert_eq!(
1910             t("[[:xdigit:]]"),
1911             hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
1912         );
1913 
1914         assert_eq!(
1915             t("[[:^lower:]]"),
1916             hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
1917         );
1918         #[cfg(feature = "unicode-case")]
1919         assert_eq!(
1920             t("(?i)[[:lower:]]"),
1921             hir_uclass(&[
1922                 ('A', 'Z'),
1923                 ('a', 'z'),
1924                 ('\u{17F}', '\u{17F}'),
1925                 ('\u{212A}', '\u{212A}'),
1926             ])
1927         );
1928 
1929         assert_eq!(
1930             t("(?-u)[[:lower:]]"),
1931             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
1932         );
1933         assert_eq!(
1934             t("(?i-u)[[:lower:]]"),
1935             hir_case_fold(hir_bclass_from_char(ascii_class(
1936                 &ast::ClassAsciiKind::Lower
1937             )))
1938         );
1939 
1940         assert_eq!(
1941             t_err("(?-u)[[:^lower:]]"),
1942             TestError {
1943                 kind: hir::ErrorKind::InvalidUtf8,
1944                 span: Span::new(
1945                     Position::new(6, 1, 7),
1946                     Position::new(16, 1, 17)
1947                 ),
1948             }
1949         );
1950         assert_eq!(
1951             t_err("(?i-u)[[:^lower:]]"),
1952             TestError {
1953                 kind: hir::ErrorKind::InvalidUtf8,
1954                 span: Span::new(
1955                     Position::new(7, 1, 8),
1956                     Position::new(17, 1, 18)
1957                 ),
1958             }
1959         );
1960     }
1961 
1962     #[test]
class_ascii_multiple()1963     fn class_ascii_multiple() {
1964         // See: https://github.com/rust-lang/regex/issues/680
1965         assert_eq!(
1966             t("[[:alnum:][:^ascii:]]"),
1967             hir_union(
1968                 hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)),
1969                 hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
1970             ),
1971         );
1972         assert_eq!(
1973             t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
1974             hir_union(
1975                 hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)),
1976                 hir_bclass(&[(0x80, 0xFF)]),
1977             ),
1978         );
1979     }
1980 
1981     #[test]
1982     #[cfg(feature = "unicode-perl")]
class_perl()1983     fn class_perl() {
1984         // Unicode
1985         assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
1986         assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
1987         assert_eq!(t(r"\w"), hir_uclass_perl_word());
1988         #[cfg(feature = "unicode-case")]
1989         assert_eq!(
1990             t(r"(?i)\d"),
1991             hir_uclass_query(ClassQuery::Binary("digit"))
1992         );
1993         #[cfg(feature = "unicode-case")]
1994         assert_eq!(
1995             t(r"(?i)\s"),
1996             hir_uclass_query(ClassQuery::Binary("space"))
1997         );
1998         #[cfg(feature = "unicode-case")]
1999         assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
2000 
2001         // Unicode, negated
2002         assert_eq!(
2003             t(r"\D"),
2004             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2005         );
2006         assert_eq!(
2007             t(r"\S"),
2008             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2009         );
2010         assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
2011         #[cfg(feature = "unicode-case")]
2012         assert_eq!(
2013             t(r"(?i)\D"),
2014             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2015         );
2016         #[cfg(feature = "unicode-case")]
2017         assert_eq!(
2018             t(r"(?i)\S"),
2019             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2020         );
2021         #[cfg(feature = "unicode-case")]
2022         assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
2023 
2024         // ASCII only
2025         assert_eq!(
2026             t(r"(?-u)\d"),
2027             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2028         );
2029         assert_eq!(
2030             t(r"(?-u)\s"),
2031             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2032         );
2033         assert_eq!(
2034             t(r"(?-u)\w"),
2035             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2036         );
2037         assert_eq!(
2038             t(r"(?i-u)\d"),
2039             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2040         );
2041         assert_eq!(
2042             t(r"(?i-u)\s"),
2043             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2044         );
2045         assert_eq!(
2046             t(r"(?i-u)\w"),
2047             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2048         );
2049 
2050         // ASCII only, negated
2051         assert_eq!(
2052             t(r"(?-u)\D"),
2053             hir_negate(hir_bclass_from_char(ascii_class(
2054                 &ast::ClassAsciiKind::Digit
2055             )))
2056         );
2057         assert_eq!(
2058             t(r"(?-u)\S"),
2059             hir_negate(hir_bclass_from_char(ascii_class(
2060                 &ast::ClassAsciiKind::Space
2061             )))
2062         );
2063         assert_eq!(
2064             t(r"(?-u)\W"),
2065             hir_negate(hir_bclass_from_char(ascii_class(
2066                 &ast::ClassAsciiKind::Word
2067             )))
2068         );
2069         assert_eq!(
2070             t(r"(?i-u)\D"),
2071             hir_negate(hir_bclass_from_char(ascii_class(
2072                 &ast::ClassAsciiKind::Digit
2073             )))
2074         );
2075         assert_eq!(
2076             t(r"(?i-u)\S"),
2077             hir_negate(hir_bclass_from_char(ascii_class(
2078                 &ast::ClassAsciiKind::Space
2079             )))
2080         );
2081         assert_eq!(
2082             t(r"(?i-u)\W"),
2083             hir_negate(hir_bclass_from_char(ascii_class(
2084                 &ast::ClassAsciiKind::Word
2085             )))
2086         );
2087     }
2088 
2089     #[test]
2090     #[cfg(not(feature = "unicode-perl"))]
class_perl_word_disabled()2091     fn class_perl_word_disabled() {
2092         assert_eq!(
2093             t_err(r"\w"),
2094             TestError {
2095                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2096                 span: Span::new(
2097                     Position::new(0, 1, 1),
2098                     Position::new(2, 1, 3)
2099                 ),
2100             }
2101         );
2102     }
2103 
2104     #[test]
2105     #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
class_perl_space_disabled()2106     fn class_perl_space_disabled() {
2107         assert_eq!(
2108             t_err(r"\s"),
2109             TestError {
2110                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2111                 span: Span::new(
2112                     Position::new(0, 1, 1),
2113                     Position::new(2, 1, 3)
2114                 ),
2115             }
2116         );
2117     }
2118 
2119     #[test]
2120     #[cfg(all(
2121         not(feature = "unicode-perl"),
2122         not(feature = "unicode-gencat")
2123     ))]
class_perl_digit_disabled()2124     fn class_perl_digit_disabled() {
2125         assert_eq!(
2126             t_err(r"\d"),
2127             TestError {
2128                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2129                 span: Span::new(
2130                     Position::new(0, 1, 1),
2131                     Position::new(2, 1, 3)
2132                 ),
2133             }
2134         );
2135     }
2136 
2137     #[test]
2138     #[cfg(feature = "unicode-gencat")]
class_unicode_gencat()2139     fn class_unicode_gencat() {
2140         assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2141         assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2142         assert_eq!(
2143             t(r"\p{Separator}"),
2144             hir_uclass_query(ClassQuery::Binary("Z"))
2145         );
2146         assert_eq!(
2147             t(r"\p{se      PaRa ToR}"),
2148             hir_uclass_query(ClassQuery::Binary("Z"))
2149         );
2150         assert_eq!(
2151             t(r"\p{gc:Separator}"),
2152             hir_uclass_query(ClassQuery::Binary("Z"))
2153         );
2154         assert_eq!(
2155             t(r"\p{gc=Separator}"),
2156             hir_uclass_query(ClassQuery::Binary("Z"))
2157         );
2158         assert_eq!(
2159             t(r"\p{Other}"),
2160             hir_uclass_query(ClassQuery::Binary("Other"))
2161         );
2162         assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2163 
2164         assert_eq!(
2165             t(r"\PZ"),
2166             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2167         );
2168         assert_eq!(
2169             t(r"\P{separator}"),
2170             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2171         );
2172         assert_eq!(
2173             t(r"\P{gc!=separator}"),
2174             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2175         );
2176 
2177         assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2178         assert_eq!(
2179             t(r"\p{assigned}"),
2180             hir_uclass_query(ClassQuery::Binary("Assigned"))
2181         );
2182         assert_eq!(
2183             t(r"\p{ascii}"),
2184             hir_uclass_query(ClassQuery::Binary("ASCII"))
2185         );
2186         assert_eq!(
2187             t(r"\p{gc:any}"),
2188             hir_uclass_query(ClassQuery::Binary("Any"))
2189         );
2190         assert_eq!(
2191             t(r"\p{gc:assigned}"),
2192             hir_uclass_query(ClassQuery::Binary("Assigned"))
2193         );
2194         assert_eq!(
2195             t(r"\p{gc:ascii}"),
2196             hir_uclass_query(ClassQuery::Binary("ASCII"))
2197         );
2198 
2199         assert_eq!(
2200             t_err(r"(?-u)\pZ"),
2201             TestError {
2202                 kind: hir::ErrorKind::UnicodeNotAllowed,
2203                 span: Span::new(
2204                     Position::new(5, 1, 6),
2205                     Position::new(8, 1, 9)
2206                 ),
2207             }
2208         );
2209         assert_eq!(
2210             t_err(r"(?-u)\p{Separator}"),
2211             TestError {
2212                 kind: hir::ErrorKind::UnicodeNotAllowed,
2213                 span: Span::new(
2214                     Position::new(5, 1, 6),
2215                     Position::new(18, 1, 19)
2216                 ),
2217             }
2218         );
2219         assert_eq!(
2220             t_err(r"\pE"),
2221             TestError {
2222                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2223                 span: Span::new(
2224                     Position::new(0, 1, 1),
2225                     Position::new(3, 1, 4)
2226                 ),
2227             }
2228         );
2229         assert_eq!(
2230             t_err(r"\p{Foo}"),
2231             TestError {
2232                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2233                 span: Span::new(
2234                     Position::new(0, 1, 1),
2235                     Position::new(7, 1, 8)
2236                 ),
2237             }
2238         );
2239         assert_eq!(
2240             t_err(r"\p{gc:Foo}"),
2241             TestError {
2242                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2243                 span: Span::new(
2244                     Position::new(0, 1, 1),
2245                     Position::new(10, 1, 11)
2246                 ),
2247             }
2248         );
2249     }
2250 
2251     #[test]
2252     #[cfg(not(feature = "unicode-gencat"))]
class_unicode_gencat_disabled()2253     fn class_unicode_gencat_disabled() {
2254         assert_eq!(
2255             t_err(r"\p{Separator}"),
2256             TestError {
2257                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2258                 span: Span::new(
2259                     Position::new(0, 1, 1),
2260                     Position::new(13, 1, 14)
2261                 ),
2262             }
2263         );
2264 
2265         assert_eq!(
2266             t_err(r"\p{Any}"),
2267             TestError {
2268                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2269                 span: Span::new(
2270                     Position::new(0, 1, 1),
2271                     Position::new(7, 1, 8)
2272                 ),
2273             }
2274         );
2275     }
2276 
2277     #[test]
2278     #[cfg(feature = "unicode-script")]
class_unicode_script()2279     fn class_unicode_script() {
2280         assert_eq!(
2281             t(r"\p{Greek}"),
2282             hir_uclass_query(ClassQuery::Binary("Greek"))
2283         );
2284         #[cfg(feature = "unicode-case")]
2285         assert_eq!(
2286             t(r"(?i)\p{Greek}"),
2287             hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2288         );
2289         #[cfg(feature = "unicode-case")]
2290         assert_eq!(
2291             t(r"(?i)\P{Greek}"),
2292             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2293                 "Greek"
2294             ))))
2295         );
2296 
2297         assert_eq!(
2298             t_err(r"\p{sc:Foo}"),
2299             TestError {
2300                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2301                 span: Span::new(
2302                     Position::new(0, 1, 1),
2303                     Position::new(10, 1, 11)
2304                 ),
2305             }
2306         );
2307         assert_eq!(
2308             t_err(r"\p{scx:Foo}"),
2309             TestError {
2310                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2311                 span: Span::new(
2312                     Position::new(0, 1, 1),
2313                     Position::new(11, 1, 12)
2314                 ),
2315             }
2316         );
2317     }
2318 
2319     #[test]
2320     #[cfg(not(feature = "unicode-script"))]
class_unicode_script_disabled()2321     fn class_unicode_script_disabled() {
2322         assert_eq!(
2323             t_err(r"\p{Greek}"),
2324             TestError {
2325                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2326                 span: Span::new(
2327                     Position::new(0, 1, 1),
2328                     Position::new(9, 1, 10)
2329                 ),
2330             }
2331         );
2332 
2333         assert_eq!(
2334             t_err(r"\p{scx:Greek}"),
2335             TestError {
2336                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2337                 span: Span::new(
2338                     Position::new(0, 1, 1),
2339                     Position::new(13, 1, 14)
2340                 ),
2341             }
2342         );
2343     }
2344 
2345     #[test]
2346     #[cfg(feature = "unicode-age")]
class_unicode_age()2347     fn class_unicode_age() {
2348         assert_eq!(
2349             t_err(r"\p{age:Foo}"),
2350             TestError {
2351                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2352                 span: Span::new(
2353                     Position::new(0, 1, 1),
2354                     Position::new(11, 1, 12)
2355                 ),
2356             }
2357         );
2358     }
2359 
2360     #[test]
2361     #[cfg(feature = "unicode-gencat")]
class_unicode_any_empty()2362     fn class_unicode_any_empty() {
2363         assert_eq!(
2364             t_err(r"\P{any}"),
2365             TestError {
2366                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2367                 span: Span::new(
2368                     Position::new(0, 1, 1),
2369                     Position::new(7, 1, 8)
2370                 ),
2371             }
2372         );
2373     }
2374 
2375     #[test]
2376     #[cfg(not(feature = "unicode-age"))]
class_unicode_age_disabled()2377     fn class_unicode_age_disabled() {
2378         assert_eq!(
2379             t_err(r"\p{age:3.0}"),
2380             TestError {
2381                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2382                 span: Span::new(
2383                     Position::new(0, 1, 1),
2384                     Position::new(11, 1, 12)
2385                 ),
2386             }
2387         );
2388     }
2389 
    // Checks translation of bracketed character classes: plain items and
    // ranges, escapes, Perl/Unicode classes, negation, case folding, byte
    // oriented (`(?-u)`) classes, unusual literals, and error cases.
2390     #[test]
class_bracketed()2391     fn class_bracketed() {
        // Plain items and ranges. Overlapping (`a-fd-h`) and adjacent
        // (`a-fg-m`) ranges are expected to collapse into a single range.
2392         assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2393         assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2394         assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2395         assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2396         assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2397         assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2398         assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2399         assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
        // Perl and Unicode property classes nested inside brackets. These
        // need Unicode tables, hence the feature gates on each assertion.
2400         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2401         assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2402         #[cfg(feature = "unicode-gencat")]
2403         assert_eq!(
2404             t(r"[\pZ]"),
2405             hir_uclass_query(ClassQuery::Binary("separator"))
2406         );
2407         #[cfg(feature = "unicode-gencat")]
2408         assert_eq!(
2409             t(r"[\p{separator}]"),
2410             hir_uclass_query(ClassQuery::Binary("separator"))
2411         );
        // A negated bracket around a negated class (`[^\D]`, `[^\PZ]`) is
        // expected to equal the plain, non-negated class.
2412         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2413         assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2414         #[cfg(feature = "unicode-gencat")]
2415         assert_eq!(
2416             t(r"[^\PZ]"),
2417             hir_uclass_query(ClassQuery::Binary("separator"))
2418         );
2419         #[cfg(feature = "unicode-gencat")]
2420         assert_eq!(
2421             t(r"[^\P{separator}]"),
2422             hir_uclass_query(ClassQuery::Binary("separator"))
2423         );
        // Same double negation under `(?i)`: the digit class is expected
        // unchanged, while the Greek class is expected case folded.
2424         #[cfg(all(
2425             feature = "unicode-case",
2426             any(feature = "unicode-perl", feature = "unicode-gencat")
2427         ))]
2428         assert_eq!(
2429             t(r"(?i)[^\D]"),
2430             hir_uclass_query(ClassQuery::Binary("digit"))
2431         );
2432         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2433         assert_eq!(
2434             t(r"(?i)[^\P{greek}]"),
2435             hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2436         );
2437 
        // With Unicode mode disabled (`(?-u)`), classes are expected to be
        // byte classes.
        // NOTE(review): `t_bytes` is presumably the variant of `t` that
        // permits patterns matching invalid UTF-8 -- confirm against the
        // helper's definition.
2438         assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2439         assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2440         assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2441 
        // Case-insensitive classes list every case variant explicitly; in
        // Unicode mode `k` also picks up U+212A and `β` also picks up `ϐ`.
2442         #[cfg(feature = "unicode-case")]
2443         assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2444         #[cfg(feature = "unicode-case")]
2445         assert_eq!(
2446             t("(?i)[k]"),
2447             hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2448         );
2449         #[cfg(feature = "unicode-case")]
2450         assert_eq!(
2451             t("(?i)[β]"),
2452             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2453         );
        // In byte mode only the two ASCII variants of `k` are expected.
2454         assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2455 
        // Negated classes, in Unicode mode and in byte mode.
2456         assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2457         assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2458         assert_eq!(
2459             t_bytes("(?-u)[^a]"),
2460             hir_negate(hir_bclass(&[(b'a', b'a')]))
2461         );
2462         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2463         assert_eq!(
2464             t(r"[^\d]"),
2465             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2466         );
2467         #[cfg(feature = "unicode-gencat")]
2468         assert_eq!(
2469             t(r"[^\pZ]"),
2470             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2471         );
2472         #[cfg(feature = "unicode-gencat")]
2473         assert_eq!(
2474             t(r"[^\p{separator}]"),
2475             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2476         );
        // Under `(?i)`, the expected value folds case first and negates
        // afterwards, whether the negation is on the bracket or the class.
2477         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2478         assert_eq!(
2479             t(r"(?i)[^\p{greek}]"),
2480             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2481                 "greek"
2482             ))))
2483         );
2484         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2485         assert_eq!(
2486             t(r"(?i)[\P{greek}]"),
2487             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2488                 "greek"
2489             ))))
2490         );
2491 
2492         // Test some weird cases.
2493         assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2494 
        // `&`, `~` and `-` are set-operator characters; each is checked
        // bare, escaped, repeated, and as either endpoint of a range.
2495         assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2496         assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2497         assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2498         assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2499         assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2500 
2501         assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2502         assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2503         assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2504         assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2505         assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2506 
2507         assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2508         assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2509         assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2510         assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2511         assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2512 
        // Error cases. Via `t_err`, a negated byte class is rejected with
        // `InvalidUtf8`; the span covers just `[^a]` (positions 5 to 9).
2513         assert_eq!(
2514             t_err("(?-u)[^a]"),
2515             TestError {
2516                 kind: hir::ErrorKind::InvalidUtf8,
2517                 span: Span::new(
2518                     Position::new(5, 1, 6),
2519                     Position::new(9, 1, 10)
2520                 ),
2521             }
2522         );
        // `[^\s\S]` can match nothing, so it is rejected with
        // `EmptyClassNotAllowed`, in both Unicode and byte mode. The span
        // covers the bracketed class only.
2523         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2524         assert_eq!(
2525             t_err(r"[^\s\S]"),
2526             TestError {
2527                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2528                 span: Span::new(
2529                     Position::new(0, 1, 1),
2530                     Position::new(7, 1, 8)
2531                 ),
2532             }
2533         );
2534         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2535         assert_eq!(
2536             t_err(r"(?-u)[^\s\S]"),
2537             TestError {
2538                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2539                 span: Span::new(
2540                     Position::new(5, 1, 6),
2541                     Position::new(12, 1, 13)
2542                 ),
2543             }
2544         );
2545     }
2546 
    // Checks that several items listed in one bracketed class translate to
    // the union of the individual items, including nested brackets, case
    // folding and negation.
2547     #[test]
class_bracketed_union()2548     fn class_bracketed_union() {
2549         assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
        // Literals on either side of a Unicode class: `a` and `b` end up
        // in one range, unioned with the separator class.
2550         #[cfg(feature = "unicode-gencat")]
2551         assert_eq!(
2552             t(r"[a\pZb]"),
2553             hir_union(
2554                 hir_uclass(&[('a', 'b')]),
2555                 hir_uclass_query(ClassQuery::Binary("separator"))
2556             )
2557         );
2558         #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2559         assert_eq!(
2560             t(r"[\pZ\p{Greek}]"),
2561             hir_union(
2562                 hir_uclass_query(ClassQuery::Binary("greek")),
2563                 hir_uclass_query(ClassQuery::Binary("separator"))
2564             )
2565         );
        // Three classes, including a `name:value` property query.
2566         #[cfg(all(
2567             feature = "unicode-age",
2568             feature = "unicode-gencat",
2569             feature = "unicode-script"
2570         ))]
2571         assert_eq!(
2572             t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2573             hir_union(
2574                 hir_uclass_query(ClassQuery::ByValue {
2575                     property_name: "age",
2576                     property_value: "3.0",
2577                 }),
2578                 hir_union(
2579                     hir_uclass_query(ClassQuery::Binary("greek")),
2580                     hir_uclass_query(ClassQuery::Binary("separator"))
2581                 )
2582             )
2583         );
        // Nested brackets contribute their members to the same union.
2584         #[cfg(all(
2585             feature = "unicode-age",
2586             feature = "unicode-gencat",
2587             feature = "unicode-script"
2588         ))]
2589         assert_eq!(
2590             t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2591             hir_union(
2592                 hir_uclass_query(ClassQuery::ByValue {
2593                     property_name: "age",
2594                     property_value: "3.0",
2595                 }),
2596                 hir_union(
2597                     hir_uclass_query(ClassQuery::Binary("cyrillic")),
2598                     hir_union(
2599                         hir_uclass_query(ClassQuery::Binary("greek")),
2600                         hir_uclass_query(ClassQuery::Binary("separator"))
2601                     )
2602                 )
2603             )
2604         );
2605 
        // `(?i)` case folds the whole union.
2606         #[cfg(all(
2607             feature = "unicode-age",
2608             feature = "unicode-case",
2609             feature = "unicode-gencat",
2610             feature = "unicode-script"
2611         ))]
2612         assert_eq!(
2613             t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2614             hir_case_fold(hir_union(
2615                 hir_uclass_query(ClassQuery::ByValue {
2616                     property_name: "age",
2617                     property_value: "3.0",
2618                 }),
2619                 hir_union(
2620                     hir_uclass_query(ClassQuery::Binary("greek")),
2621                     hir_uclass_query(ClassQuery::Binary("separator"))
2622                 )
2623             ))
2624         );
        // A leading `^` negates the whole union.
2625         #[cfg(all(
2626             feature = "unicode-age",
2627             feature = "unicode-gencat",
2628             feature = "unicode-script"
2629         ))]
2630         assert_eq!(
2631             t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2632             hir_negate(hir_union(
2633                 hir_uclass_query(ClassQuery::ByValue {
2634                     property_name: "age",
2635                     property_value: "3.0",
2636                 }),
2637                 hir_union(
2638                     hir_uclass_query(ClassQuery::Binary("greek")),
2639                     hir_uclass_query(ClassQuery::Binary("separator"))
2640                 )
2641             ))
2642         );
        // With both `(?i)` and `^`, the expected value folds case first and
        // negates the folded union afterwards.
2643         #[cfg(all(
2644             feature = "unicode-age",
2645             feature = "unicode-case",
2646             feature = "unicode-gencat",
2647             feature = "unicode-script"
2648         ))]
2649         assert_eq!(
2650             t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2651             hir_negate(hir_case_fold(hir_union(
2652                 hir_uclass_query(ClassQuery::ByValue {
2653                     property_name: "age",
2654                     property_value: "3.0",
2655                 }),
2656                 hir_union(
2657                     hir_uclass_query(ClassQuery::Binary("greek")),
2658                     hir_uclass_query(ClassQuery::Binary("separator"))
2659                 )
2660             )))
2661         );
2662     }
2663 
    // Checks bracketed classes that contain a nested negated class, with
    // and without an outer negation and with case insensitivity.
2664     #[test]
class_bracketed_nested()2665     fn class_bracketed_nested() {
        // `[^c]` already contains `a` and `b`, so adding them changes
        // nothing. Adding `c` as well yields the negation of the empty class.
2666         assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2667         assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2668         assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2669 
        // Negating the outer class leaves only `c`.
2670         assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2671         assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2672 
        // Case-insensitive variants: `c` is case folded before the negation.
2673         #[cfg(feature = "unicode-case")]
2674         assert_eq!(
2675             t(r"(?i)[a[^c]]"),
2676             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2677         );
2678         #[cfg(feature = "unicode-case")]
2679         assert_eq!(
2680             t(r"(?i)[a-b[^c]]"),
2681             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2682         );
2683 
2684         #[cfg(feature = "unicode-case")]
2685         assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2686         #[cfg(feature = "unicode-case")]
2687         assert_eq!(
2688             t(r"(?i)[^a-b[^c]]"),
2689             hir_uclass(&[('C', 'C'), ('c', 'c')])
2690         );
2691 
        // Negating a class that covers everything leaves nothing to match,
        // which is rejected with `EmptyClassNotAllowed`. The span covers the
        // bracketed class only, not the `(?i)` prefix.
2692         assert_eq!(
2693             t_err(r"[^a-c[^c]]"),
2694             TestError {
2695                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2696                 span: Span::new(
2697                     Position::new(0, 1, 1),
2698                     Position::new(10, 1, 11)
2699                 ),
2700             }
2701         );
2702         #[cfg(feature = "unicode-case")]
2703         assert_eq!(
2704             t_err(r"(?i)[^a-c[^c]]"),
2705             TestError {
2706                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2707                 span: Span::new(
2708                     Position::new(4, 1, 5),
2709                     Position::new(14, 1, 15)
2710                 ),
2711             }
2712         );
2713     }
2714 
    // Checks the `&&` intersection operator inside bracketed classes, in
    // Unicode mode, byte mode, and their case-insensitive variants.
2715     #[test]
class_bracketed_intersect()2716     fn class_bracketed_intersect() {
        // Unicode mode. Operands may be bare items or nested brackets, and
        // `&&` may be chained.
2717         assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2718         assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2719         assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2720         assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2721         assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2722         assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2723         assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2724         assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2725         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2726 
        // The same intersections in byte mode.
2727         assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2728         assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2729         assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2730         assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
2731         assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
2732         assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
2733 
        // Case-insensitive Unicode mode: the expected value is the case
        // fold of the intersection.
2734         #[cfg(feature = "unicode-case")]
2735         assert_eq!(
2736             t("(?i)[abc&&b-c]"),
2737             hir_case_fold(hir_uclass(&[('b', 'c')]))
2738         );
2739         #[cfg(feature = "unicode-case")]
2740         assert_eq!(
2741             t("(?i)[abc&&[b-c]]"),
2742             hir_case_fold(hir_uclass(&[('b', 'c')]))
2743         );
2744         #[cfg(feature = "unicode-case")]
2745         assert_eq!(
2746             t("(?i)[[abc]&&[b-c]]"),
2747             hir_case_fold(hir_uclass(&[('b', 'c')]))
2748         );
2749         #[cfg(feature = "unicode-case")]
2750         assert_eq!(
2751             t("(?i)[a-z&&b-y&&c-x]"),
2752             hir_case_fold(hir_uclass(&[('c', 'x')]))
2753         );
2754         #[cfg(feature = "unicode-case")]
2755         assert_eq!(
2756             t("(?i)[c-da-b&&a-d]"),
2757             hir_case_fold(hir_uclass(&[('a', 'd')]))
2758         );
2759         #[cfg(feature = "unicode-case")]
2760         assert_eq!(
2761             t("(?i)[a-d&&c-da-b]"),
2762             hir_case_fold(hir_uclass(&[('a', 'd')]))
2763         );
2764 
        // Case-insensitive byte mode; these assertions are not feature
        // gated.
2765         assert_eq!(
2766             t("(?i-u)[abc&&b-c]"),
2767             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2768         );
2769         assert_eq!(
2770             t("(?i-u)[abc&&[b-c]]"),
2771             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2772         );
2773         assert_eq!(
2774             t("(?i-u)[[abc]&&[b-c]]"),
2775             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2776         );
2777         assert_eq!(
2778             t("(?i-u)[a-z&&b-y&&c-x]"),
2779             hir_case_fold(hir_bclass(&[(b'c', b'x')]))
2780         );
2781         assert_eq!(
2782             t("(?i-u)[c-da-b&&a-d]"),
2783             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2784         );
2785         assert_eq!(
2786             t("(?i-u)[a-d&&c-da-b]"),
2787             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2788         );
2789 
2790         // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2791         // `^` is also allowed to be unescaped after `&&`.
2792         assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
2793         // `]` needs to be escaped after `&&` since it's not at start of class.
2794         assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
2795         assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
        // An escaped `&` next to the operator: both patterns are expected
        // to yield the class containing only `&`.
2796         assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
2797         assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
2798         // Test precedence.
        // `[^c-g]z` is unioned before the intersection with `a-w` is
        // taken, so `z` (outside `a-w`) is absent from the result.
2799         assert_eq!(
2800             t(r"[a-w&&[^c-g]z]"),
2801             hir_uclass(&[('a', 'b'), ('h', 'w')])
2802         );
2803     }
2804 
    // Checks how negation combines with `&&` intersection, in Unicode mode
    // and in byte mode.
2805     #[test]
class_bracketed_intersect_negate()2806     fn class_bracketed_intersect_negate() {
        // A leading `^` negates the whole intersection: `\w&&\d` is the
        // digit class, so the expected value is the negated digit class.
2807         #[cfg(feature = "unicode-perl")]
2808         assert_eq!(
2809             t(r"[^\w&&\d]"),
2810             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2811         );
2812         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2813         #[cfg(feature = "unicode-perl")]
2814         assert_eq!(
2815             t(r"[^[\w&&\d]]"),
2816             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2817         );
        // Two negations cancel out.
2818         #[cfg(feature = "unicode-perl")]
2819         assert_eq!(
2820             t(r"[^[^\w&&\d]]"),
2821             hir_uclass_query(ClassQuery::Binary("digit"))
2822         );
        // Intersecting not-word with not-digit is expected to equal
        // not-word.
2823         #[cfg(feature = "unicode-perl")]
2824         assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2825 
        // The same cases in byte mode, where the expected classes are
        // built from the ASCII class tables.
2826         #[cfg(feature = "unicode-perl")]
2827         assert_eq!(
2828             t_bytes(r"(?-u)[^\w&&\d]"),
2829             hir_negate(hir_bclass_from_char(ascii_class(
2830                 &ast::ClassAsciiKind::Digit
2831             )))
2832         );
2833         assert_eq!(
2834             t_bytes(r"(?-u)[^[a-z&&a-c]]"),
2835             hir_negate(hir_bclass(&[(b'a', b'c')]))
2836         );
2837         assert_eq!(
2838             t_bytes(r"(?-u)[^[\w&&\d]]"),
2839             hir_negate(hir_bclass_from_char(ascii_class(
2840                 &ast::ClassAsciiKind::Digit
2841             )))
2842         );
2843         assert_eq!(
2844             t_bytes(r"(?-u)[^[^\w&&\d]]"),
2845             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2846         );
2847         assert_eq!(
2848             t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
2849             hir_negate(hir_bclass_from_char(ascii_class(
2850                 &ast::ClassAsciiKind::Word
2851             )))
2852         );
2853     }
2854 
    // Checks the `--` difference operator inside bracketed classes.
2855     #[test]
class_bracketed_difference()2856     fn class_bracketed_difference() {
        // Unicode mode: letters minus the ASCII range `\0`-`\x7F`.
2857         #[cfg(feature = "unicode-gencat")]
2858         assert_eq!(
2859             t(r"[\pL--[:ascii:]]"),
2860             hir_difference(
2861                 hir_uclass_query(ClassQuery::Binary("letter")),
2862                 hir_uclass(&[('\0', '\x7F')])
2863             )
2864         );
2865 
        // Byte mode: ASCII letters minus lowercase leaves `A`-`Z`.
2866         assert_eq!(
2867             t(r"(?-u)[[:alpha:]--[:lower:]]"),
2868             hir_bclass(&[(b'A', b'Z')])
2869         );
2870     }
2871 
    // Checks the `~~` symmetric difference operator inside bracketed
    // classes: the result holds what is in exactly one of the two operands.
2872     #[test]
class_bracketed_symmetric_difference()2873     fn class_bracketed_symmetric_difference() {
        // Code points that differ between the `sc:Greek` and `scx:Greek`
        // property queries; the expected ranges are hard-coded.
2874         #[cfg(feature = "unicode-script")]
2875         assert_eq!(
2876             t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2877             hir_uclass(&[
2878                 ('\u{0342}', '\u{0342}'),
2879                 ('\u{0345}', '\u{0345}'),
2880                 ('\u{1DC0}', '\u{1DC1}'),
2881             ])
2882         );
        // `a-g` and `c-j` overlap on `c-g`, leaving `a-b` and `h-j`.
2883         assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2884 
        // The same ranges in byte mode.
2885         assert_eq!(
2886             t(r"(?-u)[a-g~~c-j]"),
2887             hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2888         );
2889     }
2890 
    // Checks translation under the `x` flag, where whitespace and `#`
    // comments in the pattern are ignored, including in the middle of
    // multi-character constructs.
2891     #[test]
ignore_whitespace()2892     fn ignore_whitespace() {
        // `\12` is expected to yield a newline, followed by the literal
        // `3`; the space between them is dropped.
2893         assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
        // Braced hex escape with whitespace, then with comments and
        // newlines, between its parts.
2894         assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
2895         assert_eq!(
2896             t(r"(?x)\x # comment
2897 { # comment
2898     53 # comment
2899 } #comment"),
2900             hir_lit("S")
2901         );
2902 
        // Unbraced hex escape: whitespace and comments may separate `\x`
        // from its digits, and the digits from each other.
2903         assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
2904         assert_eq!(
2905             t(r"(?x)\x # comment
2906         53 # comment"),
2907             hir_lit("S")
2908         );
2909         assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
2910 
        // Unicode property escape spread over several commented lines.
2911         #[cfg(feature = "unicode-gencat")]
2912         assert_eq!(
2913             t(r"(?x)\p # comment
2914 { # comment
2915     Separator # comment
2916 } # comment"),
2917             hir_uclass_query(ClassQuery::Binary("separator"))
2918         );
2919 
        // Counted repetition `{5,10}` spread over several commented lines.
2920         assert_eq!(
2921             t(r"(?x)a # comment
2922 { # comment
2923     5 # comment
2924     , # comment
2925     10 # comment
2926 } # comment"),
2927             hir_range(
2928                 true,
2929                 hir::RepetitionRange::Bounded(5, 10),
2930                 hir_lit("a")
2931             )
2932         );
2933 
        // A backslash-escaped space is kept as a literal space; the
        // unescaped space and the comment after it are dropped.
2934         assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
2935     }
2936 
2937     #[test]
analysis_is_always_utf8()2938     fn analysis_is_always_utf8() {
2939         // Positive examples.
2940         assert!(t_bytes(r"a").is_always_utf8());
2941         assert!(t_bytes(r"ab").is_always_utf8());
2942         assert!(t_bytes(r"(?-u)a").is_always_utf8());
2943         assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2944         assert!(t_bytes(r"\xFF").is_always_utf8());
2945         assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2946         assert!(t_bytes(r"[^a]").is_always_utf8());
2947         assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2948         assert!(t_bytes(r"\b").is_always_utf8());
2949         assert!(t_bytes(r"\B").is_always_utf8());
2950         assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2951 
2952         // Negative examples.
2953         assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2954         assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2955         assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2956         assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2957         assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2958     }
2959 
2960     #[test]
analysis_is_all_assertions()2961     fn analysis_is_all_assertions() {
2962         // Positive examples.
2963         assert!(t(r"\b").is_all_assertions());
2964         assert!(t(r"\B").is_all_assertions());
2965         assert!(t(r"^").is_all_assertions());
2966         assert!(t(r"$").is_all_assertions());
2967         assert!(t(r"\A").is_all_assertions());
2968         assert!(t(r"\z").is_all_assertions());
2969         assert!(t(r"$^\z\A\b\B").is_all_assertions());
2970         assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2971         assert!(t(r"^$|$^").is_all_assertions());
2972         assert!(t(r"((\b)+())*^").is_all_assertions());
2973 
2974         // Negative examples.
2975         assert!(!t(r"^a").is_all_assertions());
2976     }
2977 
2978     #[test]
analysis_is_anchored()2979     fn analysis_is_anchored() {
2980         // Positive examples.
2981         assert!(t(r"^").is_anchored_start());
2982         assert!(t(r"$").is_anchored_end());
2983         assert!(t(r"^").is_line_anchored_start());
2984         assert!(t(r"$").is_line_anchored_end());
2985 
2986         assert!(t(r"^^").is_anchored_start());
2987         assert!(t(r"$$").is_anchored_end());
2988         assert!(t(r"^^").is_line_anchored_start());
2989         assert!(t(r"$$").is_line_anchored_end());
2990 
2991         assert!(t(r"^$").is_anchored_start());
2992         assert!(t(r"^$").is_anchored_end());
2993         assert!(t(r"^$").is_line_anchored_start());
2994         assert!(t(r"^$").is_line_anchored_end());
2995 
2996         assert!(t(r"^foo").is_anchored_start());
2997         assert!(t(r"foo$").is_anchored_end());
2998         assert!(t(r"^foo").is_line_anchored_start());
2999         assert!(t(r"foo$").is_line_anchored_end());
3000 
3001         assert!(t(r"^foo|^bar").is_anchored_start());
3002         assert!(t(r"foo$|bar$").is_anchored_end());
3003         assert!(t(r"^foo|^bar").is_line_anchored_start());
3004         assert!(t(r"foo$|bar$").is_line_anchored_end());
3005 
3006         assert!(t(r"^(foo|bar)").is_anchored_start());
3007         assert!(t(r"(foo|bar)$").is_anchored_end());
3008         assert!(t(r"^(foo|bar)").is_line_anchored_start());
3009         assert!(t(r"(foo|bar)$").is_line_anchored_end());
3010 
3011         assert!(t(r"^+").is_anchored_start());
3012         assert!(t(r"$+").is_anchored_end());
3013         assert!(t(r"^+").is_line_anchored_start());
3014         assert!(t(r"$+").is_line_anchored_end());
3015         assert!(t(r"^++").is_anchored_start());
3016         assert!(t(r"$++").is_anchored_end());
3017         assert!(t(r"^++").is_line_anchored_start());
3018         assert!(t(r"$++").is_line_anchored_end());
3019         assert!(t(r"(^)+").is_anchored_start());
3020         assert!(t(r"($)+").is_anchored_end());
3021         assert!(t(r"(^)+").is_line_anchored_start());
3022         assert!(t(r"($)+").is_line_anchored_end());
3023 
3024         assert!(t(r"$^").is_anchored_start());
3025         assert!(t(r"$^").is_anchored_start());
3026         assert!(t(r"$^").is_line_anchored_end());
3027         assert!(t(r"$^").is_line_anchored_end());
3028         assert!(t(r"$^|^$").is_anchored_start());
3029         assert!(t(r"$^|^$").is_anchored_end());
3030         assert!(t(r"$^|^$").is_line_anchored_start());
3031         assert!(t(r"$^|^$").is_line_anchored_end());
3032 
3033         assert!(t(r"\b^").is_anchored_start());
3034         assert!(t(r"$\b").is_anchored_end());
3035         assert!(t(r"\b^").is_line_anchored_start());
3036         assert!(t(r"$\b").is_line_anchored_end());
3037         assert!(t(r"^(?m:^)").is_anchored_start());
3038         assert!(t(r"(?m:$)$").is_anchored_end());
3039         assert!(t(r"^(?m:^)").is_line_anchored_start());
3040         assert!(t(r"(?m:$)$").is_line_anchored_end());
3041         assert!(t(r"(?m:^)^").is_anchored_start());
3042         assert!(t(r"$(?m:$)").is_anchored_end());
3043         assert!(t(r"(?m:^)^").is_line_anchored_start());
3044         assert!(t(r"$(?m:$)").is_line_anchored_end());
3045 
3046         // Negative examples.
3047         assert!(!t(r"(?m)^").is_anchored_start());
3048         assert!(!t(r"(?m)$").is_anchored_end());
3049         assert!(!t(r"(?m:^$)|$^").is_anchored_start());
3050         assert!(!t(r"(?m:^$)|$^").is_anchored_end());
3051         assert!(!t(r"$^|(?m:^$)").is_anchored_start());
3052         assert!(!t(r"$^|(?m:^$)").is_anchored_end());
3053 
3054         assert!(!t(r"a^").is_anchored_start());
3055         assert!(!t(r"$a").is_anchored_start());
3056         assert!(!t(r"a^").is_line_anchored_start());
3057         assert!(!t(r"$a").is_line_anchored_start());
3058 
3059         assert!(!t(r"a^").is_anchored_end());
3060         assert!(!t(r"$a").is_anchored_end());
3061         assert!(!t(r"a^").is_line_anchored_end());
3062         assert!(!t(r"$a").is_line_anchored_end());
3063 
3064         assert!(!t(r"^foo|bar").is_anchored_start());
3065         assert!(!t(r"foo|bar$").is_anchored_end());
3066         assert!(!t(r"^foo|bar").is_line_anchored_start());
3067         assert!(!t(r"foo|bar$").is_line_anchored_end());
3068 
3069         assert!(!t(r"^*").is_anchored_start());
3070         assert!(!t(r"$*").is_anchored_end());
3071         assert!(!t(r"^*").is_line_anchored_start());
3072         assert!(!t(r"$*").is_line_anchored_end());
3073         assert!(!t(r"^*+").is_anchored_start());
3074         assert!(!t(r"$*+").is_anchored_end());
3075         assert!(!t(r"^*+").is_line_anchored_start());
3076         assert!(!t(r"$*+").is_line_anchored_end());
3077         assert!(!t(r"^+*").is_anchored_start());
3078         assert!(!t(r"$+*").is_anchored_end());
3079         assert!(!t(r"^+*").is_line_anchored_start());
3080         assert!(!t(r"$+*").is_line_anchored_end());
3081         assert!(!t(r"(^)*").is_anchored_start());
3082         assert!(!t(r"($)*").is_anchored_end());
3083         assert!(!t(r"(^)*").is_line_anchored_start());
3084         assert!(!t(r"($)*").is_line_anchored_end());
3085     }
3086 
3087     #[test]
analysis_is_line_anchored()3088     fn analysis_is_line_anchored() {
3089         assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3090         assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3091 
3092         assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3093         assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3094 
3095         assert!(t(r"(?m)^").is_line_anchored_start());
3096         assert!(t(r"(?m)$").is_line_anchored_end());
3097 
3098         assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3099         assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3100 
3101         assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3102         assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3103     }
3104 
3105     #[test]
analysis_is_any_anchored()3106     fn analysis_is_any_anchored() {
3107         // Positive examples.
3108         assert!(t(r"^").is_any_anchored_start());
3109         assert!(t(r"$").is_any_anchored_end());
3110         assert!(t(r"\A").is_any_anchored_start());
3111         assert!(t(r"\z").is_any_anchored_end());
3112 
3113         // Negative examples.
3114         assert!(!t(r"(?m)^").is_any_anchored_start());
3115         assert!(!t(r"(?m)$").is_any_anchored_end());
3116         assert!(!t(r"$").is_any_anchored_start());
3117         assert!(!t(r"^").is_any_anchored_end());
3118     }
3119 
3120     #[test]
analysis_is_match_empty()3121     fn analysis_is_match_empty() {
3122         // Positive examples.
3123         assert!(t(r"").is_match_empty());
3124         assert!(t(r"()").is_match_empty());
3125         assert!(t(r"()*").is_match_empty());
3126         assert!(t(r"()+").is_match_empty());
3127         assert!(t(r"()?").is_match_empty());
3128         assert!(t(r"a*").is_match_empty());
3129         assert!(t(r"a?").is_match_empty());
3130         assert!(t(r"a{0}").is_match_empty());
3131         assert!(t(r"a{0,}").is_match_empty());
3132         assert!(t(r"a{0,1}").is_match_empty());
3133         assert!(t(r"a{0,10}").is_match_empty());
3134         #[cfg(feature = "unicode-gencat")]
3135         assert!(t(r"\pL*").is_match_empty());
3136         assert!(t(r"a*|b").is_match_empty());
3137         assert!(t(r"b|a*").is_match_empty());
3138         assert!(t(r"a|").is_match_empty());
3139         assert!(t(r"|a").is_match_empty());
3140         assert!(t(r"a||b").is_match_empty());
3141         assert!(t(r"a*a?(abcd)*").is_match_empty());
3142         assert!(t(r"^").is_match_empty());
3143         assert!(t(r"$").is_match_empty());
3144         assert!(t(r"(?m)^").is_match_empty());
3145         assert!(t(r"(?m)$").is_match_empty());
3146         assert!(t(r"\A").is_match_empty());
3147         assert!(t(r"\z").is_match_empty());
3148         assert!(t(r"\B").is_match_empty());
3149         assert!(t_bytes(r"(?-u)\B").is_match_empty());
3150         assert!(t(r"\b").is_match_empty());
3151         assert!(t(r"(?-u)\b").is_match_empty());
3152 
3153         // Negative examples.
3154         assert!(!t(r"a+").is_match_empty());
3155         assert!(!t(r"a{1}").is_match_empty());
3156         assert!(!t(r"a{1,}").is_match_empty());
3157         assert!(!t(r"a{1,2}").is_match_empty());
3158         assert!(!t(r"a{1,10}").is_match_empty());
3159         assert!(!t(r"b|a").is_match_empty());
3160         assert!(!t(r"a*a+(abcd)*").is_match_empty());
3161     }
3162 
3163     #[test]
analysis_is_literal()3164     fn analysis_is_literal() {
3165         // Positive examples.
3166         assert!(t(r"a").is_literal());
3167         assert!(t(r"ab").is_literal());
3168         assert!(t(r"abc").is_literal());
3169         assert!(t(r"(?m)abc").is_literal());
3170 
3171         // Negative examples.
3172         assert!(!t(r"").is_literal());
3173         assert!(!t(r"^").is_literal());
3174         assert!(!t(r"a|b").is_literal());
3175         assert!(!t(r"(a)").is_literal());
3176         assert!(!t(r"a+").is_literal());
3177         assert!(!t(r"foo(a)").is_literal());
3178         assert!(!t(r"(a)foo").is_literal());
3179         assert!(!t(r"[a]").is_literal());
3180     }
3181 
3182     #[test]
analysis_is_alternation_literal()3183     fn analysis_is_alternation_literal() {
3184         // Positive examples.
3185         assert!(t(r"a").is_alternation_literal());
3186         assert!(t(r"ab").is_alternation_literal());
3187         assert!(t(r"abc").is_alternation_literal());
3188         assert!(t(r"(?m)abc").is_alternation_literal());
3189         assert!(t(r"a|b").is_alternation_literal());
3190         assert!(t(r"a|b|c").is_alternation_literal());
3191         assert!(t(r"foo|bar").is_alternation_literal());
3192         assert!(t(r"foo|bar|baz").is_alternation_literal());
3193 
3194         // Negative examples.
3195         assert!(!t(r"").is_alternation_literal());
3196         assert!(!t(r"^").is_alternation_literal());
3197         assert!(!t(r"(a)").is_alternation_literal());
3198         assert!(!t(r"a+").is_alternation_literal());
3199         assert!(!t(r"foo(a)").is_alternation_literal());
3200         assert!(!t(r"(a)foo").is_alternation_literal());
3201         assert!(!t(r"[a]").is_alternation_literal());
3202         assert!(!t(r"[a]|b").is_alternation_literal());
3203         assert!(!t(r"a|[b]").is_alternation_literal());
3204         assert!(!t(r"(a)|b").is_alternation_literal());
3205         assert!(!t(r"a|(b)").is_alternation_literal());
3206     }
3207 }
3208