1 /*!
2 Defines a translator that converts an `Ast` to an `Hir`.
3 */
4
5 use std::cell::{Cell, RefCell};
6 use std::result;
7
8 use ast::{self, Ast, Span, Visitor};
9 use hir::{self, Error, ErrorKind, Hir};
10 use unicode::{self, ClassQuery};
11
/// Shorthand for results produced in this module, whose error type is always
/// the HIR `Error`.
type Result<T> = result::Result<T, Error>;
13
/// A builder for constructing an AST->HIR translator.
///
/// The builder only records configuration; `build` copies it into a fresh
/// `Translator`.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether built translators may produce HIR that can match invalid
    /// UTF-8. Initialized to `false` by `new`.
    allow_invalid_utf8: bool,
    /// The flag settings every built translator starts out with.
    flags: Flags,
}
20
21 impl Default for TranslatorBuilder {
default() -> TranslatorBuilder22 fn default() -> TranslatorBuilder {
23 TranslatorBuilder::new()
24 }
25 }
26
27 impl TranslatorBuilder {
28 /// Create a new translator builder with a default c onfiguration.
new() -> TranslatorBuilder29 pub fn new() -> TranslatorBuilder {
30 TranslatorBuilder {
31 allow_invalid_utf8: false,
32 flags: Flags::default(),
33 }
34 }
35
36 /// Build a translator using the current configuration.
build(&self) -> Translator37 pub fn build(&self) -> Translator {
38 Translator {
39 stack: RefCell::new(vec![]),
40 flags: Cell::new(self.flags),
41 allow_invalid_utf8: self.allow_invalid_utf8,
42 }
43 }
44
45 /// When enabled, translation will permit the construction of a regular
46 /// expression that may match invalid UTF-8.
47 ///
48 /// When disabled (the default), the translator is guaranteed to produce
49 /// an expression that will only ever match valid UTF-8 (otherwise, the
50 /// translator will return an error).
51 ///
52 /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53 /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54 /// the parser to return an error. Namely, a negated ASCII word boundary
55 /// can result in matching positions that aren't valid UTF-8 boundaries.
allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder56 pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57 self.allow_invalid_utf8 = yes;
58 self
59 }
60
61 /// Enable or disable the case insensitive flag (`i`) by default.
case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder62 pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63 self.flags.case_insensitive = if yes { Some(true) } else { None };
64 self
65 }
66
67 /// Enable or disable the multi-line matching flag (`m`) by default.
multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder68 pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69 self.flags.multi_line = if yes { Some(true) } else { None };
70 self
71 }
72
73 /// Enable or disable the "dot matches any character" flag (`s`) by
74 /// default.
dot_matches_new_line( &mut self, yes: bool, ) -> &mut TranslatorBuilder75 pub fn dot_matches_new_line(
76 &mut self,
77 yes: bool,
78 ) -> &mut TranslatorBuilder {
79 self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
80 self
81 }
82
83 /// Enable or disable the "swap greed" flag (`U`) by default.
swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder84 pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85 self.flags.swap_greed = if yes { Some(true) } else { None };
86 self
87 }
88
89 /// Enable or disable the Unicode flag (`u`) by default.
unicode(&mut self, yes: bool) -> &mut TranslatorBuilder90 pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91 self.flags.unicode = if yes { None } else { Some(false) };
92 self
93 }
94 }
95
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// Wrapped in a `RefCell` because the internal translator only holds a
    /// shared reference to this struct while pushing and popping frames.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings. These change during a translation as flag
    /// directives and groups are visited.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
113
114 impl Translator {
115 /// Create a new translator using the default configuration.
new() -> Translator116 pub fn new() -> Translator {
117 TranslatorBuilder::new().build()
118 }
119
120 /// Translate the given abstract syntax tree (AST) into a high level
121 /// intermediate representation (HIR).
122 ///
123 /// If there was a problem doing the translation, then an HIR-specific
124 /// error is returned.
125 ///
126 /// The original pattern string used to produce the `Ast` *must* also be
127 /// provided. The translator does not use the pattern string during any
128 /// correct translation, but is used for error reporting.
translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir>129 pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130 ast::visit(ast, TranslatorI::new(self, pattern))
131 }
132 }
133
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ///
    /// Pushed (initially empty) when Unicode mode (`u`) is enabled.
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a non-empty concatenation is observed. After
    /// visiting every sub-expression in the concatenation, the translator's
    /// stack is popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever a non-empty alternation is observed. After
    /// visiting every sub-expression in the alternation, the translator's
    /// stack is popped until it sees an Alternation frame.
    Alternation,
}
185
186 impl HirFrame {
187 /// Assert that the current stack frame is an Hir expression and return it.
unwrap_expr(self) -> Hir188 fn unwrap_expr(self) -> Hir {
189 match self {
190 HirFrame::Expr(expr) => expr,
191 _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192 }
193 }
194
195 /// Assert that the current stack frame is a Unicode class expression and
196 /// return it.
unwrap_class_unicode(self) -> hir::ClassUnicode197 fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198 match self {
199 HirFrame::ClassUnicode(cls) => cls,
200 _ => panic!(
201 "tried to unwrap Unicode class \
202 from HirFrame, got: {:?}",
203 self
204 ),
205 }
206 }
207
208 /// Assert that the current stack frame is a byte class expression and
209 /// return it.
unwrap_class_bytes(self) -> hir::ClassBytes210 fn unwrap_class_bytes(self) -> hir::ClassBytes {
211 match self {
212 HirFrame::ClassBytes(cls) => cls,
213 _ => panic!(
214 "tried to unwrap byte class \
215 from HirFrame, got: {:?}",
216 self
217 ),
218 }
219 }
220
221 /// Assert that the current stack frame is a group indicator and return
222 /// its corresponding flags (the flags that were active at the time the
223 /// group was entered).
unwrap_group(self) -> Flags224 fn unwrap_group(self) -> Flags {
225 match self {
226 HirFrame::Group { old_flags } => old_flags,
227 _ => {
228 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229 }
230 }
231 }
232 }
233
234 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235 type Output = Hir;
236 type Err = Error;
237
finish(self) -> Result<Hir>238 fn finish(self) -> Result<Hir> {
239 // ... otherwise, we should have exactly one HIR on the stack.
240 assert_eq!(self.trans().stack.borrow().len(), 1);
241 Ok(self.pop().unwrap().unwrap_expr())
242 }
243
visit_pre(&mut self, ast: &Ast) -> Result<()>244 fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245 match *ast {
246 Ast::Class(ast::Class::Bracketed(_)) => {
247 if self.flags().unicode() {
248 let cls = hir::ClassUnicode::empty();
249 self.push(HirFrame::ClassUnicode(cls));
250 } else {
251 let cls = hir::ClassBytes::empty();
252 self.push(HirFrame::ClassBytes(cls));
253 }
254 }
255 Ast::Group(ref x) => {
256 let old_flags = x
257 .flags()
258 .map(|ast| self.set_flags(ast))
259 .unwrap_or_else(|| self.flags());
260 self.push(HirFrame::Group { old_flags });
261 }
262 Ast::Concat(ref x) if x.asts.is_empty() => {}
263 Ast::Concat(_) => {
264 self.push(HirFrame::Concat);
265 }
266 Ast::Alternation(ref x) if x.asts.is_empty() => {}
267 Ast::Alternation(_) => {
268 self.push(HirFrame::Alternation);
269 }
270 _ => {}
271 }
272 Ok(())
273 }
274
visit_post(&mut self, ast: &Ast) -> Result<()>275 fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276 match *ast {
277 Ast::Empty(_) => {
278 self.push(HirFrame::Expr(Hir::empty()));
279 }
280 Ast::Flags(ref x) => {
281 self.set_flags(&x.flags);
282 // Flags in the AST are generally considered directives and
283 // not actual sub-expressions. However, they can be used in
284 // the concrete syntax like `((?i))`, and we need some kind of
285 // indication of an expression there, and Empty is the correct
286 // choice.
287 //
288 // There can also be things like `(?i)+`, but we rule those out
289 // in the parser. In the future, we might allow them for
290 // consistency sake.
291 self.push(HirFrame::Expr(Hir::empty()));
292 }
293 Ast::Literal(ref x) => {
294 self.push(HirFrame::Expr(self.hir_literal(x)?));
295 }
296 Ast::Dot(span) => {
297 self.push(HirFrame::Expr(self.hir_dot(span)?));
298 }
299 Ast::Assertion(ref x) => {
300 self.push(HirFrame::Expr(self.hir_assertion(x)?));
301 }
302 Ast::Class(ast::Class::Perl(ref x)) => {
303 if self.flags().unicode() {
304 let cls = self.hir_perl_unicode_class(x)?;
305 let hcls = hir::Class::Unicode(cls);
306 self.push(HirFrame::Expr(Hir::class(hcls)));
307 } else {
308 let cls = self.hir_perl_byte_class(x);
309 let hcls = hir::Class::Bytes(cls);
310 self.push(HirFrame::Expr(Hir::class(hcls)));
311 }
312 }
313 Ast::Class(ast::Class::Unicode(ref x)) => {
314 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315 self.push(HirFrame::Expr(Hir::class(cls)));
316 }
317 Ast::Class(ast::Class::Bracketed(ref ast)) => {
318 if self.flags().unicode() {
319 let mut cls = self.pop().unwrap().unwrap_class_unicode();
320 self.unicode_fold_and_negate(
321 &ast.span,
322 ast.negated,
323 &mut cls,
324 )?;
325 if cls.ranges().is_empty() {
326 return Err(self.error(
327 ast.span,
328 ErrorKind::EmptyClassNotAllowed,
329 ));
330 }
331 let expr = Hir::class(hir::Class::Unicode(cls));
332 self.push(HirFrame::Expr(expr));
333 } else {
334 let mut cls = self.pop().unwrap().unwrap_class_bytes();
335 self.bytes_fold_and_negate(
336 &ast.span,
337 ast.negated,
338 &mut cls,
339 )?;
340 if cls.ranges().is_empty() {
341 return Err(self.error(
342 ast.span,
343 ErrorKind::EmptyClassNotAllowed,
344 ));
345 }
346
347 let expr = Hir::class(hir::Class::Bytes(cls));
348 self.push(HirFrame::Expr(expr));
349 }
350 }
351 Ast::Repetition(ref x) => {
352 let expr = self.pop().unwrap().unwrap_expr();
353 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354 }
355 Ast::Group(ref x) => {
356 let expr = self.pop().unwrap().unwrap_expr();
357 let old_flags = self.pop().unwrap().unwrap_group();
358 self.trans().flags.set(old_flags);
359 self.push(HirFrame::Expr(self.hir_group(x, expr)));
360 }
361 Ast::Concat(_) => {
362 let mut exprs = vec![];
363 while let Some(HirFrame::Expr(expr)) = self.pop() {
364 if !expr.kind().is_empty() {
365 exprs.push(expr);
366 }
367 }
368 exprs.reverse();
369 self.push(HirFrame::Expr(Hir::concat(exprs)));
370 }
371 Ast::Alternation(_) => {
372 let mut exprs = vec![];
373 while let Some(HirFrame::Expr(expr)) = self.pop() {
374 exprs.push(expr);
375 }
376 exprs.reverse();
377 self.push(HirFrame::Expr(Hir::alternation(exprs)));
378 }
379 }
380 Ok(())
381 }
382
visit_class_set_item_pre( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>383 fn visit_class_set_item_pre(
384 &mut self,
385 ast: &ast::ClassSetItem,
386 ) -> Result<()> {
387 match *ast {
388 ast::ClassSetItem::Bracketed(_) => {
389 if self.flags().unicode() {
390 let cls = hir::ClassUnicode::empty();
391 self.push(HirFrame::ClassUnicode(cls));
392 } else {
393 let cls = hir::ClassBytes::empty();
394 self.push(HirFrame::ClassBytes(cls));
395 }
396 }
397 // We needn't handle the Union case here since the visitor will
398 // do it for us.
399 _ => {}
400 }
401 Ok(())
402 }
403
visit_class_set_item_post( &mut self, ast: &ast::ClassSetItem, ) -> Result<()>404 fn visit_class_set_item_post(
405 &mut self,
406 ast: &ast::ClassSetItem,
407 ) -> Result<()> {
408 match *ast {
409 ast::ClassSetItem::Empty(_) => {}
410 ast::ClassSetItem::Literal(ref x) => {
411 if self.flags().unicode() {
412 let mut cls = self.pop().unwrap().unwrap_class_unicode();
413 cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414 self.push(HirFrame::ClassUnicode(cls));
415 } else {
416 let mut cls = self.pop().unwrap().unwrap_class_bytes();
417 let byte = self.class_literal_byte(x)?;
418 cls.push(hir::ClassBytesRange::new(byte, byte));
419 self.push(HirFrame::ClassBytes(cls));
420 }
421 }
422 ast::ClassSetItem::Range(ref x) => {
423 if self.flags().unicode() {
424 let mut cls = self.pop().unwrap().unwrap_class_unicode();
425 cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426 self.push(HirFrame::ClassUnicode(cls));
427 } else {
428 let mut cls = self.pop().unwrap().unwrap_class_bytes();
429 let start = self.class_literal_byte(&x.start)?;
430 let end = self.class_literal_byte(&x.end)?;
431 cls.push(hir::ClassBytesRange::new(start, end));
432 self.push(HirFrame::ClassBytes(cls));
433 }
434 }
435 ast::ClassSetItem::Ascii(ref x) => {
436 if self.flags().unicode() {
437 let mut cls = self.pop().unwrap().unwrap_class_unicode();
438 for &(s, e) in ascii_class(&x.kind) {
439 cls.push(hir::ClassUnicodeRange::new(s, e));
440 }
441 self.unicode_fold_and_negate(
442 &x.span, x.negated, &mut cls,
443 )?;
444 self.push(HirFrame::ClassUnicode(cls));
445 } else {
446 let mut cls = self.pop().unwrap().unwrap_class_bytes();
447 for &(s, e) in ascii_class(&x.kind) {
448 cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
449 }
450 self.bytes_fold_and_negate(&x.span, x.negated, &mut cls)?;
451 self.push(HirFrame::ClassBytes(cls));
452 }
453 }
454 ast::ClassSetItem::Unicode(ref x) => {
455 let xcls = self.hir_unicode_class(x)?;
456 let mut cls = self.pop().unwrap().unwrap_class_unicode();
457 cls.union(&xcls);
458 self.push(HirFrame::ClassUnicode(cls));
459 }
460 ast::ClassSetItem::Perl(ref x) => {
461 if self.flags().unicode() {
462 let xcls = self.hir_perl_unicode_class(x)?;
463 let mut cls = self.pop().unwrap().unwrap_class_unicode();
464 cls.union(&xcls);
465 self.push(HirFrame::ClassUnicode(cls));
466 } else {
467 let xcls = self.hir_perl_byte_class(x);
468 let mut cls = self.pop().unwrap().unwrap_class_bytes();
469 cls.union(&xcls);
470 self.push(HirFrame::ClassBytes(cls));
471 }
472 }
473 ast::ClassSetItem::Bracketed(ref ast) => {
474 if self.flags().unicode() {
475 let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
476 self.unicode_fold_and_negate(
477 &ast.span,
478 ast.negated,
479 &mut cls1,
480 )?;
481
482 let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
483 cls2.union(&cls1);
484 self.push(HirFrame::ClassUnicode(cls2));
485 } else {
486 let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
487 self.bytes_fold_and_negate(
488 &ast.span,
489 ast.negated,
490 &mut cls1,
491 )?;
492
493 let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
494 cls2.union(&cls1);
495 self.push(HirFrame::ClassBytes(cls2));
496 }
497 }
498 // This is handled automatically by the visitor.
499 ast::ClassSetItem::Union(_) => {}
500 }
501 Ok(())
502 }
503
visit_class_set_binary_op_pre( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>504 fn visit_class_set_binary_op_pre(
505 &mut self,
506 _op: &ast::ClassSetBinaryOp,
507 ) -> Result<()> {
508 if self.flags().unicode() {
509 let cls = hir::ClassUnicode::empty();
510 self.push(HirFrame::ClassUnicode(cls));
511 } else {
512 let cls = hir::ClassBytes::empty();
513 self.push(HirFrame::ClassBytes(cls));
514 }
515 Ok(())
516 }
517
visit_class_set_binary_op_in( &mut self, _op: &ast::ClassSetBinaryOp, ) -> Result<()>518 fn visit_class_set_binary_op_in(
519 &mut self,
520 _op: &ast::ClassSetBinaryOp,
521 ) -> Result<()> {
522 if self.flags().unicode() {
523 let cls = hir::ClassUnicode::empty();
524 self.push(HirFrame::ClassUnicode(cls));
525 } else {
526 let cls = hir::ClassBytes::empty();
527 self.push(HirFrame::ClassBytes(cls));
528 }
529 Ok(())
530 }
531
visit_class_set_binary_op_post( &mut self, op: &ast::ClassSetBinaryOp, ) -> Result<()>532 fn visit_class_set_binary_op_post(
533 &mut self,
534 op: &ast::ClassSetBinaryOp,
535 ) -> Result<()> {
536 use ast::ClassSetBinaryOpKind::*;
537
538 if self.flags().unicode() {
539 let mut rhs = self.pop().unwrap().unwrap_class_unicode();
540 let mut lhs = self.pop().unwrap().unwrap_class_unicode();
541 let mut cls = self.pop().unwrap().unwrap_class_unicode();
542 if self.flags().case_insensitive() {
543 rhs.try_case_fold_simple().map_err(|_| {
544 self.error(
545 op.rhs.span().clone(),
546 ErrorKind::UnicodeCaseUnavailable,
547 )
548 })?;
549 lhs.try_case_fold_simple().map_err(|_| {
550 self.error(
551 op.lhs.span().clone(),
552 ErrorKind::UnicodeCaseUnavailable,
553 )
554 })?;
555 }
556 match op.kind {
557 Intersection => lhs.intersect(&rhs),
558 Difference => lhs.difference(&rhs),
559 SymmetricDifference => lhs.symmetric_difference(&rhs),
560 }
561 cls.union(&lhs);
562 self.push(HirFrame::ClassUnicode(cls));
563 } else {
564 let mut rhs = self.pop().unwrap().unwrap_class_bytes();
565 let mut lhs = self.pop().unwrap().unwrap_class_bytes();
566 let mut cls = self.pop().unwrap().unwrap_class_bytes();
567 if self.flags().case_insensitive() {
568 rhs.case_fold_simple();
569 lhs.case_fold_simple();
570 }
571 match op.kind {
572 Intersection => lhs.intersect(&rhs),
573 Difference => lhs.difference(&rhs),
574 SymmetricDifference => lhs.symmetric_difference(&rhs),
575 }
576 cls.union(&lhs);
577 self.push(HirFrame::ClassBytes(cls));
578 }
579 Ok(())
580 }
581 }
582
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose stack and flags are read and updated during the
    /// traversal.
    trans: &'t Translator,
    /// The original pattern string; it is copied into every error created by
    /// `error`.
    pattern: &'p str,
}
594
595 impl<'t, 'p> TranslatorI<'t, 'p> {
596 /// Build a new internal translator.
new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p>597 fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
598 TranslatorI { trans: trans, pattern: pattern }
599 }
600
601 /// Return a reference to the underlying translator.
trans(&self) -> &Translator602 fn trans(&self) -> &Translator {
603 &self.trans
604 }
605
606 /// Push the given frame on to the call stack.
push(&self, frame: HirFrame)607 fn push(&self, frame: HirFrame) {
608 self.trans().stack.borrow_mut().push(frame);
609 }
610
611 /// Pop the top of the call stack. If the call stack is empty, return None.
pop(&self) -> Option<HirFrame>612 fn pop(&self) -> Option<HirFrame> {
613 self.trans().stack.borrow_mut().pop()
614 }
615
616 /// Create a new error with the given span and error type.
error(&self, span: Span, kind: ErrorKind) -> Error617 fn error(&self, span: Span, kind: ErrorKind) -> Error {
618 Error { kind: kind, pattern: self.pattern.to_string(), span: span }
619 }
620
621 /// Return a copy of the active flags.
flags(&self) -> Flags622 fn flags(&self) -> Flags {
623 self.trans().flags.get()
624 }
625
626 /// Set the flags of this translator from the flags set in the given AST.
627 /// Then, return the old flags.
set_flags(&self, ast_flags: &ast::Flags) -> Flags628 fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
629 let old_flags = self.flags();
630 let mut new_flags = Flags::from_ast(ast_flags);
631 new_flags.merge(&old_flags);
632 self.trans().flags.set(new_flags);
633 old_flags
634 }
635
hir_literal(&self, lit: &ast::Literal) -> Result<Hir>636 fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
637 let ch = match self.literal_to_char(lit)? {
638 byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
639 hir::Literal::Unicode(ch) => ch,
640 };
641 if self.flags().case_insensitive() {
642 self.hir_from_char_case_insensitive(lit.span, ch)
643 } else {
644 self.hir_from_char(lit.span, ch)
645 }
646 }
647
648 /// Convert an Ast literal to its scalar representation.
649 ///
650 /// When Unicode mode is enabled, then this always succeeds and returns a
651 /// `char` (Unicode scalar value).
652 ///
653 /// When Unicode mode is disabled, then a raw byte is returned. If that
654 /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
655 /// an error.
literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal>656 fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
657 if self.flags().unicode() {
658 return Ok(hir::Literal::Unicode(lit.c));
659 }
660 let byte = match lit.byte() {
661 None => return Ok(hir::Literal::Unicode(lit.c)),
662 Some(byte) => byte,
663 };
664 if byte <= 0x7F {
665 return Ok(hir::Literal::Unicode(byte as char));
666 }
667 if !self.trans().allow_invalid_utf8 {
668 return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
669 }
670 Ok(hir::Literal::Byte(byte))
671 }
672
hir_from_char(&self, span: Span, c: char) -> Result<Hir>673 fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
674 if !self.flags().unicode() && c.len_utf8() > 1 {
675 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
676 }
677 Ok(Hir::literal(hir::Literal::Unicode(c)))
678 }
679
hir_from_char_case_insensitive( &self, span: Span, c: char, ) -> Result<Hir>680 fn hir_from_char_case_insensitive(
681 &self,
682 span: Span,
683 c: char,
684 ) -> Result<Hir> {
685 if self.flags().unicode() {
686 // If case folding won't do anything, then don't bother trying.
687 let map =
688 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
689 self.error(span, ErrorKind::UnicodeCaseUnavailable)
690 })?;
691 if !map {
692 return self.hir_from_char(span, c);
693 }
694 let mut cls =
695 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
696 c, c,
697 )]);
698 cls.try_case_fold_simple().map_err(|_| {
699 self.error(span, ErrorKind::UnicodeCaseUnavailable)
700 })?;
701 Ok(Hir::class(hir::Class::Unicode(cls)))
702 } else {
703 if c.len_utf8() > 1 {
704 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
705 }
706 // If case folding won't do anything, then don't bother trying.
707 match c {
708 'A'..='Z' | 'a'..='z' => {}
709 _ => return self.hir_from_char(span, c),
710 }
711 let mut cls =
712 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
713 c as u8, c as u8,
714 )]);
715 cls.case_fold_simple();
716 Ok(Hir::class(hir::Class::Bytes(cls)))
717 }
718 }
719
hir_dot(&self, span: Span) -> Result<Hir>720 fn hir_dot(&self, span: Span) -> Result<Hir> {
721 let unicode = self.flags().unicode();
722 if !unicode && !self.trans().allow_invalid_utf8 {
723 return Err(self.error(span, ErrorKind::InvalidUtf8));
724 }
725 Ok(if self.flags().dot_matches_new_line() {
726 Hir::any(!unicode)
727 } else {
728 Hir::dot(!unicode)
729 })
730 }
731
hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir>732 fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
733 let unicode = self.flags().unicode();
734 let multi_line = self.flags().multi_line();
735 Ok(match asst.kind {
736 ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
737 hir::Anchor::StartLine
738 } else {
739 hir::Anchor::StartText
740 }),
741 ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
742 hir::Anchor::EndLine
743 } else {
744 hir::Anchor::EndText
745 }),
746 ast::AssertionKind::StartText => {
747 Hir::anchor(hir::Anchor::StartText)
748 }
749 ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
750 ast::AssertionKind::WordBoundary => {
751 Hir::word_boundary(if unicode {
752 hir::WordBoundary::Unicode
753 } else {
754 hir::WordBoundary::Ascii
755 })
756 }
757 ast::AssertionKind::NotWordBoundary => {
758 Hir::word_boundary(if unicode {
759 hir::WordBoundary::UnicodeNegate
760 } else {
761 // It is possible for negated ASCII word boundaries to
762 // match at invalid UTF-8 boundaries, even when searching
763 // valid UTF-8.
764 if !self.trans().allow_invalid_utf8 {
765 return Err(
766 self.error(asst.span, ErrorKind::InvalidUtf8)
767 );
768 }
769 hir::WordBoundary::AsciiNegate
770 })
771 }
772 })
773 }
774
hir_group(&self, group: &ast::Group, expr: Hir) -> Hir775 fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
776 let kind = match group.kind {
777 ast::GroupKind::CaptureIndex(idx) => {
778 hir::GroupKind::CaptureIndex(idx)
779 }
780 ast::GroupKind::CaptureName(ref capname) => {
781 hir::GroupKind::CaptureName {
782 name: capname.name.clone(),
783 index: capname.index,
784 }
785 }
786 ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
787 };
788 Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
789 }
790
hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir791 fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
792 let kind = match rep.op.kind {
793 ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
794 ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
795 ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
796 ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
797 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
798 }
799 ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
800 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
801 }
802 ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
803 m,
804 n,
805 )) => {
806 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
807 }
808 };
809 let greedy =
810 if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
811 Hir::repetition(hir::Repetition {
812 kind: kind,
813 greedy: greedy,
814 hir: Box::new(expr),
815 })
816 }
817
hir_unicode_class( &self, ast_class: &ast::ClassUnicode, ) -> Result<hir::ClassUnicode>818 fn hir_unicode_class(
819 &self,
820 ast_class: &ast::ClassUnicode,
821 ) -> Result<hir::ClassUnicode> {
822 use ast::ClassUnicodeKind::*;
823
824 if !self.flags().unicode() {
825 return Err(
826 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
827 );
828 }
829 let query = match ast_class.kind {
830 OneLetter(name) => ClassQuery::OneLetter(name),
831 Named(ref name) => ClassQuery::Binary(name),
832 NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
833 property_name: name,
834 property_value: value,
835 },
836 };
837 let mut result = self.convert_unicode_class_error(
838 &ast_class.span,
839 unicode::class(query),
840 );
841 if let Ok(ref mut class) = result {
842 self.unicode_fold_and_negate(
843 &ast_class.span,
844 ast_class.negated,
845 class,
846 )?;
847 if class.ranges().is_empty() {
848 let err = self
849 .error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
850 return Err(err);
851 }
852 }
853 result
854 }
855
hir_perl_unicode_class( &self, ast_class: &ast::ClassPerl, ) -> Result<hir::ClassUnicode>856 fn hir_perl_unicode_class(
857 &self,
858 ast_class: &ast::ClassPerl,
859 ) -> Result<hir::ClassUnicode> {
860 use ast::ClassPerlKind::*;
861
862 assert!(self.flags().unicode());
863 let result = match ast_class.kind {
864 Digit => unicode::perl_digit(),
865 Space => unicode::perl_space(),
866 Word => unicode::perl_word(),
867 };
868 let mut class =
869 self.convert_unicode_class_error(&ast_class.span, result)?;
870 // We needn't apply case folding here because the Perl Unicode classes
871 // are already closed under Unicode simple case folding.
872 if ast_class.negated {
873 class.negate();
874 }
875 Ok(class)
876 }
877
hir_perl_byte_class( &self, ast_class: &ast::ClassPerl, ) -> hir::ClassBytes878 fn hir_perl_byte_class(
879 &self,
880 ast_class: &ast::ClassPerl,
881 ) -> hir::ClassBytes {
882 use ast::ClassPerlKind::*;
883
884 assert!(!self.flags().unicode());
885 let mut class = match ast_class.kind {
886 Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
887 Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
888 Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
889 };
890 // We needn't apply case folding here because the Perl ASCII classes
891 // are already closed (under ASCII case folding).
892 if ast_class.negated {
893 class.negate();
894 }
895 class
896 }
897
898 /// Converts the given Unicode specific error to an HIR translation error.
899 ///
900 /// The span given should approximate the position at which an error would
901 /// occur.
convert_unicode_class_error( &self, span: &Span, result: unicode::Result<hir::ClassUnicode>, ) -> Result<hir::ClassUnicode>902 fn convert_unicode_class_error(
903 &self,
904 span: &Span,
905 result: unicode::Result<hir::ClassUnicode>,
906 ) -> Result<hir::ClassUnicode> {
907 result.map_err(|err| {
908 let sp = span.clone();
909 match err {
910 unicode::Error::PropertyNotFound => {
911 self.error(sp, ErrorKind::UnicodePropertyNotFound)
912 }
913 unicode::Error::PropertyValueNotFound => {
914 self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
915 }
916 unicode::Error::PerlClassNotFound => {
917 self.error(sp, ErrorKind::UnicodePerlClassNotFound)
918 }
919 }
920 })
921 }
922
unicode_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassUnicode, ) -> Result<()>923 fn unicode_fold_and_negate(
924 &self,
925 span: &Span,
926 negated: bool,
927 class: &mut hir::ClassUnicode,
928 ) -> Result<()> {
929 // Note that we must apply case folding before negation!
930 // Consider `(?i)[^x]`. If we applied negation field, then
931 // the result would be the character class that matched any
932 // Unicode scalar value.
933 if self.flags().case_insensitive() {
934 class.try_case_fold_simple().map_err(|_| {
935 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
936 })?;
937 }
938 if negated {
939 class.negate();
940 }
941 Ok(())
942 }
943
bytes_fold_and_negate( &self, span: &Span, negated: bool, class: &mut hir::ClassBytes, ) -> Result<()>944 fn bytes_fold_and_negate(
945 &self,
946 span: &Span,
947 negated: bool,
948 class: &mut hir::ClassBytes,
949 ) -> Result<()> {
950 // Note that we must apply case folding before negation!
951 // Consider `(?i)[^x]`. If we applied negation field, then
952 // the result would be the character class that matched any
953 // Unicode scalar value.
954 if self.flags().case_insensitive() {
955 class.case_fold_simple();
956 }
957 if negated {
958 class.negate();
959 }
960 if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
961 return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
962 }
963 Ok(())
964 }
965
966 /// Return a scalar byte value suitable for use as a literal in a byte
967 /// character class.
class_literal_byte(&self, ast: &ast::Literal) -> Result<u8>968 fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
969 match self.literal_to_char(ast)? {
970 hir::Literal::Byte(byte) => Ok(byte),
971 hir::Literal::Unicode(ch) => {
972 if ch <= 0x7F as char {
973 Ok(ch as u8)
974 } else {
975 // We can't feasibly support Unicode in
976 // byte oriented classes. Byte classes don't
977 // do Unicode case folding.
978 Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
979 }
980 }
981 }
982 }
983 }
984
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled. These are encoded as `None`, `Some(false)` and
/// `Some(true)` respectively.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// State of the case insensitive flag.
    case_insensitive: Option<bool>,
    /// State of the multi-line flag.
    multi_line: Option<bool>,
    /// State of the "dot matches new line" flag.
    dot_matches_new_line: Option<bool>,
    /// State of the swap-greed flag.
    swap_greed: Option<bool>,
    /// State of the Unicode flag.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
1000
1001 impl Flags {
from_ast(ast: &ast::Flags) -> Flags1002 fn from_ast(ast: &ast::Flags) -> Flags {
1003 let mut flags = Flags::default();
1004 let mut enable = true;
1005 for item in &ast.items {
1006 match item.kind {
1007 ast::FlagsItemKind::Negation => {
1008 enable = false;
1009 }
1010 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1011 flags.case_insensitive = Some(enable);
1012 }
1013 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1014 flags.multi_line = Some(enable);
1015 }
1016 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1017 flags.dot_matches_new_line = Some(enable);
1018 }
1019 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1020 flags.swap_greed = Some(enable);
1021 }
1022 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1023 flags.unicode = Some(enable);
1024 }
1025 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1026 }
1027 }
1028 flags
1029 }
1030
merge(&mut self, previous: &Flags)1031 fn merge(&mut self, previous: &Flags) {
1032 if self.case_insensitive.is_none() {
1033 self.case_insensitive = previous.case_insensitive;
1034 }
1035 if self.multi_line.is_none() {
1036 self.multi_line = previous.multi_line;
1037 }
1038 if self.dot_matches_new_line.is_none() {
1039 self.dot_matches_new_line = previous.dot_matches_new_line;
1040 }
1041 if self.swap_greed.is_none() {
1042 self.swap_greed = previous.swap_greed;
1043 }
1044 if self.unicode.is_none() {
1045 self.unicode = previous.unicode;
1046 }
1047 }
1048
case_insensitive(&self) -> bool1049 fn case_insensitive(&self) -> bool {
1050 self.case_insensitive.unwrap_or(false)
1051 }
1052
multi_line(&self) -> bool1053 fn multi_line(&self) -> bool {
1054 self.multi_line.unwrap_or(false)
1055 }
1056
dot_matches_new_line(&self) -> bool1057 fn dot_matches_new_line(&self) -> bool {
1058 self.dot_matches_new_line.unwrap_or(false)
1059 }
1060
swap_greed(&self) -> bool1061 fn swap_greed(&self) -> bool {
1062 self.swap_greed.unwrap_or(false)
1063 }
1064
unicode(&self) -> bool1065 fn unicode(&self) -> bool {
1066 self.unicode.unwrap_or(true)
1067 }
1068 }
1069
hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes1070 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1071 let ranges: Vec<_> = ascii_class(kind)
1072 .iter()
1073 .cloned()
1074 .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1075 .collect();
1076 hir::ClassBytes::new(ranges)
1077 }
1078
/// Returns the inclusive character ranges that make up the given ASCII
/// class.
///
/// Every range in every table lies within `'\x00'..='\x7F'`, and the ranges
/// of each table are listed in ascending order.
fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
    use ast::ClassAsciiKind::*;
    match *kind {
        // Digits and letters.
        Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
        // Letters only.
        Alpha => &[('A', 'Z'), ('a', 'z')],
        // The whole ASCII range.
        Ascii => &[('\x00', '\x7F')],
        // Tab and space.
        Blank => &[('\t', '\t'), (' ', ' ')],
        // 0x00 through 0x1F, plus 0x7F.
        Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
        Digit => &[('0', '9')],
        // `!` through `~`; unlike `Print`, this excludes the space.
        Graph => &[('!', '~')],
        Lower => &[('a', 'z')],
        // Space through `~`.
        Print => &[(' ', '~')],
        // The four runs between `!` and `~` that skip digits and letters.
        Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
        // Tab, line feed, 0x0B, 0x0C, carriage return and space.
        Space => &[
            ('\t', '\t'),
            ('\n', '\n'),
            ('\x0B', '\x0B'),
            ('\x0C', '\x0C'),
            ('\r', '\r'),
            (' ', ' '),
        ],
        Upper => &[('A', 'Z')],
        // `Alnum` plus the underscore.
        Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
        // Hexadecimal digits in either case.
        Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
    }
}
1105
1106 #[cfg(test)]
1107 mod tests {
1108 use ast::parse::ParserBuilder;
1109 use ast::{self, Ast, Position, Span};
1110 use hir::{self, Hir, HirKind};
1111 use unicode::{self, ClassQuery};
1112
1113 use super::{ascii_class, TranslatorBuilder};
1114
// We create these errors to compare with real hir::Errors in the tests.
// We define equality between TestError and hir::Error to disregard the
// pattern string in hir::Error, which is annoying to provide in tests.
#[derive(Clone, Debug)]
struct TestError {
    // Span that the translation error is expected to carry.
    span: Span,
    // Kind that the translation error is expected to carry.
    kind: hir::ErrorKind,
}
1123
1124 impl PartialEq<hir::Error> for TestError {
eq(&self, other: &hir::Error) -> bool1125 fn eq(&self, other: &hir::Error) -> bool {
1126 self.span == other.span && self.kind == other.kind
1127 }
1128 }
1129
1130 impl PartialEq<TestError> for hir::Error {
eq(&self, other: &TestError) -> bool1131 fn eq(&self, other: &TestError) -> bool {
1132 self.span == other.span && self.kind == other.kind
1133 }
1134 }
1135
parse(pattern: &str) -> Ast1136 fn parse(pattern: &str) -> Ast {
1137 ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1138 }
1139
t(pattern: &str) -> Hir1140 fn t(pattern: &str) -> Hir {
1141 TranslatorBuilder::new()
1142 .allow_invalid_utf8(false)
1143 .build()
1144 .translate(pattern, &parse(pattern))
1145 .unwrap()
1146 }
1147
t_err(pattern: &str) -> hir::Error1148 fn t_err(pattern: &str) -> hir::Error {
1149 TranslatorBuilder::new()
1150 .allow_invalid_utf8(false)
1151 .build()
1152 .translate(pattern, &parse(pattern))
1153 .unwrap_err()
1154 }
1155
t_bytes(pattern: &str) -> Hir1156 fn t_bytes(pattern: &str) -> Hir {
1157 TranslatorBuilder::new()
1158 .allow_invalid_utf8(true)
1159 .build()
1160 .translate(pattern, &parse(pattern))
1161 .unwrap()
1162 }
1163
hir_lit(s: &str) -> Hir1164 fn hir_lit(s: &str) -> Hir {
1165 match s.len() {
1166 0 => Hir::empty(),
1167 _ => {
1168 let lits = s
1169 .chars()
1170 .map(hir::Literal::Unicode)
1171 .map(Hir::literal)
1172 .collect();
1173 Hir::concat(lits)
1174 }
1175 }
1176 }
1177
hir_blit(s: &[u8]) -> Hir1178 fn hir_blit(s: &[u8]) -> Hir {
1179 match s.len() {
1180 0 => Hir::empty(),
1181 1 => Hir::literal(hir::Literal::Byte(s[0])),
1182 _ => {
1183 let lits = s
1184 .iter()
1185 .cloned()
1186 .map(hir::Literal::Byte)
1187 .map(Hir::literal)
1188 .collect();
1189 Hir::concat(lits)
1190 }
1191 }
1192 }
1193
hir_group(i: u32, expr: Hir) -> Hir1194 fn hir_group(i: u32, expr: Hir) -> Hir {
1195 Hir::group(hir::Group {
1196 kind: hir::GroupKind::CaptureIndex(i),
1197 hir: Box::new(expr),
1198 })
1199 }
1200
hir_group_name(i: u32, name: &str, expr: Hir) -> Hir1201 fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1202 Hir::group(hir::Group {
1203 kind: hir::GroupKind::CaptureName {
1204 name: name.to_string(),
1205 index: i,
1206 },
1207 hir: Box::new(expr),
1208 })
1209 }
1210
hir_group_nocap(expr: Hir) -> Hir1211 fn hir_group_nocap(expr: Hir) -> Hir {
1212 Hir::group(hir::Group {
1213 kind: hir::GroupKind::NonCapturing,
1214 hir: Box::new(expr),
1215 })
1216 }
1217
hir_quest(greedy: bool, expr: Hir) -> Hir1218 fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1219 Hir::repetition(hir::Repetition {
1220 kind: hir::RepetitionKind::ZeroOrOne,
1221 greedy: greedy,
1222 hir: Box::new(expr),
1223 })
1224 }
1225
hir_star(greedy: bool, expr: Hir) -> Hir1226 fn hir_star(greedy: bool, expr: Hir) -> Hir {
1227 Hir::repetition(hir::Repetition {
1228 kind: hir::RepetitionKind::ZeroOrMore,
1229 greedy: greedy,
1230 hir: Box::new(expr),
1231 })
1232 }
1233
hir_plus(greedy: bool, expr: Hir) -> Hir1234 fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1235 Hir::repetition(hir::Repetition {
1236 kind: hir::RepetitionKind::OneOrMore,
1237 greedy: greedy,
1238 hir: Box::new(expr),
1239 })
1240 }
1241
hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir1242 fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1243 Hir::repetition(hir::Repetition {
1244 kind: hir::RepetitionKind::Range(range),
1245 greedy: greedy,
1246 hir: Box::new(expr),
1247 })
1248 }
1249
// Short alias for `Hir::alternation`, used to keep expected values in the
// tests compact.
fn hir_alt(alts: Vec<Hir>) -> Hir {
    Hir::alternation(alts)
}
1253
// Short alias for `Hir::concat`, used to keep expected values in the tests
// compact.
fn hir_cat(exprs: Vec<Hir>) -> Hir {
    Hir::concat(exprs)
}
1257
1258 #[allow(dead_code)]
hir_uclass_query(query: ClassQuery) -> Hir1259 fn hir_uclass_query(query: ClassQuery) -> Hir {
1260 Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1261 }
1262
1263 #[allow(dead_code)]
hir_uclass_perl_word() -> Hir1264 fn hir_uclass_perl_word() -> Hir {
1265 Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1266 }
1267
hir_uclass(ranges: &[(char, char)]) -> Hir1268 fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1269 let ranges: Vec<hir::ClassUnicodeRange> = ranges
1270 .iter()
1271 .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1272 .collect();
1273 Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1274 }
1275
hir_bclass(ranges: &[(u8, u8)]) -> Hir1276 fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1277 let ranges: Vec<hir::ClassBytesRange> = ranges
1278 .iter()
1279 .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1280 .collect();
1281 Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1282 }
1283
hir_bclass_from_char(ranges: &[(char, char)]) -> Hir1284 fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1285 let ranges: Vec<hir::ClassBytesRange> = ranges
1286 .iter()
1287 .map(|&(s, e)| {
1288 assert!(s as u32 <= 0x7F);
1289 assert!(e as u32 <= 0x7F);
1290 hir::ClassBytesRange::new(s as u8, e as u8)
1291 })
1292 .collect();
1293 Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1294 }
1295
hir_case_fold(expr: Hir) -> Hir1296 fn hir_case_fold(expr: Hir) -> Hir {
1297 match expr.into_kind() {
1298 HirKind::Class(mut cls) => {
1299 cls.case_fold_simple();
1300 Hir::class(cls)
1301 }
1302 _ => panic!("cannot case fold non-class Hir expr"),
1303 }
1304 }
1305
hir_negate(expr: Hir) -> Hir1306 fn hir_negate(expr: Hir) -> Hir {
1307 match expr.into_kind() {
1308 HirKind::Class(mut cls) => {
1309 cls.negate();
1310 Hir::class(cls)
1311 }
1312 _ => panic!("cannot negate non-class Hir expr"),
1313 }
1314 }
1315
1316 #[allow(dead_code)]
hir_union(expr1: Hir, expr2: Hir) -> Hir1317 fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1318 use hir::Class::{Bytes, Unicode};
1319
1320 match (expr1.into_kind(), expr2.into_kind()) {
1321 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1322 c1.union(&c2);
1323 Hir::class(hir::Class::Unicode(c1))
1324 }
1325 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1326 c1.union(&c2);
1327 Hir::class(hir::Class::Bytes(c1))
1328 }
1329 _ => panic!("cannot union non-class Hir exprs"),
1330 }
1331 }
1332
1333 #[allow(dead_code)]
hir_difference(expr1: Hir, expr2: Hir) -> Hir1334 fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1335 use hir::Class::{Bytes, Unicode};
1336
1337 match (expr1.into_kind(), expr2.into_kind()) {
1338 (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1339 c1.difference(&c2);
1340 Hir::class(hir::Class::Unicode(c1))
1341 }
1342 (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1343 c1.difference(&c2);
1344 Hir::class(hir::Class::Bytes(c1))
1345 }
1346 _ => panic!("cannot difference non-class Hir exprs"),
1347 }
1348 }
1349
// Short alias for `Hir::anchor`, used to keep expected values in the tests
// compact.
fn hir_anchor(anchor: hir::Anchor) -> Hir {
    Hir::anchor(anchor)
}
1353
// Short alias for `Hir::word_boundary`, used to keep expected values in
// the tests compact.
fn hir_word(wb: hir::WordBoundary) -> Hir {
    Hir::word_boundary(wb)
}
1357
// Checks how patterns made only of empty pieces are translated.
#[test]
fn empty() {
    // Shorthand for the empty Hir expression.
    let nil = Hir::empty;

    // Nothing at all, or only a flag directive, is the empty expression.
    for pattern in ["", "(?i)"].iter() {
        assert_eq!(t(pattern), nil());
    }

    // Groups around nothing keep their group structure.
    assert_eq!(t("()"), hir_group(1, nil()));
    assert_eq!(t("(?:)"), hir_group_nocap(nil()));
    assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", nil()));

    // Empty alternation branches are preserved, at the top level...
    assert_eq!(t("|"), hir_alt(vec![nil(), nil()]));
    let both_groups = vec![hir_group(1, nil()), hir_group(2, nil())];
    assert_eq!(t("()|()"), hir_alt(both_groups));

    // ...and inside a capturing group.
    let leading = hir_alt(vec![nil(), hir_lit("b")]);
    assert_eq!(t("(|b)"), hir_group(1, leading));
    let trailing = hir_alt(vec![hir_lit("a"), nil()]);
    assert_eq!(t("(a|)"), hir_group(1, trailing));
    let middle = hir_alt(vec![hir_lit("a"), nil(), hir_lit("c")]);
    assert_eq!(t("(a||c)"), hir_group(1, middle));
    let all_empty = hir_alt(vec![nil(), nil(), nil()]);
    assert_eq!(t("(||)"), hir_group(1, all_empty));
}
1396
// Checks translation of plain literals in Unicode and byte mode.
#[test]
fn literal() {
    // With invalid UTF-8 disallowed.
    for pattern in ["a", "(?-u)a"].iter() {
        assert_eq!(t(pattern), hir_lit("a"));
    }
    assert_eq!(t("☃"), hir_lit("☃"));
    assert_eq!(t("abcd"), hir_lit("abcd"));

    // With invalid UTF-8 allowed. The second pattern holds the character
    // `a` itself, the third holds the escape sequence `\x61`.
    for pattern in ["(?-u)a", "(?-u)\x61", r"(?-u)\x61"].iter() {
        assert_eq!(t_bytes(pattern), hir_lit("a"));
    }
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    // A non-ASCII character is rejected once Unicode mode is disabled.
    let non_ascii = TestError {
        kind: hir::ErrorKind::UnicodeNotAllowed,
        span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
    };
    assert_eq!(t_err("(?-u)☃"), non_ascii);

    // A non-ASCII byte is rejected when invalid UTF-8 is disallowed.
    let high_byte = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
    };
    assert_eq!(t_err(r"(?-u)\xFF"), high_byte);
}
1430
// Checks translation of literals under the case insensitive flag. The
// assertions that expect Unicode classes are only compiled when the
// `unicode-case` feature is enabled.
#[test]
fn literal_case_insensitive() {
    // Unicode mode: a cased literal becomes a class of its case variants.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)"),
        hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
    );
    // The flag only affects literals between `(?i)` and `(?-i)`.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_lit("a"),
        ])
    );
    // `@` stays a plain literal.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_uclass(&[('B', 'B'), ('b', 'b')]),
            hir_lit("@"),
            hir_uclass(&[('C', 'C'), ('c', 'c')]),
        ])
    );
    // A non-ASCII letter can have more than two case variants.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );

    // Unicode mode disabled: the same literals become byte classes.
    assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
            hir_lit("@"),
            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
        ])
    );

    // Same again with invalid UTF-8 allowed.
    assert_eq!(
        t_bytes("(?i-u)a"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes("(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes(r"(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    // The byte 0xFF stays a plain byte literal.
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    // A non-ASCII character is rejected once Unicode mode is disabled.
    assert_eq!(
        t_err("(?i-u)β"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 8),
            ),
        }
    );
}
1510
// Checks translation of `.` with and without the `s` and `u` flags.
#[test]
fn dot() {
    // Unicode mode: everything except `\n`, or everything under `(?s)`.
    let all_but_newline = [('\0', '\t'), ('\x0B', '\u{10FFFF}')];
    assert_eq!(t("."), hir_uclass(&all_but_newline));
    let all_scalars = [('\0', '\u{10FFFF}')];
    assert_eq!(t("(?s)."), hir_uclass(&all_scalars));

    // Byte mode with invalid UTF-8 allowed: the same two shapes over bytes.
    let bytes_but_newline = [(b'\0', b'\t'), (b'\x0B', b'\xFF')];
    assert_eq!(t_bytes("(?-u)."), hir_bclass(&bytes_but_newline));
    let all_bytes = [(b'\0', b'\xFF')];
    assert_eq!(t_bytes("(?s-u)."), hir_bclass(&all_bytes));

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    let plain_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(6, 1, 7)),
    };
    assert_eq!(t_err("(?-u)."), plain_err);
    let dotall_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(6, 1, 7), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err("(?s-u)."), dotall_err);
}
1546
// Checks translation of anchors and word boundaries.
#[test]
fn assertions() {
    use hir::{Anchor, WordBoundary};

    // Without `(?m)`, `^`/`$` behave like `\A`/`\z`.
    assert_eq!(t("^"), hir_anchor(Anchor::StartText));
    assert_eq!(t("$"), hir_anchor(Anchor::EndText));
    assert_eq!(t(r"\A"), hir_anchor(Anchor::StartText));
    assert_eq!(t(r"\z"), hir_anchor(Anchor::EndText));

    // With `(?m)`, only `^`/`$` switch to line anchors.
    assert_eq!(t("(?m)^"), hir_anchor(Anchor::StartLine));
    assert_eq!(t("(?m)$"), hir_anchor(Anchor::EndLine));
    assert_eq!(t(r"(?m)\A"), hir_anchor(Anchor::StartText));
    assert_eq!(t(r"(?m)\z"), hir_anchor(Anchor::EndText));

    // Word boundaries follow the Unicode flag.
    assert_eq!(t(r"\b"), hir_word(WordBoundary::Unicode));
    assert_eq!(t(r"\B"), hir_word(WordBoundary::UnicodeNegate));
    assert_eq!(t(r"(?-u)\b"), hir_word(WordBoundary::Ascii));
    let ascii_negate = hir_word(WordBoundary::AsciiNegate);
    assert_eq!(t_bytes(r"(?-u)\B"), ascii_negate);

    // A negated ASCII word boundary is rejected when invalid UTF-8 is
    // disallowed.
    let expected = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
    };
    assert_eq!(t_err(r"(?-u)\B"), expected);
}
1577
// Checks translation of capturing, named and non-capturing groups,
// including how capture indices are assigned.
#[test]
fn group() {
    // Shorthands for the single-letter literals used throughout.
    let a = || hir_lit("a");
    let b = || hir_lit("b");
    let c = || hir_lit("c");

    // Unnamed capturing groups.
    assert_eq!(t("(a)"), hir_group(1, a()));
    let pair = || vec![hir_group(1, a()), hir_group(2, b())];
    assert_eq!(t("(a)(b)"), hir_cat(pair()));
    assert_eq!(t("(a)|(b)"), hir_alt(pair()));

    // Named capturing groups.
    assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
    assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", a()));
    let named_pair = vec![
        hir_group_name(1, "foo", a()),
        hir_group_name(2, "bar", b()),
    ];
    assert_eq!(t("(?P<foo>a)(?P<bar>b)"), hir_cat(named_pair));

    // Non-capturing groups do not consume a capture index.
    assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
    assert_eq!(t("(?:a)"), hir_group_nocap(a()));
    let nocap_first = vec![hir_group_nocap(a()), hir_group(1, b())];
    assert_eq!(t("(?:a)(b)"), hir_cat(nocap_first));
    let nocap_middle = vec![
        hir_group(1, a()),
        hir_group_nocap(b()),
        hir_group(2, c()),
    ];
    assert_eq!(t("(a)(?:b)(c)"), hir_cat(nocap_middle));

    // Named groups share the index sequence with unnamed ones.
    let named_middle = vec![
        hir_group(1, a()),
        hir_group_name(2, "foo", b()),
        hir_group(3, c()),
    ];
    assert_eq!(t("(a)(?P<foo>b)(c)"), hir_cat(named_middle));

    // Groups containing only a flag directive are empty groups.
    for pattern in ["()", "((?i))", "((?x))"].iter() {
        assert_eq!(t(pattern), hir_group(1, Hir::empty()));
    }
    let nested = hir_group(1, hir_group(2, Hir::empty()));
    assert_eq!(t("(((?x)))"), nested);
}
1634
// Checks how flags are scoped: flags set inside a group end with that
// group, while flags set by a bare directive last until changed. The
// assertions that expect Unicode classes are only compiled when the
// `unicode-case` feature is enabled.
#[test]
fn flags() {
    // Flags given in a group header apply only inside that group.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("β"),
        ])
    );
    // A directive inside a group also ends with the group.
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![
            hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    // A group can turn off a flag that is on outside of it.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_lit("a")),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
    // Several flags in one directive.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
        ])
    );
    // A later directive can disable one flag while keeping another.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartText),
        ])
    );
    // `U` swaps the meaning of greedy and lazy operators.
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(false, hir_lit("a")),
        ])
    );
    // A directive in the middle of a group affects the rest of that group
    // only.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ])),
            hir_lit("a"),
        ])
    );
    // The outer flag state is restored once the group ends.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
}
1724
// Checks that escaped meta characters translate to the literal characters
// they stand for.
#[test]
fn escape() {
    let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
    let unescaped = r"\.+*?()|[]{}^$#";
    assert_eq!(t(pattern), hir_lit(unescaped));
}
1732
// Checks translation of the repetition operators, their lazy variants and
// their precedence relative to concatenation and alternation.
#[test]
fn repetition() {
    // `?`, `*` and `+`: pattern, builder for the expected value, greedy.
    let simple: [(&str, fn(bool, Hir) -> Hir, bool); 6] = [
        ("a?", hir_quest, true),
        ("a*", hir_star, true),
        ("a+", hir_plus, true),
        ("a??", hir_quest, false),
        ("a*?", hir_star, false),
        ("a+?", hir_plus, false),
    ];
    for &(pattern, build, greedy) in simple.iter() {
        assert_eq!(t(pattern), build(greedy, hir_lit("a")));
    }

    // Counted repetitions: pattern, greedy, expected range.
    let counted = vec![
        ("a{1}", true, hir::RepetitionRange::Exactly(1)),
        ("a{1,}", true, hir::RepetitionRange::AtLeast(1)),
        ("a{1,2}", true, hir::RepetitionRange::Bounded(1, 2)),
        ("a{1}?", false, hir::RepetitionRange::Exactly(1)),
        ("a{1,}?", false, hir::RepetitionRange::AtLeast(1)),
        ("a{1,2}?", false, hir::RepetitionRange::Bounded(1, 2)),
    ];
    for (pattern, greedy, range) in counted {
        assert_eq!(t(pattern), hir_range(greedy, range, hir_lit("a")));
    }

    // A repetition operator binds to the item right before it.
    let opt_b = || hir_quest(true, hir_lit("b"));
    assert_eq!(t("ab?"), hir_cat(vec![hir_lit("a"), opt_b()]));
    let ab = hir_cat(vec![hir_lit("a"), hir_lit("b")]);
    assert_eq!(t("(ab)?"), hir_quest(true, hir_group(1, ab)));
    assert_eq!(t("a|b?"), hir_alt(vec![hir_lit("a"), opt_b()]));
}
1787
// Checks translation of concatenations and alternations, with and without
// enclosing groups.
#[test]
fn cat_alt() {
    // Turns each string into the Hir that `hir_lit` builds for it.
    let lits = |items: &[&str]| -> Vec<Hir> {
        items.iter().map(|s| hir_lit(s)).collect()
    };

    assert_eq!(t("(ab)"), hir_group(1, hir_cat(lits(&["a", "b"]))));

    // Top-level alternations.
    assert_eq!(t("a|b"), hir_alt(lits(&["a", "b"])));
    assert_eq!(t("a|b|c"), hir_alt(lits(&["a", "b", "c"])));
    assert_eq!(t("ab|bc|cd"), hir_alt(lits(&["ab", "bc", "cd"])));

    // The same alternations inside a capturing group.
    assert_eq!(t("(a|b)"), hir_group(1, hir_alt(lits(&["a", "b"]))));
    let three = hir_alt(lits(&["a", "b", "c"]));
    assert_eq!(t("(a|b|c)"), hir_group(1, three));
    let three_pairs = hir_alt(lits(&["ab", "bc", "cd"]));
    assert_eq!(t("(ab|bc|cd)"), hir_group(1, three_pairs));

    // Nested groups, built from the inside out.
    let innermost = hir_group(3, hir_lit("cd"));
    let middle = hir_group(2, hir_alt(vec![hir_lit("bc"), innermost]));
    let outer = hir_group(1, hir_alt(vec![hir_lit("ab"), middle]));
    assert_eq!(t("(ab|(bc|(cd)))"), outer);
}
1838
// Checks translation of the POSIX style ASCII classes such as
// `[[:alnum:]]`.
#[test]
fn class_ascii() {
    // In Unicode mode every named class is a Unicode class holding exactly
    // the ranges listed by `ascii_class`.
    let named = [
        ("[[:alnum:]]", ast::ClassAsciiKind::Alnum),
        ("[[:alpha:]]", ast::ClassAsciiKind::Alpha),
        ("[[:ascii:]]", ast::ClassAsciiKind::Ascii),
        ("[[:blank:]]", ast::ClassAsciiKind::Blank),
        ("[[:cntrl:]]", ast::ClassAsciiKind::Cntrl),
        ("[[:digit:]]", ast::ClassAsciiKind::Digit),
        ("[[:graph:]]", ast::ClassAsciiKind::Graph),
        ("[[:lower:]]", ast::ClassAsciiKind::Lower),
        ("[[:print:]]", ast::ClassAsciiKind::Print),
        ("[[:punct:]]", ast::ClassAsciiKind::Punct),
        ("[[:space:]]", ast::ClassAsciiKind::Space),
        ("[[:upper:]]", ast::ClassAsciiKind::Upper),
        ("[[:word:]]", ast::ClassAsciiKind::Word),
        ("[[:xdigit:]]", ast::ClassAsciiKind::Xdigit),
    ];
    for &(pattern, ref kind) in named.iter() {
        assert_eq!(t(pattern), hir_uclass(ascii_class(kind)));
    }

    // The remaining cases all revolve around `lower`.
    let lower = ascii_class(&ast::ClassAsciiKind::Lower);

    // Negation and case folding in Unicode mode.
    assert_eq!(t("[[:^lower:]]"), hir_negate(hir_uclass(lower)));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[[:lower:]]"),
        hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ])
    );

    // With Unicode mode disabled the result is a byte class.
    assert_eq!(t("(?-u)[[:lower:]]"), hir_bclass_from_char(lower));
    let folded = hir_case_fold(hir_bclass_from_char(lower));
    assert_eq!(t("(?i-u)[[:lower:]]"), folded);

    // A negated byte class is rejected when invalid UTF-8 is disallowed.
    let negated_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(6, 1, 7), Position::new(16, 1, 17)),
    };
    assert_eq!(t_err("(?-u)[[:^lower:]]"), negated_err);
    let folded_negated_err = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(7, 1, 8), Position::new(17, 1, 18)),
    };
    assert_eq!(t_err("(?i-u)[[:^lower:]]"), folded_negated_err);
}
1945
// Checks translation of the Perl classes `\d`, `\s`, `\w` and their
// negations, in Unicode and ASCII-only mode. Only compiled when the
// `unicode-perl` feature is enabled; the `(?i)` assertions in Unicode mode
// additionally require `unicode-case`.
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl() {
    // Unicode
    assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
    assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
    assert_eq!(t(r"\w"), hir_uclass_perl_word());
    // The expected classes are the same with and without `(?i)`.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\d"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\s"),
        hir_uclass_query(ClassQuery::Binary("space"))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());

    // Unicode, negated
    assert_eq!(
        t(r"\D"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    assert_eq!(
        t(r"\S"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
    );
    assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\D"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\S"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));

    // ASCII only
    assert_eq!(
        t(r"(?-u)\d"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t(r"(?-u)\s"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t(r"(?-u)\w"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
    );
    // The expected classes are the same with and without `i`.
    assert_eq!(
        t(r"(?i-u)\d"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t(r"(?i-u)\s"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t(r"(?i-u)\w"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
    );

    // ASCII only, negated
    assert_eq!(
        t(r"(?-u)\D"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t(r"(?-u)\S"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Space
        )))
    );
    assert_eq!(
        t(r"(?-u)\W"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
    assert_eq!(
        t(r"(?i-u)\D"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t(r"(?i-u)\S"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Space
        )))
    );
    assert_eq!(
        t(r"(?i-u)\W"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
}
2053
// Without the `unicode-perl` feature, `\w` in Unicode mode must fail with
// `UnicodePerlClassNotFound` spanning the whole escape.
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    let whole_escape = Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_escape,
    };
    assert_eq!(t_err(r"\w"), expected);
}
2068
// Without both the `unicode-perl` and `unicode-bool` features, `\s` in
// Unicode mode must fail with `UnicodePerlClassNotFound` spanning the
// whole escape.
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    let whole_escape = Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_escape,
    };
    assert_eq!(t_err(r"\s"), expected);
}
2083
// Without both the `unicode-perl` and `unicode-gencat` features, `\d` in
// Unicode mode must fail with `UnicodePerlClassNotFound` spanning the
// whole escape.
#[test]
#[cfg(all(
    not(feature = "unicode-perl"),
    not(feature = "unicode-gencat")
))]
fn class_perl_digit_disabled() {
    let whole_escape = Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_escape,
    };
    assert_eq!(t_err(r"\d"), expected);
}
2101
/// Exercises translation of general-category Unicode classes (`\p{..}` and
/// `\P{..}`) when the `unicode-gencat` feature is enabled.
///
/// Covers the one-letter, named, loosely spelled and `gc:`/`gc=` forms, the
/// negated forms, the special `any`/`assigned`/`ascii` names, and the errors
/// reported when Unicode mode is disabled or a property/value is unknown.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    // Every spelling of "separator" must produce the same class as `Z`.
    assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
    assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
    assert_eq!(
        t(r"\p{Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{se PaRa ToR}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{gc:Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{gc=Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{Other}"),
        hir_uclass_query(ClassQuery::Binary("Other"))
    );
    assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));

    // Negated forms.
    assert_eq!(
        t(r"\PZ"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );
    assert_eq!(
        t(r"\P{separator}"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );
    // `\P` negates and `!=` negates again, so the two cancel out and the
    // result is the plain (non-negated) separator class.
    assert_eq!(
        t(r"\P{gc!=separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );

    // Special class names, with and without the `gc:` prefix.
    assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
    assert_eq!(
        t(r"\p{assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );
    assert_eq!(
        t(r"\p{gc:any}"),
        hir_uclass_query(ClassQuery::Binary("Any"))
    );
    assert_eq!(
        t(r"\p{gc:assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{gc:ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );

    // Unicode classes are rejected once Unicode mode is turned off; the
    // reported span starts after the `(?-u)` flag group.
    assert_eq!(
        t_err(r"(?-u)\pZ"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(8, 1, 9)
            ),
        }
    );
    assert_eq!(
        t_err(r"(?-u)\p{Separator}"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(18, 1, 19)
            ),
        }
    );
    // Unknown property names.
    assert_eq!(
        t_err(r"\pE"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(3, 1, 4)
            ),
        }
    );
    assert_eq!(
        t_err(r"\p{Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
    // Known property (`gc`) with an unknown value.
    assert_eq!(
        t_err(r"\p{gc:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(10, 1, 11)
            ),
        }
    );
}
2215
/// Without the `unicode-gencat` feature, general-category names such as
/// `Separator` and `Any` must be reported as `UnicodePropertyNotFound`.
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    // Builds the expected error for a span beginning at the start of the
    // pattern and finishing at `end` (the column is always `end + 1` here).
    let property_not_found = |end: usize| TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: Span::new(
            Position::new(0, 1, 1),
            Position::new(end, 1, end + 1),
        ),
    };

    assert_eq!(t_err(r"\p{Separator}"), property_not_found(13));
    assert_eq!(t_err(r"\p{Any}"), property_not_found(7));
}
2241
/// Exercises translation of script classes when the `unicode-script`
/// feature is enabled: the plain form, the case-insensitive forms (only with
/// `unicode-case`), and the error for unknown `sc`/`scx` values.
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));

    assert_eq!(t(r"\p{Greek}"), greek());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
    // Case folding is applied before negation.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\P{Greek}"), hir_negate(hir_case_fold(greek())));

    // Expected error for an unknown property value; the span begins at the
    // start of the pattern and finishes at `end`.
    let value_not_found = |end: usize| TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: Span::new(
            Position::new(0, 1, 1),
            Position::new(end, 1, end + 1),
        ),
    };
    assert_eq!(t_err(r"\p{sc:Foo}"), value_not_found(10));
    assert_eq!(t_err(r"\p{scx:Foo}"), value_not_found(11));
}
2283
/// Without the `unicode-script` feature, script lookups (both the bare name
/// and the `scx:` form) must be reported as `UnicodePropertyNotFound`.
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    // Builds the expected error for a span beginning at the start of the
    // pattern and finishing at `end` (the column is always `end + 1` here).
    let property_not_found = |end: usize| TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: Span::new(
            Position::new(0, 1, 1),
            Position::new(end, 1, end + 1),
        ),
    };

    assert_eq!(t_err(r"\p{Greek}"), property_not_found(9));
    assert_eq!(t_err(r"\p{scx:Greek}"), property_not_found(13));
}
2309
/// With the `unicode-age` feature enabled, an unknown age value must be
/// reported as `UnicodePropertyValueNotFound`.
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    let reported_span =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: reported_span,
    };
    assert_eq!(t_err(r"\p{age:Foo}"), expected);
}
2324
/// Negating the `any` class must be rejected with `EmptyClassNotAllowed`.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_any_empty() {
    let reported_span =
        Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8));
    let expected = TestError {
        kind: hir::ErrorKind::EmptyClassNotAllowed,
        span: reported_span,
    };
    assert_eq!(t_err(r"\P{any}"), expected);
}
2339
/// Without the `unicode-age` feature, an `age:` query must be reported as
/// `UnicodePropertyNotFound`.
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    let reported_span =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: reported_span,
    };
    assert_eq!(t_err(r"\p{age:3.0}"), expected);
}
2354
// Exercises translation of bracketed character classes (`[...]`): plain
// items and ranges, embedded Perl/Unicode classes, double negation, byte
// classes under `(?-u)`, case-insensitive classes, negated classes, a few
// unusual literal characters, and the errors expected for classes that are
// not valid UTF-8 or that end up empty.
//
// Assertions that need optional Unicode data are gated on the matching
// Cargo feature.
#[test]
fn class_bracketed() {
    // Simple items and ranges; overlapping or adjacent ranges are merged.
    assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
    assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
    assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
    assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
    assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
    // Both the escape sequence `\n` and a literal newline character.
    assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
    assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
    // Perl and Unicode classes nested inside brackets.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\pZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\p{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    // A negated bracket around a negated class yields the positive class.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\PZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\P{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(
        t(r"(?i)[^\D]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\P{greek}]"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
    );

    // With Unicode mode disabled, byte classes are produced instead.
    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    // Case-insensitive classes contain every case variant of each item.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[k]"),
        hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[β]"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );
    // In byte mode only the two ASCII variants are expected.
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));

    // Negated classes.
    assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        hir_negate(hir_bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(
        t(r"[^\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\pZ]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\p{separator}]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    // With `(?i)`, case folding is applied before the negation.
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\p{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[\P{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );

    // Test some weird cases.
    assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));

    // `&`, `~` and `-` are also set-operator characters; check each one as
    // a lone literal, escaped, repeated, and as either end of a range.
    assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
    assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));

    assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
    assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));

    assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
    assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));

    // The same negated byte class that `t_bytes` accepts above is an
    // `InvalidUtf8` error when translated through `t_err`.
    assert_eq!(
        t_err("(?-u)[^a]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(9, 1, 10)
            ),
        }
    );
    // Negating "space or not space" leaves nothing, which is rejected in
    // both Unicode and byte mode.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"(?-u)[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(12, 1, 13)
            ),
        }
    );
}
2511
// Exercises implicit unions inside a bracketed class, i.e. several items
// written side by side. The expected value is assembled from `hir_union`
// over the individual classes, and then wrapped in `hir_case_fold` and/or
// `hir_negate` for the `(?i)` and `[^...]` variants.
//
// Most assertions need several Unicode data features at once and are gated
// accordingly.
#[test]
fn class_bracketed_union() {
    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
    // Literal items mixed with a Unicode class.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[a\pZb]"),
        hir_union(
            hir_uclass(&[('a', 'b')]),
            hir_uclass_query(ClassQuery::Binary("separator"))
        )
    );
    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    assert_eq!(
        t(r"[\pZ\p{Greek}]"),
        hir_union(
            hir_uclass_query(ClassQuery::Binary("greek")),
            hir_uclass_query(ClassQuery::Binary("separator"))
        )
    );
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[\p{age:3.0}\pZ\p{Greek}]"),
        hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        )
    );
    // Nested brackets are expected to translate to the union of everything
    // they contain.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
        hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("cyrillic")),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    hir_uclass_query(ClassQuery::Binary("separator"))
                )
            )
        )
    );

    // Case-insensitive: the whole union is case folded.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
        hir_case_fold(hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        ))
    );
    // Negated: the whole union is negated.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        ))
    );
    // Both: case folding is applied first, then negation.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(hir_case_fold(hir_union(
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            }),
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator"))
            )
        )))
    );
}
2628
/// Exercises bracketed classes containing a nested negated class, with and
/// without an outer negation and with and without `(?i)`, including the
/// combinations that end up empty and must therefore be rejected.
#[test]
fn class_bracketed_nested() {
    let only_c = || hir_uclass(&[('c', 'c')]);

    // `a` / `a-b` are already members of `[^c]`, so they add nothing.
    assert_eq!(t(r"[a[^c]]"), hir_negate(only_c()));
    assert_eq!(t(r"[a-b[^c]]"), hir_negate(only_c()));
    // `a-c` together with `[^c]` covers everything.
    assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

    // The outer negation leaves just `c`.
    assert_eq!(t(r"[^a[^c]]"), only_c());
    assert_eq!(t(r"[^a-b[^c]]"), only_c());

    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[a[^c]]"), hir_negate(hir_case_fold(only_c())));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[a-b[^c]]"), hir_negate(hir_case_fold(only_c())));

    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)[^a-b[^c]]"),
        hir_uclass(&[('C', 'C'), ('c', 'c')])
    );

    // Expected error for a class that ends up empty; in these cases the
    // column is always one more than the offset.
    let empty_class = |start: usize, end: usize| TestError {
        kind: hir::ErrorKind::EmptyClassNotAllowed,
        span: Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        ),
    };
    assert_eq!(t_err(r"[^a-c[^c]]"), empty_class(0, 10));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t_err(r"(?i)[^a-c[^c]]"), empty_class(4, 14));
}
2679
/// Exercises the intersection operator (`&&`) inside bracketed classes, in
/// Unicode and byte mode, with and without case insensitivity, plus a few
/// cases about which characters may appear unescaped next to `&&` and how
/// `&&` binds relative to an implicit union.
#[test]
fn class_bracketed_intersect() {
    // Unicode mode.
    for &(pattern, lo, hi) in [
        ("[abc&&b-c]", 'b', 'c'),
        ("[abc&&[b-c]]", 'b', 'c'),
        ("[[abc]&&[b-c]]", 'b', 'c'),
        ("[a-z&&b-y&&c-x]", 'c', 'x'),
        ("[c-da-b&&a-d]", 'a', 'd'),
        ("[a-d&&c-da-b]", 'a', 'd'),
        (r"[a-z&&a-c]", 'a', 'c'),
        (r"[[a-z&&a-c]]", 'a', 'c'),
    ]
    .iter()
    {
        assert_eq!(t(pattern), hir_uclass(&[(lo, hi)]), "pattern: {}", pattern);
    }
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

    // Byte mode.
    for &(pattern, lo, hi) in [
        ("(?-u)[abc&&b-c]", b'b', b'c'),
        ("(?-u)[abc&&[b-c]]", b'b', b'c'),
        ("(?-u)[[abc]&&[b-c]]", b'b', b'c'),
        ("(?-u)[a-z&&b-y&&c-x]", b'c', b'x'),
        ("(?-u)[c-da-b&&a-d]", b'a', b'd'),
        ("(?-u)[a-d&&c-da-b]", b'a', b'd'),
    ]
    .iter()
    {
        assert_eq!(t(pattern), hir_bclass(&[(lo, hi)]), "pattern: {}", pattern);
    }

    // Case-insensitive Unicode mode needs the case folding tables.
    #[cfg(feature = "unicode-case")]
    {
        for &(pattern, lo, hi) in [
            ("(?i)[abc&&b-c]", 'b', 'c'),
            ("(?i)[abc&&[b-c]]", 'b', 'c'),
            ("(?i)[[abc]&&[b-c]]", 'b', 'c'),
            ("(?i)[a-z&&b-y&&c-x]", 'c', 'x'),
            ("(?i)[c-da-b&&a-d]", 'a', 'd'),
            ("(?i)[a-d&&c-da-b]", 'a', 'd'),
        ]
        .iter()
        {
            assert_eq!(
                t(pattern),
                hir_case_fold(hir_uclass(&[(lo, hi)])),
                "pattern: {}",
                pattern
            );
        }
    }

    // Case-insensitive byte mode.
    for &(pattern, lo, hi) in [
        ("(?i-u)[abc&&b-c]", b'b', b'c'),
        ("(?i-u)[abc&&[b-c]]", b'b', b'c'),
        ("(?i-u)[[abc]&&[b-c]]", b'b', b'c'),
        ("(?i-u)[a-z&&b-y&&c-x]", b'c', b'x'),
        ("(?i-u)[c-da-b&&a-d]", b'a', b'd'),
        ("(?i-u)[a-d&&c-da-b]", b'a', b'd'),
    ]
    .iter()
    {
        assert_eq!(
            t(pattern),
            hir_case_fold(hir_bclass(&[(lo, hi)])),
            "pattern: {}",
            pattern
        );
    }

    // In `[a^]`, `^` does not need to be escaped, so it makes sense that
    // `^` is also allowed to be unescaped after `&&`.
    assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
    // `]` needs to be escaped after `&&` since it's not at start of class.
    assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
    assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));

    // Test precedence.
    let a_to_w_minus_c_to_g = hir_uclass(&[('a', 'b'), ('h', 'w')]);
    assert_eq!(t(r"[a-w&&[^c-g]z]"), a_to_w_minus_c_to_g);
}
2769
// Exercises how negation combines with intersection (`&&`) in bracketed
// classes: negation of the outer class, of a nested class, of both, and of
// each operand separately. The Unicode-mode cases are checked first, then
// the same shapes in byte mode via `t_bytes`.
#[test]
fn class_bracketed_intersect_negate() {
    // Intersecting word characters with digits leaves the digits, which the
    // outer `^` then negates.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t(r"[^\w&&\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t(r"[^[\w&&\d]]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    // Negating twice gives back the digits.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t(r"[^[^\w&&\d]]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    // "Not a word character and not a digit" is expected to equal "not a
    // word character".
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));

    // Byte-mode counterparts, compared against the ASCII class tables.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t_bytes(r"(?-u)[^\w&&\d]"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[\w&&\d]]"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[^\w&&\d]]"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
}
2819
/// Exercises the difference operator (`--`) inside bracketed classes, in
/// Unicode mode (only with `unicode-gencat`) and in byte mode.
#[test]
fn class_bracketed_difference() {
    #[cfg(feature = "unicode-gencat")]
    {
        let letters = hir_uclass_query(ClassQuery::Binary("letter"));
        let ascii = hir_uclass(&[('\0', '\x7F')]);
        assert_eq!(t(r"[\pL--[:ascii:]]"), hir_difference(letters, ascii));
    }

    // Alphabetic minus lowercase leaves the uppercase ASCII letters.
    let upper_ascii = hir_bclass(&[(b'A', b'Z')]);
    assert_eq!(t(r"(?-u)[[:alpha:]--[:lower:]]"), upper_ascii);
}
2836
/// Exercises the symmetric difference operator (`~~`) inside bracketed
/// classes, in Unicode mode and in byte mode.
#[test]
fn class_bracketed_symmetric_difference() {
    #[cfg(feature = "unicode-script")]
    {
        // Code points on which `sc:Greek` and `scx:Greek` disagree.
        let expected = hir_uclass(&[
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('\u{1DC0}', '\u{1DC1}'),
        ]);
        assert_eq!(t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), expected);
    }

    // `a-g` and `c-j` overlap on `c-g`, which drops out of the result.
    let unicode_expected = hir_uclass(&[('a', 'b'), ('h', 'j')]);
    assert_eq!(t(r"[a-g~~c-j]"), unicode_expected);

    let bytes_expected = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]);
    assert_eq!(t(r"(?-u)[a-g~~c-j]"), bytes_expected);
}
2855
// Exercises translation under the `x` flag: patterns that have whitespace
// and `# ...` comments in the middle of an escape sequence, a Unicode class
// or a counted repetition must translate to the same HIR as their compact
// forms would.
#[test]
fn ignore_whitespace() {
    // NOTE(review): `\12` yielding "\n" looks like an octal escape, which
    // presumably depends on how the test parser is configured — confirm.
    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
    // Bracketed hex escape, spread over spaces and then over comment lines.
    assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
    assert_eq!(
        t(r"(?x)\x # comment
{ # comment
53 # comment
} #comment"),
        hir_lit("S")
    );

    // Two-digit hex escape, with whitespace before or between the digits.
    assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
    assert_eq!(
        t(r"(?x)\x # comment
53 # comment"),
        hir_lit("S")
    );
    assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));

    // Unicode class name spread over several commented lines.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"(?x)\p # comment
{ # comment
Separator # comment
} # comment"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );

    // Counted repetition `{5,10}` spread over several commented lines.
    assert_eq!(
        t(r"(?x)a # comment
{ # comment
5 # comment
, # comment
10 # comment
} # comment"),
        hir_range(
            true,
            hir::RepetitionRange::Bounded(5, 10),
            hir_lit("a")
        )
    );

    // An escaped space is kept as a literal space.
    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
2901
/// Checks `is_always_utf8` on HIRs translated with `t_bytes`.
#[test]
fn analysis_is_always_utf8() {
    // Positive examples.
    for pattern in [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ]
    .iter()
    {
        assert!(
            t_bytes(pattern).is_always_utf8(),
            "expected always-UTF-8: {}",
            pattern
        );
    }

    // Negative examples.
    for pattern in [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ]
    .iter()
    {
        assert!(
            !t_bytes(pattern).is_always_utf8(),
            "expected not always-UTF-8: {}",
            pattern
        );
    }
}
2924
/// Checks `is_all_assertions`: true for patterns built only from anchors
/// and word boundaries (including groups, repetitions and alternations of
/// them), false as soon as a literal is involved.
#[test]
fn analysis_is_all_assertions() {
    // Positive examples.
    for pattern in [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ]
    .iter()
    {
        assert!(
            t(pattern).is_all_assertions(),
            "expected all assertions: {}",
            pattern
        );
    }

    // Negative examples.
    let anchored_literal = t(r"^a");
    assert!(!anchored_literal.is_all_assertions());
}
2942
/// Checks the four anchoring predicates (`is_anchored_start`,
/// `is_anchored_end`, `is_line_anchored_start`, `is_line_anchored_end`) on
/// patterns built from `^` and `$`, alone and combined with concatenation,
/// alternation, groups, repetition, word boundaries and multi-line anchors.
#[test]
fn analysis_is_anchored() {
    // Positive examples.
    assert!(t(r"^").is_anchored_start());
    assert!(t(r"$").is_anchored_end());
    assert!(t(r"^").is_line_anchored_start());
    assert!(t(r"$").is_line_anchored_end());

    assert!(t(r"^^").is_anchored_start());
    assert!(t(r"$$").is_anchored_end());
    assert!(t(r"^^").is_line_anchored_start());
    assert!(t(r"$$").is_line_anchored_end());

    assert!(t(r"^$").is_anchored_start());
    assert!(t(r"^$").is_anchored_end());
    assert!(t(r"^$").is_line_anchored_start());
    assert!(t(r"^$").is_line_anchored_end());

    assert!(t(r"^foo").is_anchored_start());
    assert!(t(r"foo$").is_anchored_end());
    assert!(t(r"^foo").is_line_anchored_start());
    assert!(t(r"foo$").is_line_anchored_end());

    // An alternation counts as anchored when every branch is.
    assert!(t(r"^foo|^bar").is_anchored_start());
    assert!(t(r"foo$|bar$").is_anchored_end());
    assert!(t(r"^foo|^bar").is_line_anchored_start());
    assert!(t(r"foo$|bar$").is_line_anchored_end());

    assert!(t(r"^(foo|bar)").is_anchored_start());
    assert!(t(r"(foo|bar)$").is_anchored_end());
    assert!(t(r"^(foo|bar)").is_line_anchored_start());
    assert!(t(r"(foo|bar)$").is_line_anchored_end());

    // `+` requires at least one occurrence, so the anchor still applies.
    assert!(t(r"^+").is_anchored_start());
    assert!(t(r"$+").is_anchored_end());
    assert!(t(r"^+").is_line_anchored_start());
    assert!(t(r"$+").is_line_anchored_end());
    assert!(t(r"^++").is_anchored_start());
    assert!(t(r"$++").is_anchored_end());
    assert!(t(r"^++").is_line_anchored_start());
    assert!(t(r"$++").is_line_anchored_end());
    assert!(t(r"(^)+").is_anchored_start());
    assert!(t(r"($)+").is_anchored_end());
    assert!(t(r"(^)+").is_line_anchored_start());
    assert!(t(r"($)+").is_line_anchored_end());

    // Anchors in the "wrong" order: `$^` must satisfy all four predicates,
    // just like `$^|^$` below. Each predicate is checked once (this group
    // previously repeated two of them and left the other two untested).
    assert!(t(r"$^").is_anchored_start());
    assert!(t(r"$^").is_anchored_end());
    assert!(t(r"$^").is_line_anchored_start());
    assert!(t(r"$^").is_line_anchored_end());
    assert!(t(r"$^|^$").is_anchored_start());
    assert!(t(r"$^|^$").is_anchored_end());
    assert!(t(r"$^|^$").is_line_anchored_start());
    assert!(t(r"$^|^$").is_line_anchored_end());

    // Other zero-width assertions next to a text anchor do not hide it.
    assert!(t(r"\b^").is_anchored_start());
    assert!(t(r"$\b").is_anchored_end());
    assert!(t(r"\b^").is_line_anchored_start());
    assert!(t(r"$\b").is_line_anchored_end());
    assert!(t(r"^(?m:^)").is_anchored_start());
    assert!(t(r"(?m:$)$").is_anchored_end());
    assert!(t(r"^(?m:^)").is_line_anchored_start());
    assert!(t(r"(?m:$)$").is_line_anchored_end());
    assert!(t(r"(?m:^)^").is_anchored_start());
    assert!(t(r"$(?m:$)").is_anchored_end());
    assert!(t(r"(?m:^)^").is_line_anchored_start());
    assert!(t(r"$(?m:$)").is_line_anchored_end());

    // Negative examples.
    // Multi-line anchors alone are not text anchors.
    assert!(!t(r"(?m)^").is_anchored_start());
    assert!(!t(r"(?m)$").is_anchored_end());
    assert!(!t(r"(?m:^$)|$^").is_anchored_start());
    assert!(!t(r"(?m:^$)|$^").is_anchored_end());
    assert!(!t(r"$^|(?m:^$)").is_anchored_start());
    assert!(!t(r"$^|(?m:^$)").is_anchored_end());

    // A literal between the anchor and the edge of the pattern.
    assert!(!t(r"a^").is_anchored_start());
    assert!(!t(r"$a").is_anchored_start());
    assert!(!t(r"a^").is_line_anchored_start());
    assert!(!t(r"$a").is_line_anchored_start());

    assert!(!t(r"a^").is_anchored_end());
    assert!(!t(r"$a").is_anchored_end());
    assert!(!t(r"a^").is_line_anchored_end());
    assert!(!t(r"$a").is_line_anchored_end());

    // Only one alternation branch is anchored.
    assert!(!t(r"^foo|bar").is_anchored_start());
    assert!(!t(r"foo|bar$").is_anchored_end());
    assert!(!t(r"^foo|bar").is_line_anchored_start());
    assert!(!t(r"foo|bar$").is_line_anchored_end());

    // `*` allows zero occurrences, so the anchor is optional.
    assert!(!t(r"^*").is_anchored_start());
    assert!(!t(r"$*").is_anchored_end());
    assert!(!t(r"^*").is_line_anchored_start());
    assert!(!t(r"$*").is_line_anchored_end());
    assert!(!t(r"^*+").is_anchored_start());
    assert!(!t(r"$*+").is_anchored_end());
    assert!(!t(r"^*+").is_line_anchored_start());
    assert!(!t(r"$*+").is_line_anchored_end());
    assert!(!t(r"^+*").is_anchored_start());
    assert!(!t(r"$+*").is_anchored_end());
    assert!(!t(r"^+*").is_line_anchored_start());
    assert!(!t(r"$+*").is_line_anchored_end());
    assert!(!t(r"(^)*").is_anchored_start());
    assert!(!t(r"($)*").is_anchored_end());
    assert!(!t(r"(^)*").is_line_anchored_start());
    assert!(!t(r"($)*").is_line_anchored_end());
}
3051
/// Checks that multi-line anchors count for the line-anchored predicates,
/// including alternations that mix multi-line and plain anchors.
#[test]
fn analysis_is_line_anchored() {
    for pattern in [
        r"(?m)^(foo|bar)",
        r"(?m)^foo|^bar",
        r"(?m)^",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ]
    .iter()
    {
        assert!(
            t(pattern).is_line_anchored_start(),
            "expected line-anchored start: {}",
            pattern
        );
    }

    for pattern in [
        r"(?m)(foo|bar)$",
        r"(?m)foo$|bar$",
        r"(?m)$",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ]
    .iter()
    {
        assert!(
            t(pattern).is_line_anchored_end(),
            "expected line-anchored end: {}",
            pattern
        );
    }
}
3069
/// Checks `is_any_anchored_start` / `is_any_anchored_end` for the text
/// anchors (`^`, `$`, `\A`, `\z`) and for their multi-line counterparts.
#[test]
fn analysis_is_any_anchored() {
    let start_text = t(r"^");
    let end_text = t(r"$");

    // Positive examples.
    assert!(start_text.is_any_anchored_start());
    assert!(end_text.is_any_anchored_end());
    assert!(t(r"\A").is_any_anchored_start());
    assert!(t(r"\z").is_any_anchored_end());

    // Negative examples.
    assert!(!t(r"(?m)^").is_any_anchored_start());
    assert!(!t(r"(?m)$").is_any_anchored_end());
    // A start anchor is not an end anchor, and vice versa.
    assert!(!end_text.is_any_anchored_start());
    assert!(!start_text.is_any_anchored_end());
}
3084
/// Checks `is_match_empty` for a range of patterns that are expected to be
/// able to match the empty string, and a range that are not.
#[test]
fn analysis_is_match_empty() {
    // Positive examples.
    for pattern in [
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
        r"a*|b",
        r"b|a*",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
    ]
    .iter()
    {
        assert!(
            t(pattern).is_match_empty(),
            "expected to match empty: {}",
            pattern
        );
    }
    #[cfg(feature = "unicode-gencat")]
    assert!(t(r"\pL*").is_match_empty());
    assert!(t_bytes(r"(?-u)\B").is_match_empty());

    // Negative examples.
    for pattern in [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
        r"\b",
        r"(?-u)\b",
    ]
    .iter()
    {
        assert!(
            !t(pattern).is_match_empty(),
            "expected not to match empty: {}",
            pattern
        );
    }
}
3124
/// Checks `is_literal`: true for plain literal strings (a leading flag
/// group is fine), false for the empty pattern, anchors, alternations,
/// groups, repetitions and character classes.
#[test]
fn analysis_is_literal() {
    // Positive examples.
    for pattern in [r"a", r"ab", r"abc", r"(?m)abc"].iter() {
        assert!(t(pattern).is_literal(), "expected literal: {}", pattern);
    }

    // Negative examples.
    for pattern in [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
    ]
    .iter()
    {
        assert!(!t(pattern).is_literal(), "expected non-literal: {}", pattern);
    }
}
3143
/// Checks `is_alternation_literal`: true for a plain literal or an
/// alternation made only of plain literals, false once any branch contains
/// a group, class, repetition or anchor, and false for the empty pattern.
#[test]
fn analysis_is_alternation_literal() {
    // Positive examples.
    for pattern in [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"a|b",
        r"a|b|c",
        r"foo|bar",
        r"foo|bar|baz",
    ]
    .iter()
    {
        assert!(
            t(pattern).is_alternation_literal(),
            "expected alternation of literals: {}",
            pattern
        );
    }

    // Negative examples.
    for pattern in [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
        r"[a]|b",
        r"a|[b]",
        r"(a)|b",
        r"a|(b)",
    ]
    .iter()
    {
        assert!(
            !t(pattern).is_alternation_literal(),
            "expected not an alternation of literals: {}",
            pattern
        );
    }
}
3169 }
3170