1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "util/test.h"
6 #include "re2/prog.h"
7 #include "re2/regexp.h"
8
9 namespace re2 {
10
// One row of the MimicsPCRE test table: a pattern plus the answer the
// test expects for it.
struct PCRETest {
  // Pattern source text; the test hands it to Regexp::Parse.
  const char* regexp;

  // Value the test expects Regexp::MimicsPCRE() to return for the
  // parsed pattern.
  bool should_match;
};
15
16 static PCRETest tests[] = {
17 // Most things should behave exactly.
18 { "abc", true },
19 { "(a|b)c", true },
20 { "(a*|b)c", true },
21 { "(a|b*)c", true },
22 { "a(b|c)d", true },
23 { "a(()|())c", true },
24 { "ab*c", true },
25 { "ab+c", true },
26 { "a(b*|c*)d", true },
27 { "\\W", true },
28 { "\\W{1,2}", true },
29 { "\\d", true },
30
31 // Check that repeated empty strings do not.
32 { "(a*)*", false },
33 { "x(a*)*y", false },
34 { "(a*)+", false },
35 { "(a+)*", true },
36 { "(a+)+", true },
37 { "(a+)+", true },
38
39 // \v is the only character class that shouldn't.
40 { "\\b", true },
41 { "\\v", false },
42 { "\\d", true },
43
44 // The handling of ^ in multi-line mode is different, as is
45 // the handling of $ in single-line mode. (Both involve
46 // boundary cases if the string ends with \n.)
47 { "\\A", true },
48 { "\\z", true },
49 { "(?m)^", false },
50 { "(?m)$", true },
51 { "(?-m)^", true },
52 { "(?-m)$", false }, // In PCRE, == \Z
53 { "(?m)\\A", true },
54 { "(?m)\\z", true },
55 { "(?-m)\\A", true },
56 { "(?-m)\\z", true },
57 };
58
// Parses every pattern in tests[] twice -- first with Latin1 added to the
// LikePerl flags, then with LikePerl alone -- and checks that MimicsPCRE()
// on the parsed regexp equals the should_match value recorded in the table.
TEST(MimicsPCRE, SimpleTests) {
  for (int row = 0; row < arraysize(tests); ++row) {
    const PCRETest& t = tests[row];

    for (int pass = 0; pass < 2; ++pass) {
      // Pass 0 is the Latin-1 run; pass 1 is labelled "utf" in failures.
      const bool latin1 = (pass == 0);

      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (latin1)
        flags = flags | Regexp::Latin1;

      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      // Every pattern in the table must parse before it can be queried.
      CHECK(re) << " " << t.regexp;
      CHECK_EQ(t.should_match, re->MimicsPCRE())
        << " " << t.regexp << " "
        << (latin1 ? "latin1" : "utf");
      re->Decref();
    }
  }
}
75
76 } // namespace re2
77