1 // Copyright 2009 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include <string>
6
7 #include "util/test.h"
8 #include "util/logging.h"
9 #include "re2/prog.h"
10 #include "re2/regexp.h"
11
12 namespace re2 {
13
// One row of a table-driven required-prefix test.
//
// Rows are written as positional brace initializers, so the order of the
// members below is significant. Members left out of a row are
// value-initialized (false / null pointer).
struct PrefixTest {
  // Pattern text that the test passes to Regexp::Parse().
  const char* regexp;

  // Result the prefix query is expected to return.
  bool return_value;

  // Expected prefix string; only compared when return_value is true.
  const char* prefix;

  // Expected case-folding flag; only compared when return_value is true.
  bool foldcase;

  // Expected ToString() of the regexp that remains after the prefix.
  // Only the RequiredPrefix test reads this member.
  const char* suffix;
};
21
22 static PrefixTest tests[] = {
23 // Empty cases.
24 { "", false },
25 { "(?m)^", false },
26 { "(?-m)^", false },
27
28 // If the regexp has no ^, there's no required prefix.
29 { "abc", false },
30
31 // If the regexp immediately goes into
32 // something not a literal match, there's no required prefix.
33 { "^(abc)", false },
34 { "^a*", false },
35
36 // Otherwise, it should work.
37 { "^abc$", true, "abc", false, "(?-m:$)" },
38 { "^abc", true, "abc", false, "" },
39 { "^(?i)abc", true, "abc", true, "" },
40 { "^abcd*", true, "abc", false, "d*" },
41 { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
42 { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
43 { "^☺abc", true, "☺abc", false, "" },
44 };
45
// Runs every row of tests[] through Regexp::RequiredPrefix() twice -- once
// parsed with the Latin1 flag added and once with LikePerl alone -- and
// compares the return value, prefix, fold-case flag and remaining regexp
// with the row's expectations.
TEST(RequiredPrefix, SimpleTests) {
  for (size_t i = 0; i < arraysize(tests); i++) {
    const PrefixTest& t = tests[i];
    for (size_t j = 0; j < 2; j++) {
      // Pass 0 adds Regexp::Latin1; pass 1 does not. The label is only
      // used to identify the pass in failure messages.
      const bool latin1 = (j == 0);
      const char* encoding = latin1 ? "latin1" : "utf8";

      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (latin1)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;

      // Give the out-parameters defined values up front so that nothing
      // below can ever read an indeterminate bool or pointer.
      std::string p;
      bool f = false;
      Regexp* s = NULL;
      ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
          << " " << t.regexp << " " << encoding
          << " " << re->Dump();
      if (t.return_value) {
        ASSERT_EQ(p, std::string(t.prefix))
            << " " << t.regexp << " " << encoding;
        ASSERT_EQ(f, t.foldcase)
            << " " << t.regexp << " " << encoding;
        // Fail cleanly instead of dereferencing a null suffix.
        ASSERT_TRUE(s != NULL)
            << " " << t.regexp << " " << encoding;
        ASSERT_EQ(s->ToString(), std::string(t.suffix))
            << " " << t.regexp << " " << encoding;
        // The test releases the suffix regexp it was handed.
        s->Decref();
      }
      re->Decref();
    }
  }
}
75
76 static PrefixTest for_accel_tests[] = {
77 // Empty cases.
78 { "", false },
79 { "(?m)^", false },
80 { "(?-m)^", false },
81
82 // If the regexp has a ^, there's no required prefix.
83 { "^abc", false },
84
85 // If the regexp immediately goes into
86 // something not a literal match, there's no required prefix.
87 { "(abc)", false },
88 { "a*", false },
89
90 // Otherwise, it should work.
91 { "abc$", true, "abc", false, },
92 { "abc", true, "abc", false, },
93 { "(?i)abc", true, "abc", true, },
94 { "abcd*", true, "abc", false, },
95 { "[Aa][Bb]cd*", true, "ab", true, },
96 { "ab[Cc]d*", true, "ab", false, },
97 { "☺abc", true, "☺abc", false, },
98 };
99
// Runs every row of for_accel_tests[] through
// Regexp::RequiredPrefixForAccel() twice -- once parsed with the Latin1
// flag added and once with LikePerl alone -- and compares the return
// value, prefix and fold-case flag with the row's expectations.
TEST(RequiredPrefixForAccel, SimpleTests) {
  for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
    const PrefixTest& t = for_accel_tests[i];
    for (size_t j = 0; j < 2; j++) {
      // Pass 0 adds Regexp::Latin1; pass 1 does not. The label is only
      // used to identify the pass in failure messages.
      const bool latin1 = (j == 0);
      const char* encoding = latin1 ? "latin1" : "utf8";

      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (latin1)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;

      // Give the out-parameter a defined value up front so that the
      // comparison below can never read an indeterminate bool.
      std::string p;
      bool f = false;
      ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
          << " " << t.regexp << " " << encoding
          << " " << re->Dump();
      if (t.return_value) {
        ASSERT_EQ(p, std::string(t.prefix))
            << " " << t.regexp << " " << encoding;
        ASSERT_EQ(f, t.foldcase)
            << " " << t.regexp << " " << encoding;
      }
      re->Decref();
    }
  }
}
125
// Compiles "abc\d+" and calls Prog::PrefixAccel() on texts of growing
// length: first on i copies of 'a' alone, where no result is expected,
// then on the same text with "abc" appended, where the result is expected
// to point at offset i (the start of the appended "abc").
TEST(PrefixAccel, BasicTest) {
  Regexp* re = Regexp::Parse("abc\\d+", Regexp::LikePerl, NULL);
  ASSERT_TRUE(re != NULL);
  Prog* prog = re->CompileToProg(0);
  ASSERT_TRUE(prog != NULL);
  for (int i = 0; i < 100; i++) {
    // Only 'a' characters: nothing should be found.
    std::string text(i, 'a');
    const char* p = reinterpret_cast<const char*>(
        prog->PrefixAccel(text.data(), text.size()));
    EXPECT_TRUE(p == NULL);

    // With "abc" appended, something should be found at offset i.
    text.append("abc");
    p = reinterpret_cast<const char*>(
        prog->PrefixAccel(text.data(), text.size()));
    EXPECT_TRUE(p != NULL);
    // Subtracting from a null pointer is undefined, so compute the
    // offset only when a result was actually returned.
    if (p != NULL) {
      EXPECT_EQ(i, p - text.data());
    }
  }
  delete prog;
  re->Decref();
}
144
145 } // namespace re2
146