// Copyright 2009 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4 
5 #include <string>
6 
7 #include "util/test.h"
8 #include "util/logging.h"
9 #include "re2/prog.h"
10 #include "re2/regexp.h"
11 
12 namespace re2 {
13 
// One row of the table-driven prefix tests below.
// Rows are written as aggregate initializers; fields omitted at the end of
// a row are value-initialized (NULL / false).  The tests only read prefix,
// foldcase and suffix when return_value is true.
struct PrefixTest {
  const char* regexp;  // Pattern handed to Regexp::Parse().
  bool return_value;   // Expected return value of the call under test.
  const char* prefix;  // Expected prefix string.
  bool foldcase;       // Expected fold-case flag.
  const char* suffix;  // Expected ToString() of the suffix regexp
                       // (only checked by the RequiredPrefix test).
};
21 
22 static PrefixTest tests[] = {
23   // Empty cases.
24   { "", false },
25   { "(?m)^", false },
26   { "(?-m)^", false },
27 
28   // If the regexp has no ^, there's no required prefix.
29   { "abc", false },
30 
31   // If the regexp immediately goes into
32   // something not a literal match, there's no required prefix.
33   { "^(abc)", false },
34   { "^a*",  false },
35 
36   // Otherwise, it should work.
37   { "^abc$", true, "abc", false, "(?-m:$)" },
38   { "^abc", true, "abc", false, "" },
39   { "^(?i)abc", true, "abc", true, "" },
40   { "^abcd*", true, "abc", false, "d*" },
41   { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
42   { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
43   { "^☺abc", true, "☺abc", false, "" },
44 };
45 
// Runs Regexp::RequiredPrefix() over every row of tests[].  Each pattern is
// parsed twice: once with Regexp::Latin1 added to the flags and once with
// Regexp::LikePerl alone.  When a prefix is expected, the returned prefix,
// fold-case flag and the string form of the suffix regexp are compared
// against the row.
TEST(RequiredPrefix, SimpleTests) {
  for (size_t i = 0; i < arraysize(tests); i++) {
    const PrefixTest& t = tests[i];
    for (size_t j = 0; j < 2; j++) {
      // Label used in failure messages: pass 0 is the Latin1 pass.
      const char* encoding = (j == 0 ? "latin1" : "utf8");
      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (j == 0)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;

      // Out-parameters start from known values so nothing indeterminate is
      // ever read, whatever the call does with them.
      std::string p;
      bool f = false;
      Regexp* s = NULL;
      ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
        << " " << t.regexp << " " << encoding
        << " " << re->Dump();
      if (t.return_value) {
        ASSERT_EQ(p, std::string(t.prefix))
          << " " << t.regexp << " " << encoding;
        ASSERT_EQ(f, t.foldcase)
          << " " << t.regexp << " " << encoding;
        ASSERT_EQ(s->ToString(), std::string(t.suffix))
          << " " << t.regexp << " " << encoding;
        // Release the suffix regexp handed back through the out-parameter.
        s->Decref();
      }
      re->Decref();
    }
  }
}
75 
76 static PrefixTest for_accel_tests[] = {
77   // Empty cases.
78   { "", false },
79   { "(?m)^", false },
80   { "(?-m)^", false },
81 
82   // If the regexp has a ^, there's no required prefix.
83   { "^abc", false },
84 
85   // If the regexp immediately goes into
86   // something not a literal match, there's no required prefix.
87   { "(abc)", false },
88   { "a*",  false },
89 
90   // Otherwise, it should work.
91   { "abc$", true, "abc", false, },
92   { "abc", true, "abc", false, },
93   { "(?i)abc", true, "abc", true, },
94   { "abcd*", true, "abc", false, },
95   { "[Aa][Bb]cd*", true, "ab", true, },
96   { "ab[Cc]d*", true, "ab", false, },
97   { "☺abc", true, "☺abc", false, },
98 };
99 
// Runs Regexp::RequiredPrefixForAccel() over every row of for_accel_tests[].
// Each pattern is parsed twice: once with Regexp::Latin1 added to the flags
// and once with Regexp::LikePerl alone.  When a prefix is expected, the
// returned prefix and fold-case flag are compared against the row.
TEST(RequiredPrefixForAccel, SimpleTests) {
  for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
    const PrefixTest& t = for_accel_tests[i];
    for (size_t j = 0; j < 2; j++) {
      // Label used in failure messages: pass 0 is the Latin1 pass.
      const char* encoding = (j == 0 ? "latin1" : "utf8");
      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (j == 0)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;

      // Out-parameters start from known values so nothing indeterminate is
      // ever read, whatever the call does with them.
      std::string p;
      bool f = false;
      ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
        << " " << t.regexp << " " << encoding
        << " " << re->Dump();
      if (t.return_value) {
        ASSERT_EQ(p, std::string(t.prefix))
          << " " << t.regexp << " " << encoding;
        ASSERT_EQ(f, t.foldcase)
          << " " << t.regexp << " " << encoding;
      }
      re->Decref();
    }
  }
}
125 
// Compiles "abc\d+" and probes Prog::PrefixAccel() with texts of growing
// length.  For each i in [0, 100) the test expects:
//   - NULL for a text made of i copies of 'a';
//   - a pointer i bytes into the text once "abc" has been appended.
TEST(PrefixAccel, BasicTest) {
  Regexp* re = Regexp::Parse("abc\\d+", Regexp::LikePerl, NULL);
  ASSERT_TRUE(re != NULL);
  Prog* prog = re->CompileToProg(0);
  ASSERT_TRUE(prog != NULL);
  for (int i = 0; i < 100; i++) {
    std::string text(i, 'a');
    const char* p = reinterpret_cast<const char*>(
        prog->PrefixAccel(text.data(), text.size()));
    EXPECT_TRUE(p == NULL);
    text.append("abc");
    p = reinterpret_cast<const char*>(
        prog->PrefixAccel(text.data(), text.size()));
    // Only compute the offset when a pointer actually came back:
    // subtracting text.data() from NULL is undefined and would report a
    // meaningless offset instead of a clear failure.
    EXPECT_TRUE(p != NULL);
    if (p != NULL) {
      EXPECT_EQ(i, p - text.data());
    }
  }
  delete prog;
  re->Decref();
}
144 
145 }  // namespace re2
146