1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Exhaustive testing of regular expression matching.
6
7 // Each test picks an alphabet (e.g., "abc"), a maximum string length,
8 // a maximum regular expression length, and a maximum number of letters
9 // that can appear in the regular expression. Given these parameters,
10 // it tries every possible regular expression and string, verifying that
11 // the NFA, DFA, and a trivial backtracking implementation agree about
12 // the location of the match.
13
14 #include <stdio.h>
15
16 #include "util/test.h"
17 #include "util/flags.h"
18 #include "util/logging.h"
19 #include "util/strutil.h"
20 #include "re2/testing/exhaustive_tester.h"
21 #include "re2/testing/tester.h"
22
23 // For target `log' in the Makefile.
24 #ifndef LOGGING
25 #define LOGGING 0
26 #endif
27
28 DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
29
30 DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
31 "Stop testing a regular expression after finding this many "
32 "strings that break it.");
33
34 namespace re2 {
35
escape(const StringPiece & sp)36 static char* escape(const StringPiece& sp) {
37 static char buf[512];
38 char* p = buf;
39 *p++ = '\"';
40 for (size_t i = 0; i < sp.size(); i++) {
41 if(p+5 >= buf+sizeof buf)
42 LOG(FATAL) << "ExhaustiveTester escape: too long";
43 if(sp[i] == '\\' || sp[i] == '\"') {
44 *p++ = '\\';
45 *p++ = sp[i];
46 } else if(sp[i] == '\n') {
47 *p++ = '\\';
48 *p++ = 'n';
49 } else {
50 *p++ = sp[i];
51 }
52 }
53 *p++ = '\"';
54 *p = '\0';
55 return buf;
56 }
57
PrintResult(const RE2 & re,const StringPiece & input,RE2::Anchor anchor,StringPiece * m,int n)58 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
59 if (!re.Match(input, 0, input.size(), anchor, m, n)) {
60 printf("-");
61 return;
62 }
63 for (int i = 0; i < n; i++) {
64 if (i > 0)
65 printf(" ");
66 if (m[i].data() == NULL)
67 printf("-");
68 else
69 printf("%td-%td",
70 m[i].begin() - input.begin(),
71 m[i].end() - input.begin());
72 }
73 }
74
75 // Processes a single generated regexp.
76 // Compiles it using Regexp interface and PCRE, and then
77 // checks that NFA, DFA, and PCRE all return the same results.
HandleRegexp(const std::string & const_regexp)78 void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
79 regexps_++;
80 std::string regexp = const_regexp;
81 if (!topwrapper_.empty()) {
82 regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
83 }
84
85 if (GetFlag(FLAGS_show_regexps)) {
86 printf("\r%s", regexp.c_str());
87 fflush(stdout);
88 }
89
90 if (LOGGING) {
91 // Write out test cases and answers for use in testing
92 // other implementations, such as Go's regexp package.
93 if (randomstrings_)
94 LOG(ERROR) << "Cannot log with random strings.";
95 if (regexps_ == 1) { // first
96 printf("strings\n");
97 strgen_.Reset();
98 while (strgen_.HasNext())
99 printf("%s\n", escape(strgen_.Next()));
100 printf("regexps\n");
101 }
102 printf("%s\n", escape(regexp));
103
104 RE2 re(regexp);
105 RE2::Options longest;
106 longest.set_longest_match(true);
107 RE2 relongest(regexp, longest);
108 int ngroup = re.NumberOfCapturingGroups()+1;
109 StringPiece* group = new StringPiece[ngroup];
110
111 strgen_.Reset();
112 while (strgen_.HasNext()) {
113 StringPiece input = strgen_.Next();
114 PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
115 printf(";");
116 PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
117 printf(";");
118 PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
119 printf(";");
120 PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
121 printf("\n");
122 }
123 delete[] group;
124 return;
125 }
126
127 Tester tester(regexp);
128 if (tester.error())
129 return;
130
131 strgen_.Reset();
132 strgen_.GenerateNULL();
133 if (randomstrings_)
134 strgen_.Random(stringseed_, stringcount_);
135 int bad_inputs = 0;
136 while (strgen_.HasNext()) {
137 tests_++;
138 if (!tester.TestInput(strgen_.Next())) {
139 failures_++;
140 if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
141 break;
142 }
143 }
144 }
145
146 // Runs an exhaustive test on the given parameters.
ExhaustiveTest(int maxatoms,int maxops,const std::vector<std::string> & alphabet,const std::vector<std::string> & ops,int maxstrlen,const std::vector<std::string> & stralphabet,const std::string & wrapper,const std::string & topwrapper)147 void ExhaustiveTest(int maxatoms, int maxops,
148 const std::vector<std::string>& alphabet,
149 const std::vector<std::string>& ops,
150 int maxstrlen,
151 const std::vector<std::string>& stralphabet,
152 const std::string& wrapper,
153 const std::string& topwrapper) {
154 if (RE2_DEBUG_MODE) {
155 if (maxatoms > 1)
156 maxatoms--;
157 if (maxops > 1)
158 maxops--;
159 if (maxstrlen > 1)
160 maxstrlen--;
161 }
162 ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
163 maxstrlen, stralphabet, wrapper,
164 topwrapper);
165 t.Generate();
166 if (!LOGGING) {
167 printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
168 t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
169 }
170 EXPECT_EQ(0, t.failures());
171 }
172
173 // Runs an exhaustive test using the given parameters and
174 // the basic egrep operators.
EgrepTest(int maxatoms,int maxops,const std::string & alphabet,int maxstrlen,const std::string & stralphabet,const std::string & wrapper)175 void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
176 int maxstrlen, const std::string& stralphabet,
177 const std::string& wrapper) {
178 const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
179
180 for (size_t i = 0; i < arraysize(tops); i++) {
181 ExhaustiveTest(maxatoms, maxops,
182 Split("", alphabet),
183 RegexpGenerator::EgrepOps(),
184 maxstrlen,
185 Split("", stralphabet),
186 wrapper,
187 tops[i]);
188 }
189 }
190
191 } // namespace re2
192