1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Exhaustive testing of regular expression matching.
6
7 // Each test picks an alphabet (e.g., "abc"), a maximum string length,
8 // a maximum regular expression length, and a maximum number of letters
9 // that can appear in the regular expression. Given these parameters,
10 // it tries every possible regular expression and string, verifying that
11 // the NFA, DFA, and a trivial backtracking implementation agree about
12 // the location of the match.
13
14 #include <stdlib.h>
15 #include <stdio.h>
16
17 #ifndef LOGGING
18 #define LOGGING 0
19 #endif
20
21 #include "util/test.h"
22 #include "re2/testing/exhaustive_tester.h"
23 #include "re2/testing/tester.h"
24
25 DEFINE_bool(show_regexps, false, "show regexps during testing");
26
27 DEFINE_int32(max_bad_regexp_inputs, 1,
28 "Stop testing a regular expression after finding this many "
29 "strings that break it.");
30
31 // Compiled in debug mode, the usual tests run for over an hour.
32 // Have to cut it down to make the unit test machines happy.
33 DEFINE_bool(quick_debug_mode, true, "Run fewer tests in debug mode.");
34
35 namespace re2 {
36
escape(const StringPiece & sp)37 static char* escape(const StringPiece& sp) {
38 static char buf[512];
39 char* p = buf;
40 *p++ = '\"';
41 for (int i = 0; i < sp.size(); i++) {
42 if(p+5 >= buf+sizeof buf)
43 LOG(FATAL) << "ExhaustiveTester escape: too long";
44 if(sp[i] == '\\' || sp[i] == '\"') {
45 *p++ = '\\';
46 *p++ = sp[i];
47 } else if(sp[i] == '\n') {
48 *p++ = '\\';
49 *p++ = 'n';
50 } else {
51 *p++ = sp[i];
52 }
53 }
54 *p++ = '\"';
55 *p = '\0';
56 return buf;
57 }
58
PrintResult(const RE2 & re,const StringPiece & input,RE2::Anchor anchor,StringPiece * m,int n)59 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
60 if (!re.Match(input, 0, input.size(), anchor, m, n)) {
61 printf("-");
62 return;
63 }
64 for (int i = 0; i < n; i++) {
65 if (i > 0)
66 printf(" ");
67 if (m[i].begin() == NULL)
68 printf("-");
69 else
70 printf("%d-%d", static_cast<int>(m[i].begin() - input.begin()), static_cast<int>(m[i].end() - input.begin()));
71 }
72 }
73
74 // Processes a single generated regexp.
75 // Compiles it using Regexp interface and PCRE, and then
76 // checks that NFA, DFA, and PCRE all return the same results.
HandleRegexp(const string & const_regexp)77 void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
78 regexps_++;
79 string regexp = const_regexp;
80 if (!topwrapper_.empty())
81 regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
82
83 if (FLAGS_show_regexps) {
84 printf("\r%s", regexp.c_str());
85 fflush(stdout);
86 }
87
88 if (LOGGING) {
89 // Write out test cases and answers for use in testing
90 // other implementations, such as Go's regexp package.
91 if (randomstrings_)
92 LOG(ERROR) << "Cannot log with random strings.";
93 if (regexps_ == 1) { // first
94 printf("strings\n");
95 strgen_.Reset();
96 while (strgen_.HasNext())
97 printf("%s\n", escape(strgen_.Next()));
98 printf("regexps\n");
99 }
100 printf("%s\n", escape(regexp));
101
102 RE2 re(regexp);
103 RE2::Options longest;
104 longest.set_longest_match(true);
105 RE2 relongest(regexp, longest);
106 int ngroup = re.NumberOfCapturingGroups()+1;
107 StringPiece* group = new StringPiece[ngroup];
108
109 strgen_.Reset();
110 while (strgen_.HasNext()) {
111 StringPiece input = strgen_.Next();
112 PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
113 printf(";");
114 PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
115 printf(";");
116 PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
117 printf(";");
118 PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
119 printf("\n");
120 }
121 delete[] group;
122 return;
123 }
124
125 Tester tester(regexp);
126 if (tester.error())
127 return;
128
129 strgen_.Reset();
130 strgen_.GenerateNULL();
131 if (randomstrings_)
132 strgen_.Random(stringseed_, stringcount_);
133 int bad_inputs = 0;
134 while (strgen_.HasNext()) {
135 tests_++;
136 if (!tester.TestInput(strgen_.Next())) {
137 failures_++;
138 if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
139 break;
140 }
141 }
142 }
143
144 // Runs an exhaustive test on the given parameters.
ExhaustiveTest(int maxatoms,int maxops,const vector<string> & alphabet,const vector<string> & ops,int maxstrlen,const vector<string> & stralphabet,const string & wrapper,const string & topwrapper)145 void ExhaustiveTest(int maxatoms, int maxops,
146 const vector<string>& alphabet,
147 const vector<string>& ops,
148 int maxstrlen, const vector<string>& stralphabet,
149 const string& wrapper,
150 const string& topwrapper) {
151 if (DEBUG_MODE && FLAGS_quick_debug_mode) {
152 if (maxatoms > 1)
153 maxatoms--;
154 if (maxops > 1)
155 maxops--;
156 if (maxstrlen > 1)
157 maxstrlen--;
158 }
159 ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
160 maxstrlen, stralphabet, wrapper,
161 topwrapper);
162 t.Generate();
163 if (!LOGGING) {
164 printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
165 t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
166 }
167 EXPECT_EQ(0, t.failures());
168 }
169
170 // Runs an exhaustive test using the given parameters and
171 // the basic egrep operators.
EgrepTest(int maxatoms,int maxops,const string & alphabet,int maxstrlen,const string & stralphabet,const string & wrapper)172 void EgrepTest(int maxatoms, int maxops, const string& alphabet,
173 int maxstrlen, const string& stralphabet,
174 const string& wrapper) {
175 const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
176
177 for (int i = 0; i < arraysize(tops); i++) {
178 ExhaustiveTest(maxatoms, maxops,
179 Split("", alphabet),
180 RegexpGenerator::EgrepOps(),
181 maxstrlen,
182 Split("", stralphabet),
183 wrapper,
184 tops[i]);
185 }
186 }
187
188 } // namespace re2
189