• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Exhaustive testing of regular expression matching.
6 
7 // Each test picks an alphabet (e.g., "abc"), a maximum string length,
8 // a maximum regular expression length, and a maximum number of letters
9 // that can appear in the regular expression.  Given these parameters,
10 // it tries every possible regular expression and string, verifying that
11 // the NFA, DFA, and a trivial backtracking implementation agree about
12 // the location of the match.
13 
14 #include "re2/testing/exhaustive_tester.h"
15 
16 #include <stdio.h>
17 
18 #include <string>
19 #include <vector>
20 
21 #include "absl/base/macros.h"
22 #include "absl/flags/flag.h"
23 #include "absl/log/absl_check.h"
24 #include "absl/log/absl_log.h"
25 #include "absl/strings/str_format.h"
26 #include "absl/strings/string_view.h"
27 #include "gtest/gtest.h"
28 #include "re2/prog.h"
29 #include "re2/re2.h"
30 #include "re2/testing/regexp_generator.h"
31 #include "re2/testing/tester.h"
32 
33 // For target `log' in the Makefile.
34 #ifndef LOGGING
35 #define LOGGING 0
36 #endif
37 
38 ABSL_FLAG(bool, show_regexps, false, "show regexps during testing");
39 
40 ABSL_FLAG(int, max_bad_regexp_inputs, 1,
41           "Stop testing a regular expression after finding this many "
42           "strings that break it.");
43 
44 namespace re2 {
45 
escape(absl::string_view sp)46 static char* escape(absl::string_view sp) {
47   static char buf[512];
48   char* p = buf;
49   *p++ = '\"';
50   for (size_t i = 0; i < sp.size(); i++) {
51     if(p+5 >= buf+sizeof buf)
52       ABSL_LOG(FATAL) << "ExhaustiveTester escape: too long";
53     if(sp[i] == '\\' || sp[i] == '\"') {
54       *p++ = '\\';
55       *p++ = sp[i];
56     } else if(sp[i] == '\n') {
57       *p++ = '\\';
58       *p++ = 'n';
59     } else {
60       *p++ = sp[i];
61     }
62   }
63   *p++ = '\"';
64   *p = '\0';
65   return buf;
66 }
67 
PrintResult(const RE2 & re,absl::string_view input,RE2::Anchor anchor,absl::string_view * m,int n)68 static void PrintResult(const RE2& re, absl::string_view input,
69                         RE2::Anchor anchor, absl::string_view* m, int n) {
70   if (!re.Match(input, 0, input.size(), anchor, m, n)) {
71     absl::PrintF("-");
72     return;
73   }
74   for (int i = 0; i < n; i++) {
75     if (i > 0)
76       absl::PrintF(" ");
77     if (m[i].data() == NULL)
78       absl::PrintF("-");
79     else
80       absl::PrintF("%d-%d",
81                    BeginPtr(m[i]) - BeginPtr(input),
82                    EndPtr(m[i]) - BeginPtr(input));
83   }
84 }
85 
86 // Processes a single generated regexp.
87 // Compiles it using Regexp interface and PCRE, and then
88 // checks that NFA, DFA, and PCRE all return the same results.
HandleRegexp(const std::string & const_regexp)89 void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
90   regexps_++;
91   std::string regexp = const_regexp;
92   if (!topwrapper_.empty()) {
93     auto fmt = absl::ParsedFormat<'s'>::New(topwrapper_);
94     ABSL_CHECK(fmt != nullptr);
95     regexp = absl::StrFormat(*fmt, regexp);
96   }
97 
98   if (absl::GetFlag(FLAGS_show_regexps)) {
99     absl::PrintF("\r%s", regexp);
100     fflush(stdout);
101   }
102 
103   if (LOGGING) {
104     // Write out test cases and answers for use in testing
105     // other implementations, such as Go's regexp package.
106     if (randomstrings_)
107       ABSL_LOG(ERROR) << "Cannot log with random strings.";
108     if (regexps_ == 1) {  // first
109       absl::PrintF("strings\n");
110       strgen_.Reset();
111       while (strgen_.HasNext())
112         absl::PrintF("%s\n", escape(strgen_.Next()));
113       absl::PrintF("regexps\n");
114     }
115     absl::PrintF("%s\n", escape(regexp));
116 
117     RE2 re(regexp);
118     RE2::Options longest;
119     longest.set_longest_match(true);
120     RE2 relongest(regexp, longest);
121     int ngroup = re.NumberOfCapturingGroups()+1;
122     absl::string_view* group = new absl::string_view[ngroup];
123 
124     strgen_.Reset();
125     while (strgen_.HasNext()) {
126       absl::string_view input = strgen_.Next();
127       PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
128       absl::PrintF(";");
129       PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
130       absl::PrintF(";");
131       PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
132       absl::PrintF(";");
133       PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
134       absl::PrintF("\n");
135     }
136     delete[] group;
137     return;
138   }
139 
140   Tester tester(regexp);
141   if (tester.error())
142     return;
143 
144   strgen_.Reset();
145   strgen_.GenerateNULL();
146   if (randomstrings_)
147     strgen_.Random(stringseed_, stringcount_);
148   int bad_inputs = 0;
149   while (strgen_.HasNext()) {
150     tests_++;
151     if (!tester.TestInput(strgen_.Next())) {
152       failures_++;
153       if (++bad_inputs >= absl::GetFlag(FLAGS_max_bad_regexp_inputs))
154         break;
155     }
156   }
157 }
158 
159 // Runs an exhaustive test on the given parameters.
ExhaustiveTest(int maxatoms,int maxops,const std::vector<std::string> & alphabet,const std::vector<std::string> & ops,int maxstrlen,const std::vector<std::string> & stralphabet,const std::string & wrapper,const std::string & topwrapper)160 void ExhaustiveTest(int maxatoms, int maxops,
161                     const std::vector<std::string>& alphabet,
162                     const std::vector<std::string>& ops,
163                     int maxstrlen,
164                     const std::vector<std::string>& stralphabet,
165                     const std::string& wrapper,
166                     const std::string& topwrapper) {
167   if (RE2_DEBUG_MODE) {
168     if (maxatoms > 1)
169       maxatoms--;
170     if (maxops > 1)
171       maxops--;
172     if (maxstrlen > 1)
173       maxstrlen--;
174   }
175   ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
176                      maxstrlen, stralphabet, wrapper,
177                      topwrapper);
178   t.Generate();
179   if (!LOGGING) {
180     absl::PrintF("%d regexps, %d tests, %d failures [%d/%d str]\n",
181                  t.regexps(), t.tests(), t.failures(), maxstrlen, stralphabet.size());
182   }
183   EXPECT_EQ(0, t.failures());
184 }
185 
186 // Runs an exhaustive test using the given parameters and
187 // the basic egrep operators.
EgrepTest(int maxatoms,int maxops,const std::string & alphabet,int maxstrlen,const std::string & stralphabet,const std::string & wrapper)188 void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
189                int maxstrlen, const std::string& stralphabet,
190                const std::string& wrapper) {
191   const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
192 
193   for (size_t i = 0; i < ABSL_ARRAYSIZE(tops); i++) {
194     ExhaustiveTest(maxatoms, maxops,
195                    Split("", alphabet),
196                    RegexpGenerator::EgrepOps(),
197                    maxstrlen,
198                    Split("", stralphabet),
199                    wrapper,
200                    tops[i]);
201   }
202 }
203 
204 }  // namespace re2
205