• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Benchmarks for regular expression implementations.
6 
7 #include "util/test.h"
8 #include "re2/prog.h"
9 #include "re2/re2.h"
10 #include "re2/regexp.h"
11 #include "util/pcre.h"
12 #include "util/benchmark.h"
13 
14 namespace re2 {
15 void Test();
16 void MemoryUsage();
17 }  // namespace re2
18 
19 typedef testing::MallocCounter MallocCounter;
20 
21 namespace re2 {
22 
Test()23 void Test() {
24   Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
25   CHECK(re);
26   Prog* prog = re->CompileToProg(0);
27   CHECK(prog);
28   CHECK(prog->IsOnePass());
29   const char* text = "650-253-0001";
30   StringPiece sp[4];
31   CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
32   CHECK_EQ(sp[0], "650-253-0001");
33   CHECK_EQ(sp[1], "650");
34   CHECK_EQ(sp[2], "253");
35   CHECK_EQ(sp[3], "0001");
36   delete prog;
37   re->Decref();
38   LOG(INFO) << "test passed\n";
39 }
40 
MemoryUsage()41 void MemoryUsage() {
42   const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
43   const char* text = "650-253-0001";
44   {
45     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
46     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
47     CHECK(re);
48     // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
49     // because LOG(INFO) might do a big allocation before they get evaluated.
50     fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
51     mc.Reset();
52 
53     Prog* prog = re->CompileToProg(0);
54     CHECK(prog);
55     CHECK(prog->IsOnePass());
56     fprintf(stderr, "Prog:   %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
57     mc.Reset();
58 
59     StringPiece sp[4];
60     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
61     fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
62     delete prog;
63     re->Decref();
64   }
65 
66   {
67     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
68 
69     PCRE re(regexp, PCRE::UTF8);
70     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
71     PCRE::FullMatch(text, re);
72     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
73   }
74 
75   {
76     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
77 
78     PCRE* re = new PCRE(regexp, PCRE::UTF8);
79     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
80     PCRE::FullMatch(text, *re);
81     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
82     delete re;
83   }
84 
85   {
86     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
87 
88     RE2 re(regexp);
89     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
90     RE2::FullMatch(text, re);
91     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
92   }
93 
94   fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n",
95           static_cast<int>(sizeof(PCRE)),
96           static_cast<int>(sizeof(RE2)),
97           static_cast<int>(sizeof(Prog)),
98           static_cast<int>(sizeof(Prog::Inst)));
99 }
100 
101 // Regular expression implementation wrappers.
102 // Defined at bottom of file, but they are repetitive
103 // and not interesting.
104 
105 typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
106              Prog::Anchor anchor, bool expect_match);
107 
108 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,
109            SearchPCRE, SearchRE2,
110            SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
111            SearchCachedPCRE, SearchCachedRE2;
112 
113 typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);
114 
115 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
116           Parse1PCRE, Parse1RE2,
117           Parse1Backtrack,
118           Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
119           Parse1CachedPCRE, Parse1CachedRE2,
120           Parse1CachedBacktrack;
121 
122 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
123           Parse3PCRE, Parse3RE2,
124           Parse3Backtrack,
125           Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
126           Parse3CachedPCRE, Parse3CachedRE2,
127           Parse3CachedBacktrack;
128 
129 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
130 
131 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
132 
133 // Benchmark: failed search for regexp in random text.
134 
135 // Generate random text that won't contain the search string,
136 // to test worst-case search behavior.
MakeText(string * text,int nbytes)137 void MakeText(string* text, int nbytes) {
138   text->resize(nbytes);
139   srand(0);
140   for (int i = 0; i < nbytes; i++) {
141     if (!rand()%30)
142       (*text)[i] = '\n';
143     else
144       (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20;
145   }
146 }
147 
148 // Makes text of size nbytes, then calls run to search
149 // the text for regexp iters times.
Search(int iters,int nbytes,const char * regexp,SearchImpl * search)150 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
151   StopBenchmarkTiming();
152   string s;
153   MakeText(&s, nbytes);
154   BenchmarkMemoryUsage();
155   StartBenchmarkTiming();
156   search(iters, regexp, s, Prog::kUnanchored, false);
157   SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
158 }
159 
160 // These two are easy because they start with an A,
161 // giving the search loop something to memchr for.
162 #define EASY0      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
163 #define EASY1      "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
164 
165 // This is a little harder, since it starts with a character class
166 // and thus can't be memchr'ed.  Could look for ABC and work backward,
167 // but no one does that.
168 #define MEDIUM     "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
169 
170 // This is a fair amount harder, because of the leading [ -~]*.
171 // A bad backtracking implementation will take O(text^2) time to
172 // figure out there's no match.
173 #define HARD       "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
174 
175 // This stresses engines that are trying to track parentheses.
176 #define PARENS     "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
177                    "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
178 
Search_Easy0_CachedDFA(int i,int n)179 void Search_Easy0_CachedDFA(int i, int n)     { Search(i, n, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(int i,int n)180 void Search_Easy0_CachedNFA(int i, int n)     { Search(i, n, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(int i,int n)181 void Search_Easy0_CachedPCRE(int i, int n)    { Search(i, n, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(int i,int n)182 void Search_Easy0_CachedRE2(int i, int n)     { Search(i, n, EASY0, SearchCachedRE2); }
183 
184 BENCHMARK_RANGE(Search_Easy0_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
185 BENCHMARK_RANGE(Search_Easy0_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
186 #ifdef USEPCRE
187 BENCHMARK_RANGE(Search_Easy0_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
188 #endif
189 BENCHMARK_RANGE(Search_Easy0_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
190 
Search_Easy1_CachedDFA(int i,int n)191 void Search_Easy1_CachedDFA(int i, int n)     { Search(i, n, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(int i,int n)192 void Search_Easy1_CachedNFA(int i, int n)     { Search(i, n, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(int i,int n)193 void Search_Easy1_CachedPCRE(int i, int n)    { Search(i, n, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(int i,int n)194 void Search_Easy1_CachedRE2(int i, int n)     { Search(i, n, EASY1, SearchCachedRE2); }
195 
196 BENCHMARK_RANGE(Search_Easy1_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
197 BENCHMARK_RANGE(Search_Easy1_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
198 #ifdef USEPCRE
199 BENCHMARK_RANGE(Search_Easy1_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
200 #endif
201 BENCHMARK_RANGE(Search_Easy1_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
202 
Search_Medium_CachedDFA(int i,int n)203 void Search_Medium_CachedDFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(int i,int n)204 void Search_Medium_CachedNFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(int i,int n)205 void Search_Medium_CachedPCRE(int i, int n)    { Search(i, n, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(int i,int n)206 void Search_Medium_CachedRE2(int i, int n)     { Search(i, n, MEDIUM, SearchCachedRE2); }
207 
208 BENCHMARK_RANGE(Search_Medium_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
209 BENCHMARK_RANGE(Search_Medium_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
210 #ifdef USEPCRE
211 BENCHMARK_RANGE(Search_Medium_CachedPCRE,    8, 256<<10)->ThreadRange(1, NumCPUs());
212 #endif
213 BENCHMARK_RANGE(Search_Medium_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
214 
Search_Hard_CachedDFA(int i,int n)215 void Search_Hard_CachedDFA(int i, int n)     { Search(i, n, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(int i,int n)216 void Search_Hard_CachedNFA(int i, int n)     { Search(i, n, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(int i,int n)217 void Search_Hard_CachedPCRE(int i, int n)    { Search(i, n, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(int i,int n)218 void Search_Hard_CachedRE2(int i, int n)     { Search(i, n, HARD, SearchCachedRE2); }
219 
220 BENCHMARK_RANGE(Search_Hard_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
221 BENCHMARK_RANGE(Search_Hard_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
222 #ifdef USEPCRE
223 BENCHMARK_RANGE(Search_Hard_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
224 #endif
225 BENCHMARK_RANGE(Search_Hard_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
226 
Search_Parens_CachedDFA(int i,int n)227 void Search_Parens_CachedDFA(int i, int n)     { Search(i, n, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(int i,int n)228 void Search_Parens_CachedNFA(int i, int n)     { Search(i, n, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(int i,int n)229 void Search_Parens_CachedPCRE(int i, int n)    { Search(i, n, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(int i,int n)230 void Search_Parens_CachedRE2(int i, int n)     { Search(i, n, PARENS, SearchCachedRE2); }
231 
232 BENCHMARK_RANGE(Search_Parens_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
233 BENCHMARK_RANGE(Search_Parens_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
234 #ifdef USEPCRE
235 BENCHMARK_RANGE(Search_Parens_CachedPCRE,    8, 8)->ThreadRange(1, NumCPUs());
236 #endif
237 BENCHMARK_RANGE(Search_Parens_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
238 
SearchBigFixed(int iters,int nbytes,SearchImpl * search)239 void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
240   StopBenchmarkTiming();
241   string s;
242   s.append(nbytes/2, 'x');
243   string regexp = "^" + s + ".*$";
244   string t;
245   MakeText(&t, nbytes/2);
246   s += t;
247   BenchmarkMemoryUsage();
248   StartBenchmarkTiming();
249   search(iters, regexp.c_str(), s, Prog::kUnanchored, true);
250   SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
251 }
252 
Search_BigFixed_CachedDFA(int i,int n)253 void Search_BigFixed_CachedDFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedDFA); }
Search_BigFixed_CachedNFA(int i,int n)254 void Search_BigFixed_CachedNFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(int i,int n)255 void Search_BigFixed_CachedPCRE(int i, int n)    { SearchBigFixed(i, n, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(int i,int n)256 void Search_BigFixed_CachedRE2(int i, int n)     { SearchBigFixed(i, n, SearchCachedRE2); }
257 
258 BENCHMARK_RANGE(Search_BigFixed_CachedDFA,     8, 1<<20)->ThreadRange(1, NumCPUs());
259 BENCHMARK_RANGE(Search_BigFixed_CachedNFA,     8, 32<<10)->ThreadRange(1, NumCPUs());
260 #ifdef USEPCRE
261 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE,    8, 32<<10)->ThreadRange(1, NumCPUs());
262 #endif
263 BENCHMARK_RANGE(Search_BigFixed_CachedRE2,     8, 1<<20)->ThreadRange(1, NumCPUs());
264 
265 // Benchmark: FindAndConsume
FindAndConsume(int iters,int nbytes)266 void FindAndConsume(int iters, int nbytes) {
267   StopBenchmarkTiming();
268   string s;
269   MakeText(&s, nbytes);
270   s.append("Hello World");
271   StartBenchmarkTiming();
272   RE2 re("((Hello World))");
273   for (int i = 0; i < iters; i++) {
274     StringPiece t = s;
275     StringPiece u;
276     CHECK(RE2::FindAndConsume(&t, re, &u));
277     CHECK_EQ(u, "Hello World");
278   }
279   SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
280 }
281 
282 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
283 
284 // Benchmark: successful anchored search.
285 
SearchSuccess(int iters,int nbytes,const char * regexp,SearchImpl * search)286 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
287   string s;
288   MakeText(&s, nbytes);
289   BenchmarkMemoryUsage();
290   search(iters, regexp, s, Prog::kAnchored, true);
291   SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
292 }
293 
294 // Unambiguous search (RE2 can use OnePass).
295 
Search_Success_DFA(int i,int n)296 void Search_Success_DFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchDFA); }
Search_Success_OnePass(int i,int n)297 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
Search_Success_PCRE(int i,int n)298 void Search_Success_PCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchPCRE); }
Search_Success_RE2(int i,int n)299 void Search_Success_RE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchRE2); }
300 
301 BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
302 #ifdef USEPCRE
303 BENCHMARK_RANGE(Search_Success_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
304 #endif
305 BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
306 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
307 
Search_Success_CachedDFA(int i,int n)308 void Search_Success_CachedDFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
Search_Success_CachedOnePass(int i,int n)309 void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
Search_Success_CachedPCRE(int i,int n)310 void Search_Success_CachedPCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(int i,int n)311 void Search_Success_CachedRE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
312 
313 BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
314 #ifdef USEPCRE
315 BENCHMARK_RANGE(Search_Success_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
316 #endif
317 BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
318 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
319 
320 // Ambiguous search (RE2 cannot use OnePass).
321 
Search_Success1_DFA(int i,int n)322 void Search_Success1_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchDFA); }
Search_Success1_PCRE(int i,int n)323 void Search_Success1_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchPCRE); }
Search_Success1_RE2(int i,int n)324 void Search_Success1_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchRE2); }
Search_Success1_BitState(int i,int n)325 void Search_Success1_BitState(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchBitState); }
326 
327 BENCHMARK_RANGE(Search_Success1_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
328 #ifdef USEPCRE
329 BENCHMARK_RANGE(Search_Success1_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
330 #endif
331 BENCHMARK_RANGE(Search_Success1_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
332 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
333 
Search_Success1_Cached_DFA(int i,int n)334 void Search_Success1_Cached_DFA(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }
Search_Success1_Cached_PCRE(int i,int n)335 void Search_Success1_Cached_PCRE(int i, int n)    { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
Search_Success1_Cached_RE2(int i,int n)336 void Search_Success1_Cached_RE2(int i, int n)     { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
337 
338 BENCHMARK_RANGE(Search_Success1_Cached_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
339 #ifdef USEPCRE
340 BENCHMARK_RANGE(Search_Success1_Cached_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
341 #endif
342 BENCHMARK_RANGE(Search_Success1_Cached_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
343 
344 // Benchmark: use regexp to find phone number.
345 
SearchDigits(int iters,SearchImpl * search)346 void SearchDigits(int iters, SearchImpl* search) {
347   const char *text = "650-253-0001";
348   int len = strlen(text);
349   BenchmarkMemoryUsage();
350   search(iters, "([0-9]+)-([0-9]+)-([0-9]+)",
351          StringPiece(text, len), Prog::kAnchored, true);
352   SetBenchmarkItemsProcessed(iters);
353 }
354 
Search_Digits_DFA(int i)355 void Search_Digits_DFA(int i)         { SearchDigits(i, SearchDFA); }
Search_Digits_NFA(int i)356 void Search_Digits_NFA(int i)         { SearchDigits(i, SearchNFA); }
Search_Digits_OnePass(int i)357 void Search_Digits_OnePass(int i)     { SearchDigits(i, SearchOnePass); }
Search_Digits_PCRE(int i)358 void Search_Digits_PCRE(int i)        { SearchDigits(i, SearchPCRE); }
Search_Digits_RE2(int i)359 void Search_Digits_RE2(int i)         { SearchDigits(i, SearchRE2); }
Search_Digits_BitState(int i)360 void Search_Digits_BitState(int i)         { SearchDigits(i, SearchBitState); }
361 
362 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
363 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
364 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
365 #ifdef USEPCRE
366 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
367 #endif
368 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
369 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
370 
371 // Benchmark: use regexp to parse digit fields in phone number.
372 
Parse3Digits(int iters,void (* parse3)(int,const char *,const StringPiece &))373 void Parse3Digits(int iters,
374                void (*parse3)(int, const char*, const StringPiece&)) {
375   BenchmarkMemoryUsage();
376   parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
377   SetBenchmarkItemsProcessed(iters);
378 }
379 
Parse_Digits_NFA(int i)380 void Parse_Digits_NFA(int i)         { Parse3Digits(i, Parse3NFA); }
Parse_Digits_OnePass(int i)381 void Parse_Digits_OnePass(int i)     { Parse3Digits(i, Parse3OnePass); }
Parse_Digits_PCRE(int i)382 void Parse_Digits_PCRE(int i)        { Parse3Digits(i, Parse3PCRE); }
Parse_Digits_RE2(int i)383 void Parse_Digits_RE2(int i)         { Parse3Digits(i, Parse3RE2); }
Parse_Digits_Backtrack(int i)384 void Parse_Digits_Backtrack(int i)   { Parse3Digits(i, Parse3Backtrack); }
Parse_Digits_BitState(int i)385 void Parse_Digits_BitState(int i)   { Parse3Digits(i, Parse3BitState); }
386 
387 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
388 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
389 #ifdef USEPCRE
390 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
391 #endif
392 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
393 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
394 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
395 
Parse_CachedDigits_NFA(int i)396 void Parse_CachedDigits_NFA(int i)         { Parse3Digits(i, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(int i)397 void Parse_CachedDigits_OnePass(int i)     { Parse3Digits(i, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(int i)398 void Parse_CachedDigits_PCRE(int i)        { Parse3Digits(i, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(int i)399 void Parse_CachedDigits_RE2(int i)         { Parse3Digits(i, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(int i)400 void Parse_CachedDigits_Backtrack(int i)   { Parse3Digits(i, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(int i)401 void Parse_CachedDigits_BitState(int i)   { Parse3Digits(i, Parse3CachedBitState); }
402 
403 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
404 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
405 #ifdef USEPCRE
406 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
407 #endif
408 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
409 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
410 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
411 
Parse3DigitDs(int iters,void (* parse3)(int,const char *,const StringPiece &))412 void Parse3DigitDs(int iters,
413                void (*parse3)(int, const char*, const StringPiece&)) {
414   BenchmarkMemoryUsage();
415   parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
416   SetBenchmarkItemsProcessed(iters);
417 }
418 
Parse_DigitDs_NFA(int i)419 void Parse_DigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3NFA); }
Parse_DigitDs_OnePass(int i)420 void Parse_DigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3OnePass); }
Parse_DigitDs_PCRE(int i)421 void Parse_DigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3PCRE); }
Parse_DigitDs_RE2(int i)422 void Parse_DigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3RE2); }
Parse_DigitDs_Backtrack(int i)423 void Parse_DigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(int i)424 void Parse_DigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3CachedBitState); }
425 
426 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
427 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
428 #ifdef USEPCRE
429 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
430 #endif
431 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
432 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
433 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
434 
Parse_CachedDigitDs_NFA(int i)435 void Parse_CachedDigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(int i)436 void Parse_CachedDigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(int i)437 void Parse_CachedDigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(int i)438 void Parse_CachedDigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(int i)439 void Parse_CachedDigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(int i)440 void Parse_CachedDigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3CachedBitState); }
441 
442 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
443 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
444 #ifdef USEPCRE
445 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
446 #endif
447 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
448 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
449 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
450 
451 // Benchmark: splitting off leading number field.
452 
Parse1Split(int iters,void (* parse1)(int,const char *,const StringPiece &))453 void Parse1Split(int iters,
454               void (*parse1)(int, const char*, const StringPiece&)) {
455   BenchmarkMemoryUsage();
456   parse1(iters, "[0-9]+-(.*)", "650-253-0001");
457   SetBenchmarkItemsProcessed(iters);
458 }
459 
Parse_Split_NFA(int i)460 void Parse_Split_NFA(int i)         { Parse1Split(i, Parse1NFA); }
Parse_Split_OnePass(int i)461 void Parse_Split_OnePass(int i)     { Parse1Split(i, Parse1OnePass); }
Parse_Split_PCRE(int i)462 void Parse_Split_PCRE(int i)        { Parse1Split(i, Parse1PCRE); }
Parse_Split_RE2(int i)463 void Parse_Split_RE2(int i)         { Parse1Split(i, Parse1RE2); }
Parse_Split_BitState(int i)464 void Parse_Split_BitState(int i)         { Parse1Split(i, Parse1BitState); }
465 
466 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
467 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
468 #ifdef USEPCRE
469 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
470 #endif
471 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
472 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
473 
Parse_CachedSplit_NFA(int i)474 void Parse_CachedSplit_NFA(int i)         { Parse1Split(i, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(int i)475 void Parse_CachedSplit_OnePass(int i)     { Parse1Split(i, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(int i)476 void Parse_CachedSplit_PCRE(int i)        { Parse1Split(i, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(int i)477 void Parse_CachedSplit_RE2(int i)         { Parse1Split(i, Parse1CachedRE2); }
Parse_CachedSplit_BitState(int i)478 void Parse_CachedSplit_BitState(int i)         { Parse1Split(i, Parse1CachedBitState); }
479 
480 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
481 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
482 #ifdef USEPCRE
483 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
484 #endif
485 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
486 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
487 
488 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
489 
Parse1SplitHard(int iters,void (* run)(int,const char *,const StringPiece &))490 void Parse1SplitHard(int iters,
491                   void (*run)(int, const char*, const StringPiece&)) {
492   BenchmarkMemoryUsage();
493   run(iters, "[0-9]+.(.*)", "650-253-0001");
494   SetBenchmarkItemsProcessed(iters);
495 }
496 
Parse_SplitHard_NFA(int i)497 void Parse_SplitHard_NFA(int i)         { Parse1SplitHard(i, Parse1NFA); }
Parse_SplitHard_PCRE(int i)498 void Parse_SplitHard_PCRE(int i)        { Parse1SplitHard(i, Parse1PCRE); }
Parse_SplitHard_RE2(int i)499 void Parse_SplitHard_RE2(int i)         { Parse1SplitHard(i, Parse1RE2); }
Parse_SplitHard_BitState(int i)500 void Parse_SplitHard_BitState(int i)         { Parse1SplitHard(i, Parse1BitState); }
501 
502 #ifdef USEPCRE
503 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
504 #endif
505 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
506 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
507 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
508 
Parse_CachedSplitHard_NFA(int i)509 void Parse_CachedSplitHard_NFA(int i)       { Parse1SplitHard(i, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(int i)510 void Parse_CachedSplitHard_PCRE(int i)      { Parse1SplitHard(i, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(int i)511 void Parse_CachedSplitHard_RE2(int i)       { Parse1SplitHard(i, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(int i)512 void Parse_CachedSplitHard_BitState(int i)       { Parse1SplitHard(i, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(int i)513 void Parse_CachedSplitHard_Backtrack(int i)       { Parse1SplitHard(i, Parse1CachedBacktrack); }
514 
515 #ifdef USEPCRE
516 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
517 #endif
518 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
519 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
520 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
521 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
522 
523 // Benchmark: Parse1SplitHard, big text, small match.
524 
Parse1SplitBig1(int iters,void (* run)(int,const char *,const StringPiece &))525 void Parse1SplitBig1(int iters,
526                   void (*run)(int, const char*, const StringPiece&)) {
527   string s;
528   s.append(100000, 'x');
529   s.append("650-253-0001");
530   BenchmarkMemoryUsage();
531   run(iters, "[0-9]+.(.*)", s);
532   SetBenchmarkItemsProcessed(iters);
533 }
534 
Parse_CachedSplitBig1_PCRE(int i)535 void Parse_CachedSplitBig1_PCRE(int i)      { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(int i)536 void Parse_CachedSplitBig1_RE2(int i)       { Parse1SplitBig1(i, SearchParse1CachedRE2); }
537 
538 #ifdef USEPCRE
539 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
540 #endif
541 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
542 
543 // Benchmark: Parse1SplitHard, big text, big match.
544 
Parse1SplitBig2(int iters,void (* run)(int,const char *,const StringPiece &))545 void Parse1SplitBig2(int iters,
546                   void (*run)(int, const char*, const StringPiece&)) {
547   string s;
548   s.append("650-253-");
549   s.append(100000, '0');
550   BenchmarkMemoryUsage();
551   run(iters, "[0-9]+.(.*)", s);
552   SetBenchmarkItemsProcessed(iters);
553 }
554 
Parse_CachedSplitBig2_PCRE(int i)555 void Parse_CachedSplitBig2_PCRE(int i)      { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(int i)556 void Parse_CachedSplitBig2_RE2(int i)       { Parse1SplitBig2(i, SearchParse1CachedRE2); }
557 
558 #ifdef USEPCRE
559 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
560 #endif
561 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
562 
563 // Benchmark: measure time required to parse (but not execute)
564 // a simple regular expression.
565 
ParseRegexp(int iters,const string & regexp)566 void ParseRegexp(int iters, const string& regexp) {
567   for (int i = 0; i < iters; i++) {
568     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
569     CHECK(re);
570     re->Decref();
571   }
572 }
573 
SimplifyRegexp(int iters,const string & regexp)574 void SimplifyRegexp(int iters, const string& regexp) {
575   for (int i = 0; i < iters; i++) {
576     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
577     CHECK(re);
578     Regexp* sre = re->Simplify();
579     CHECK(sre);
580     sre->Decref();
581     re->Decref();
582   }
583 }
584 
NullWalkRegexp(int iters,const string & regexp)585 void NullWalkRegexp(int iters, const string& regexp) {
586   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
587   CHECK(re);
588   for (int i = 0; i < iters; i++) {
589     re->NullWalk();
590   }
591   re->Decref();
592 }
593 
SimplifyCompileRegexp(int iters,const string & regexp)594 void SimplifyCompileRegexp(int iters, const string& regexp) {
595   for (int i = 0; i < iters; i++) {
596     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
597     CHECK(re);
598     Regexp* sre = re->Simplify();
599     CHECK(sre);
600     Prog* prog = sre->CompileToProg(0);
601     CHECK(prog);
602     delete prog;
603     sre->Decref();
604     re->Decref();
605   }
606 }
607 
CompileRegexp(int iters,const string & regexp)608 void CompileRegexp(int iters, const string& regexp) {
609   for (int i = 0; i < iters; i++) {
610     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
611     CHECK(re);
612     Prog* prog = re->CompileToProg(0);
613     CHECK(prog);
614     delete prog;
615     re->Decref();
616   }
617 }
618 
CompileToProg(int iters,const string & regexp)619 void CompileToProg(int iters, const string& regexp) {
620   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
621   CHECK(re);
622   for (int i = 0; i < iters; i++) {
623     Prog* prog = re->CompileToProg(0);
624     CHECK(prog);
625     delete prog;
626   }
627   re->Decref();
628 }
629 
CompileByteMap(int iters,const string & regexp)630 void CompileByteMap(int iters, const string& regexp) {
631   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
632   CHECK(re);
633   Prog* prog = re->CompileToProg(0);
634   CHECK(prog);
635   for (int i = 0; i < iters; i++) {
636     prog->ComputeByteMap();
637   }
638   delete prog;
639   re->Decref();
640 }
641 
CompilePCRE(int iters,const string & regexp)642 void CompilePCRE(int iters, const string& regexp) {
643   for (int i = 0; i < iters; i++) {
644     PCRE re(regexp, PCRE::UTF8);
645     CHECK_EQ(re.error(), "");
646   }
647 }
648 
CompileRE2(int iters,const string & regexp)649 void CompileRE2(int iters, const string& regexp) {
650   for (int i = 0; i < iters; i++) {
651     RE2 re(regexp);
652     CHECK_EQ(re.error(), "");
653   }
654 }
655 
RunBuild(int iters,const string & regexp,void (* run)(int,const string &))656 void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
657   run(iters, regexp);
658   SetBenchmarkItemsProcessed(iters);
659 }
660 
661 }  // namespace re2
662 
663 DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
664 
665 namespace re2 {
666 
BM_PCRE_Compile(int i)667 void BM_PCRE_Compile(int i)      { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
BM_Regexp_Parse(int i)668 void BM_Regexp_Parse(int i)      { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
BM_Regexp_Simplify(int i)669 void BM_Regexp_Simplify(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
BM_CompileToProg(int i)670 void BM_CompileToProg(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
BM_CompileByteMap(int i)671 void BM_CompileByteMap(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
BM_Regexp_Compile(int i)672 void BM_Regexp_Compile(int i)    { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
BM_Regexp_SimplifyCompile(int i)673 void BM_Regexp_SimplifyCompile(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
BM_Regexp_NullWalk(int i)674 void BM_Regexp_NullWalk(int i)   { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
BM_RE2_Compile(int i)675 void BM_RE2_Compile(int i)       { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
676 
677 #ifdef USEPCRE
678 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
679 #endif
680 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
681 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
682 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
683 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
684 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
685 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
686 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
687 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
688 
689 
690 // Makes text of size nbytes, then calls run to search
691 // the text for regexp iters times.
SearchPhone(int iters,int nbytes,ParseImpl * search)692 void SearchPhone(int iters, int nbytes, ParseImpl* search) {
693   StopBenchmarkTiming();
694   string s;
695   MakeText(&s, nbytes);
696   s.append("(650) 253-0001");
697   BenchmarkMemoryUsage();
698   StartBenchmarkTiming();
699   search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
700   SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
701 }
702 
SearchPhone_CachedPCRE(int i,int n)703 void SearchPhone_CachedPCRE(int i, int n) {
704   SearchPhone(i, n, SearchParse2CachedPCRE);
705 }
SearchPhone_CachedRE2(int i,int n)706 void SearchPhone_CachedRE2(int i, int n) {
707   SearchPhone(i, n, SearchParse2CachedRE2);
708 }
709 
710 #ifdef USEPCRE
711 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
712 #endif
713 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
714 
715 /*
716 TODO(rsc): Make this work again.
717 
718 // Generates and returns a string over binary alphabet {0,1} that contains
719 // all possible binary sequences of length n as subsequences.  The obvious
720 // brute force method would generate a string of length n * 2^n, but this
721 // generates a string of length n + 2^n - 1 called a De Bruijn cycle.
722 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
723 static string DeBruijnString(int n) {
724   CHECK_LT(n, 8*sizeof(int));
725   CHECK_GT(n, 0);
726 
727   vector<bool> did(1<<n);
728   for (int i = 0; i < 1<<n; i++)
729     did[i] = false;
730 
731   string s;
732   for (int i = 0; i < n-1; i++)
733     s.append("0");
734   int bits = 0;
735   int mask = (1<<n) - 1;
736   for (int i = 0; i < (1<<n); i++) {
737     bits <<= 1;
738     bits &= mask;
739     if (!did[bits|1]) {
740       bits |= 1;
741       s.append("1");
742     } else {
743       s.append("0");
744     }
745     CHECK(!did[bits]);
746     did[bits] = true;
747   }
748   return s;
749 }
750 
751 void CacheFill(int iters, int n, SearchImpl *srch) {
752   string s = DeBruijnString(n+1);
753   string t;
754   for (int i = n+1; i < 20; i++) {
755     t = s + s;
756     swap(s, t);
757   }
758   srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
759        Prog::kUnanchored, true);
760   SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size());
761 }
762 
763 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
764 void CacheFillRE2(int i, int n)  { CacheFill(i, n, SearchCachedRE2); }
765 void CacheFillNFA(int i, int n)  { CacheFill(i, n, SearchCachedNFA); }
766 void CacheFillDFA(int i, int n)  { CacheFill(i, n, SearchCachedDFA); }
767 
768 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
769 // for the static BenchmarkRegisterer, which makes it unusable inside
770 // a macro like DO24 below.  MY_BENCHMARK_WITH_ARG uses the argument a
771 // to make the identifiers distinct (only possible when 'a' is a simple
772 // expression like 2, not like 1+1).
773 #define MY_BENCHMARK_WITH_ARG(n, a) \
774   bool __benchmark_ ## n ## a =     \
775     (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
776 
777 #define DO24(A, B) \
778   A(B, 1);    A(B, 2);    A(B, 3);    A(B, 4);    A(B, 5);    A(B, 6);  \
779   A(B, 7);    A(B, 8);    A(B, 9);    A(B, 10);   A(B, 11);   A(B, 12); \
780   A(B, 13);   A(B, 14);   A(B, 15);   A(B, 16);   A(B, 17);   A(B, 18); \
781   A(B, 19);   A(B, 20);   A(B, 21);   A(B, 22);   A(B, 23);   A(B, 24);
782 
783 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
784 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
785 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
786 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
787 
788 #undef DO24
789 #undef MY_BENCHMARK_WITH_ARG
790 */
791 
792 ////////////////////////////////////////////////////////////////////////
793 //
794 // Implementation routines.  Sad that there are so many,
795 // but all the interfaces are slightly different.
796 
797 // Runs implementation to search for regexp in text, iters times.
798 // Expect_match says whether the regexp should be found.
799 // Anchored says whether to run an anchored search.
800 
SearchDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)801 void SearchDFA(int iters, const char* regexp, const StringPiece& text,
802             Prog::Anchor anchor, bool expect_match) {
803   for (int i = 0; i < iters; i++) {
804     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
805     CHECK(re);
806     Prog* prog = re->CompileToProg(0);
807     CHECK(prog);
808     bool failed = false;
809     CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch,
810                              NULL, &failed, NULL),
811              expect_match);
812     CHECK(!failed);
813     delete prog;
814     re->Decref();
815   }
816 }
817 
SearchNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)818 void SearchNFA(int iters, const char* regexp, const StringPiece& text,
819             Prog::Anchor anchor, bool expect_match) {
820   for (int i = 0; i < iters; i++) {
821     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
822     CHECK(re);
823     Prog* prog = re->CompileToProg(0);
824     CHECK(prog);
825     CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
826              expect_match);
827     delete prog;
828     re->Decref();
829   }
830 }
831 
SearchOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)832 void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
833             Prog::Anchor anchor, bool expect_match) {
834   for (int i = 0; i < iters; i++) {
835     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
836     CHECK(re);
837     Prog* prog = re->CompileToProg(0);
838     CHECK(prog);
839     CHECK(prog->IsOnePass());
840     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
841              expect_match);
842     delete prog;
843     re->Decref();
844   }
845 }
846 
SearchBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)847 void SearchBitState(int iters, const char* regexp, const StringPiece& text,
848             Prog::Anchor anchor, bool expect_match) {
849   for (int i = 0; i < iters; i++) {
850     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
851     CHECK(re);
852     Prog* prog = re->CompileToProg(0);
853     CHECK(prog);
854     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
855              expect_match);
856     delete prog;
857     re->Decref();
858   }
859 }
860 
SearchPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)861 void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
862                 Prog::Anchor anchor, bool expect_match) {
863   for (int i = 0; i < iters; i++) {
864     PCRE re(regexp, PCRE::UTF8);
865     CHECK_EQ(re.error(), "");
866     if (anchor == Prog::kAnchored)
867       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
868     else
869       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
870   }
871 }
872 
SearchRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)873 void SearchRE2(int iters, const char* regexp, const StringPiece& text,
874                Prog::Anchor anchor, bool expect_match) {
875   for (int i = 0; i < iters; i++) {
876     RE2 re(regexp);
877     CHECK_EQ(re.error(), "");
878     if (anchor == Prog::kAnchored)
879       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
880     else
881       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
882   }
883 }
884 
885 // SearchCachedXXX is like SearchXXX but only does the
886 // regexp parsing and compiling once.  This lets us measure
887 // search time without the per-regexp overhead.
888 
SearchCachedDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)889 void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
890                      Prog::Anchor anchor, bool expect_match) {
891   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
892   CHECK(re);
893   Prog* prog = re->CompileToProg(1LL<<31);
894   CHECK(prog);
895   for (int i = 0; i < iters; i++) {
896     bool failed = false;
897     CHECK_EQ(prog->SearchDFA(text, NULL, anchor,
898                              Prog::kFirstMatch, NULL, &failed, NULL),
899              expect_match);
900     CHECK(!failed);
901   }
902   delete prog;
903   re->Decref();
904 }
905 
SearchCachedNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)906 void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
907                      Prog::Anchor anchor, bool expect_match) {
908   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
909   CHECK(re);
910   Prog* prog = re->CompileToProg(0);
911   CHECK(prog);
912   for (int i = 0; i < iters; i++) {
913     CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
914              expect_match);
915   }
916   delete prog;
917   re->Decref();
918 }
919 
SearchCachedOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)920 void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
921                      Prog::Anchor anchor, bool expect_match) {
922   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
923   CHECK(re);
924   Prog* prog = re->CompileToProg(0);
925   CHECK(prog);
926   CHECK(prog->IsOnePass());
927   for (int i = 0; i < iters; i++)
928     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
929              expect_match);
930   delete prog;
931   re->Decref();
932 }
933 
SearchCachedBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)934 void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
935                      Prog::Anchor anchor, bool expect_match) {
936   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
937   CHECK(re);
938   Prog* prog = re->CompileToProg(0);
939   CHECK(prog);
940   for (int i = 0; i < iters; i++)
941     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
942              expect_match);
943   delete prog;
944   re->Decref();
945 }
946 
SearchCachedPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)947 void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
948                      Prog::Anchor anchor, bool expect_match) {
949   PCRE re(regexp, PCRE::UTF8);
950   CHECK_EQ(re.error(), "");
951   for (int i = 0; i < iters; i++) {
952     if (anchor == Prog::kAnchored)
953       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
954     else
955       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
956   }
957 }
958 
SearchCachedRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)959 void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
960                      Prog::Anchor anchor, bool expect_match) {
961   RE2 re(regexp);
962   CHECK_EQ(re.error(), "");
963   for (int i = 0; i < iters; i++) {
964     if (anchor == Prog::kAnchored)
965       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
966     else
967       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
968   }
969 }
970 
971 
972 // Runs implementation to full match regexp against text,
973 // extracting three submatches.  Expects match always.
974 
Parse3NFA(int iters,const char * regexp,const StringPiece & text)975 void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
976   for (int i = 0; i < iters; i++) {
977     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
978     CHECK(re);
979     Prog* prog = re->CompileToProg(0);
980     CHECK(prog);
981     StringPiece sp[4];  // 4 because sp[0] is whole match.
982     CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
983     delete prog;
984     re->Decref();
985   }
986 }
987 
Parse3OnePass(int iters,const char * regexp,const StringPiece & text)988 void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
989   for (int i = 0; i < iters; i++) {
990     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
991     CHECK(re);
992     Prog* prog = re->CompileToProg(0);
993     CHECK(prog);
994     CHECK(prog->IsOnePass());
995     StringPiece sp[4];  // 4 because sp[0] is whole match.
996     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
997     delete prog;
998     re->Decref();
999   }
1000 }
1001 
Parse3BitState(int iters,const char * regexp,const StringPiece & text)1002 void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
1003   for (int i = 0; i < iters; i++) {
1004     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1005     CHECK(re);
1006     Prog* prog = re->CompileToProg(0);
1007     CHECK(prog);
1008     StringPiece sp[4];  // 4 because sp[0] is whole match.
1009     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1010     delete prog;
1011     re->Decref();
1012   }
1013 }
1014 
Parse3Backtrack(int iters,const char * regexp,const StringPiece & text)1015 void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
1016   for (int i = 0; i < iters; i++) {
1017     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1018     CHECK(re);
1019     Prog* prog = re->CompileToProg(0);
1020     CHECK(prog);
1021     StringPiece sp[4];  // 4 because sp[0] is whole match.
1022     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1023     delete prog;
1024     re->Decref();
1025   }
1026 }
1027 
Parse3PCRE(int iters,const char * regexp,const StringPiece & text)1028 void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
1029   for (int i = 0; i < iters; i++) {
1030     PCRE re(regexp, PCRE::UTF8);
1031     CHECK_EQ(re.error(), "");
1032     StringPiece sp1, sp2, sp3;
1033     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1034   }
1035 }
1036 
Parse3RE2(int iters,const char * regexp,const StringPiece & text)1037 void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
1038   for (int i = 0; i < iters; i++) {
1039     RE2 re(regexp);
1040     CHECK_EQ(re.error(), "");
1041     StringPiece sp1, sp2, sp3;
1042     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1043   }
1044 }
1045 
Parse3CachedNFA(int iters,const char * regexp,const StringPiece & text)1046 void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1047   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1048   CHECK(re);
1049   Prog* prog = re->CompileToProg(0);
1050   CHECK(prog);
1051   StringPiece sp[4];  // 4 because sp[0] is whole match.
1052   for (int i = 0; i < iters; i++) {
1053     CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1054   }
1055   delete prog;
1056   re->Decref();
1057 }
1058 
Parse3CachedOnePass(int iters,const char * regexp,const StringPiece & text)1059 void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1060   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1061   CHECK(re);
1062   Prog* prog = re->CompileToProg(0);
1063   CHECK(prog);
1064   CHECK(prog->IsOnePass());
1065   StringPiece sp[4];  // 4 because sp[0] is whole match.
1066   for (int i = 0; i < iters; i++)
1067     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1068   delete prog;
1069   re->Decref();
1070 }
1071 
Parse3CachedBitState(int iters,const char * regexp,const StringPiece & text)1072 void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1073   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1074   CHECK(re);
1075   Prog* prog = re->CompileToProg(0);
1076   CHECK(prog);
1077   StringPiece sp[4];  // 4 because sp[0] is whole match.
1078   for (int i = 0; i < iters; i++)
1079     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1080   delete prog;
1081   re->Decref();
1082 }
1083 
Parse3CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1084 void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1085   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1086   CHECK(re);
1087   Prog* prog = re->CompileToProg(0);
1088   CHECK(prog);
1089   StringPiece sp[4];  // 4 because sp[0] is whole match.
1090   for (int i = 0; i < iters; i++)
1091     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1092   delete prog;
1093   re->Decref();
1094 }
1095 
Parse3CachedPCRE(int iters,const char * regexp,const StringPiece & text)1096 void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1097   PCRE re(regexp, PCRE::UTF8);
1098   CHECK_EQ(re.error(), "");
1099   StringPiece sp1, sp2, sp3;
1100   for (int i = 0; i < iters; i++) {
1101     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1102   }
1103 }
1104 
Parse3CachedRE2(int iters,const char * regexp,const StringPiece & text)1105 void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1106   RE2 re(regexp);
1107   CHECK_EQ(re.error(), "");
1108   StringPiece sp1, sp2, sp3;
1109   for (int i = 0; i < iters; i++) {
1110     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1111   }
1112 }
1113 
1114 
1115 // Runs implementation to full match regexp against text,
1116 // extracting three submatches.  Expects match always.
1117 
Parse1NFA(int iters,const char * regexp,const StringPiece & text)1118 void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
1119   for (int i = 0; i < iters; i++) {
1120     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1121     CHECK(re);
1122     Prog* prog = re->CompileToProg(0);
1123     CHECK(prog);
1124     StringPiece sp[2];  // 2 because sp[0] is whole match.
1125     CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1126     delete prog;
1127     re->Decref();
1128   }
1129 }
1130 
Parse1OnePass(int iters,const char * regexp,const StringPiece & text)1131 void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
1132   for (int i = 0; i < iters; i++) {
1133     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1134     CHECK(re);
1135     Prog* prog = re->CompileToProg(0);
1136     CHECK(prog);
1137     CHECK(prog->IsOnePass());
1138     StringPiece sp[2];  // 2 because sp[0] is whole match.
1139     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1140     delete prog;
1141     re->Decref();
1142   }
1143 }
1144 
Parse1BitState(int iters,const char * regexp,const StringPiece & text)1145 void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
1146   for (int i = 0; i < iters; i++) {
1147     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1148     CHECK(re);
1149     Prog* prog = re->CompileToProg(0);
1150     CHECK(prog);
1151     StringPiece sp[2];  // 2 because sp[0] is whole match.
1152     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1153     delete prog;
1154     re->Decref();
1155   }
1156 }
1157 
Parse1PCRE(int iters,const char * regexp,const StringPiece & text)1158 void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
1159   for (int i = 0; i < iters; i++) {
1160     PCRE re(regexp, PCRE::UTF8);
1161     CHECK_EQ(re.error(), "");
1162     StringPiece sp1;
1163     CHECK(PCRE::FullMatch(text, re, &sp1));
1164   }
1165 }
1166 
Parse1RE2(int iters,const char * regexp,const StringPiece & text)1167 void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
1168   for (int i = 0; i < iters; i++) {
1169     RE2 re(regexp);
1170     CHECK_EQ(re.error(), "");
1171     StringPiece sp1;
1172     CHECK(RE2::FullMatch(text, re, &sp1));
1173   }
1174 }
1175 
Parse1CachedNFA(int iters,const char * regexp,const StringPiece & text)1176 void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1177   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1178   CHECK(re);
1179   Prog* prog = re->CompileToProg(0);
1180   CHECK(prog);
1181   StringPiece sp[2];  // 2 because sp[0] is whole match.
1182   for (int i = 0; i < iters; i++) {
1183     CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1184   }
1185   delete prog;
1186   re->Decref();
1187 }
1188 
Parse1CachedOnePass(int iters,const char * regexp,const StringPiece & text)1189 void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1190   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1191   CHECK(re);
1192   Prog* prog = re->CompileToProg(0);
1193   CHECK(prog);
1194   CHECK(prog->IsOnePass());
1195   StringPiece sp[2];  // 2 because sp[0] is whole match.
1196   for (int i = 0; i < iters; i++)
1197     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1198   delete prog;
1199   re->Decref();
1200 }
1201 
Parse1CachedBitState(int iters,const char * regexp,const StringPiece & text)1202 void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1203   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1204   CHECK(re);
1205   Prog* prog = re->CompileToProg(0);
1206   CHECK(prog);
1207   StringPiece sp[2];  // 2 because sp[0] is whole match.
1208   for (int i = 0; i < iters; i++)
1209     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1210   delete prog;
1211   re->Decref();
1212 }
1213 
Parse1CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1214 void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1215   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1216   CHECK(re);
1217   Prog* prog = re->CompileToProg(0);
1218   CHECK(prog);
1219   StringPiece sp[2];  // 2 because sp[0] is whole match.
1220   for (int i = 0; i < iters; i++)
1221     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1222   delete prog;
1223   re->Decref();
1224 }
1225 
Parse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1226 void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1227   PCRE re(regexp, PCRE::UTF8);
1228   CHECK_EQ(re.error(), "");
1229   StringPiece sp1;
1230   for (int i = 0; i < iters; i++) {
1231     CHECK(PCRE::FullMatch(text, re, &sp1));
1232   }
1233 }
1234 
Parse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1235 void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1236   RE2 re(regexp);
1237   CHECK_EQ(re.error(), "");
1238   StringPiece sp1;
1239   for (int i = 0; i < iters; i++) {
1240     CHECK(RE2::FullMatch(text, re, &sp1));
1241   }
1242 }
1243 
SearchParse2CachedPCRE(int iters,const char * regexp,const StringPiece & text)1244 void SearchParse2CachedPCRE(int iters, const char* regexp,
1245                             const StringPiece& text) {
1246   PCRE re(regexp, PCRE::UTF8);
1247   CHECK_EQ(re.error(), "");
1248   for (int i = 0; i < iters; i++) {
1249     StringPiece sp1, sp2;
1250     CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1251   }
1252 }
1253 
SearchParse2CachedRE2(int iters,const char * regexp,const StringPiece & text)1254 void SearchParse2CachedRE2(int iters, const char* regexp,
1255                            const StringPiece& text) {
1256   RE2 re(regexp);
1257   CHECK_EQ(re.error(), "");
1258   for (int i = 0; i < iters; i++) {
1259     StringPiece sp1, sp2;
1260     CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1261   }
1262 }
1263 
SearchParse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1264 void SearchParse1CachedPCRE(int iters, const char* regexp,
1265                             const StringPiece& text) {
1266   PCRE re(regexp, PCRE::UTF8);
1267   CHECK_EQ(re.error(), "");
1268   for (int i = 0; i < iters; i++) {
1269     StringPiece sp1;
1270     CHECK(PCRE::PartialMatch(text, re, &sp1));
1271   }
1272 }
1273 
SearchParse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1274 void SearchParse1CachedRE2(int iters, const char* regexp,
1275                            const StringPiece& text) {
1276   RE2 re(regexp);
1277   CHECK_EQ(re.error(), "");
1278   for (int i = 0; i < iters; i++) {
1279     StringPiece sp1;
1280     CHECK(RE2::PartialMatch(text, re, &sp1));
1281   }
1282 }
1283 
EmptyPartialMatchPCRE(int n)1284 void EmptyPartialMatchPCRE(int n) {
1285   PCRE re("");
1286   for (int i = 0; i < n; i++) {
1287     PCRE::PartialMatch("", re);
1288   }
1289 }
1290 
EmptyPartialMatchRE2(int n)1291 void EmptyPartialMatchRE2(int n) {
1292   RE2 re("");
1293   for (int i = 0; i < n; i++) {
1294     RE2::PartialMatch("", re);
1295   }
1296 }
1297 #ifdef USEPCRE
1298 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1299 #endif
1300 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1301 
SimplePartialMatchPCRE(int n)1302 void SimplePartialMatchPCRE(int n) {
1303   PCRE re("abcdefg");
1304   for (int i = 0; i < n; i++) {
1305     PCRE::PartialMatch("abcdefg", re);
1306   }
1307 }
1308 
SimplePartialMatchRE2(int n)1309 void SimplePartialMatchRE2(int n) {
1310   RE2 re("abcdefg");
1311   for (int i = 0; i < n; i++) {
1312     RE2::PartialMatch("abcdefg", re);
1313   }
1314 }
1315 #ifdef USEPCRE
1316 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1317 #endif
1318 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1319 
1320 static string http_text =
1321   "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1322   "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1323 
HTTPPartialMatchPCRE(int n)1324 void HTTPPartialMatchPCRE(int n) {
1325   StringPiece a;
1326   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1327   for (int i = 0; i < n; i++) {
1328     PCRE::PartialMatch(http_text, re, &a);
1329   }
1330 }
1331 
HTTPPartialMatchRE2(int n)1332 void HTTPPartialMatchRE2(int n) {
1333   StringPiece a;
1334   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1335   for (int i = 0; i < n; i++) {
1336     RE2::PartialMatch(http_text, re, &a);
1337   }
1338 }
1339 
1340 #ifdef USEPCRE
1341 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1342 #endif
1343 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1344 
1345 static string http_smalltext =
1346   "GET /abc HTTP/1.1";
1347 
SmallHTTPPartialMatchPCRE(int n)1348 void SmallHTTPPartialMatchPCRE(int n) {
1349   StringPiece a;
1350   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1351   for (int i = 0; i < n; i++) {
1352     PCRE::PartialMatch(http_text, re, &a);
1353   }
1354 }
1355 
SmallHTTPPartialMatchRE2(int n)1356 void SmallHTTPPartialMatchRE2(int n) {
1357   StringPiece a;
1358   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1359   for (int i = 0; i < n; i++) {
1360     RE2::PartialMatch(http_text, re, &a);
1361   }
1362 }
1363 
1364 #ifdef USEPCRE
1365 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1366 #endif
1367 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1368 
DotMatchPCRE(int n)1369 void DotMatchPCRE(int n) {
1370   StringPiece a;
1371   PCRE re("(?-s)^(.+)");
1372   for (int i = 0; i < n; i++) {
1373     PCRE::PartialMatch(http_text, re, &a);
1374   }
1375 }
1376 
DotMatchRE2(int n)1377 void DotMatchRE2(int n) {
1378   StringPiece a;
1379   RE2 re("(?-s)^(.+)");
1380   for (int i = 0; i < n; i++) {
1381     RE2::PartialMatch(http_text, re, &a);
1382   }
1383 }
1384 
1385 #ifdef USEPCRE
1386 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1387 #endif
1388 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1389 
ASCIIMatchPCRE(int n)1390 void ASCIIMatchPCRE(int n) {
1391   StringPiece a;
1392   PCRE re("(?-s)^([ -~]+)");
1393   for (int i = 0; i < n; i++) {
1394     PCRE::PartialMatch(http_text, re, &a);
1395   }
1396 }
1397 
ASCIIMatchRE2(int n)1398 void ASCIIMatchRE2(int n) {
1399   StringPiece a;
1400   RE2 re("(?-s)^([ -~]+)");
1401   for (int i = 0; i < n; i++) {
1402     RE2::PartialMatch(http_text, re, &a);
1403   }
1404 }
1405 
1406 #ifdef USEPCRE
1407 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1408 #endif
1409 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1410 
FullMatchPCRE(int iter,int n,const char * regexp)1411 void FullMatchPCRE(int iter, int n, const char *regexp) {
1412   StopBenchmarkTiming();
1413   string s;
1414   MakeText(&s, n);
1415   s += "ABCDEFGHIJ";
1416   BenchmarkMemoryUsage();
1417   PCRE re(regexp);
1418   StartBenchmarkTiming();
1419   for (int i = 0; i < iter; i++)
1420     CHECK(PCRE::FullMatch(s, re));
1421   SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1422 }
1423 
FullMatchRE2(int iter,int n,const char * regexp)1424 void FullMatchRE2(int iter, int n, const char *regexp) {
1425   StopBenchmarkTiming();
1426   string s;
1427   MakeText(&s, n);
1428   s += "ABCDEFGHIJ";
1429   BenchmarkMemoryUsage();
1430   RE2 re(regexp, RE2::Latin1);
1431   StartBenchmarkTiming();
1432   for (int i = 0; i < iter; i++)
1433     CHECK(RE2::FullMatch(s, re));
1434   SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1435 }
1436 
FullMatch_DotStar_CachedPCRE(int i,int n)1437 void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); }
FullMatch_DotStar_CachedRE2(int i,int n)1438 void FullMatch_DotStar_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s).*"); }
1439 
FullMatch_DotStarDollar_CachedPCRE(int i,int n)1440 void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(int i,int n)1441 void FullMatch_DotStarDollar_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s).*$"); }
1442 
FullMatch_DotStarCapture_CachedPCRE(int i,int n)1443 void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(int i,int n)1444 void FullMatch_DotStarCapture_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s)((.*)()()($))"); }
1445 
1446 #ifdef USEPCRE
1447 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1448 #endif
1449 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2,  8, 2<<20);
1450 
1451 #ifdef USEPCRE
1452 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1453 #endif
1454 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2,  8, 2<<20);
1455 
1456 #ifdef USEPCRE
1457 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1458 #endif
1459 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2,  8, 2<<20);
1460 
1461 }  // namespace re2
1462