1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Benchmarks for regular expression implementations.
6
7 #include "util/test.h"
8 #include "re2/prog.h"
9 #include "re2/re2.h"
10 #include "re2/regexp.h"
11 #include "util/pcre.h"
12 #include "util/benchmark.h"
13
14 namespace re2 {
15 void Test();
16 void MemoryUsage();
17 } // namespace re2
18
19 typedef testing::MallocCounter MallocCounter;
20
21 namespace re2 {
22
Test()23 void Test() {
24 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
25 CHECK(re);
26 Prog* prog = re->CompileToProg(0);
27 CHECK(prog);
28 CHECK(prog->IsOnePass());
29 const char* text = "650-253-0001";
30 StringPiece sp[4];
31 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
32 CHECK_EQ(sp[0], "650-253-0001");
33 CHECK_EQ(sp[1], "650");
34 CHECK_EQ(sp[2], "253");
35 CHECK_EQ(sp[3], "0001");
36 delete prog;
37 re->Decref();
38 LOG(INFO) << "test passed\n";
39 }
40
MemoryUsage()41 void MemoryUsage() {
42 const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
43 const char* text = "650-253-0001";
44 {
45 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
46 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
47 CHECK(re);
48 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
49 // because LOG(INFO) might do a big allocation before they get evaluated.
50 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
51 mc.Reset();
52
53 Prog* prog = re->CompileToProg(0);
54 CHECK(prog);
55 CHECK(prog->IsOnePass());
56 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
57 mc.Reset();
58
59 StringPiece sp[4];
60 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
61 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
62 delete prog;
63 re->Decref();
64 }
65
66 {
67 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
68
69 PCRE re(regexp, PCRE::UTF8);
70 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
71 PCRE::FullMatch(text, re);
72 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
73 }
74
75 {
76 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
77
78 PCRE* re = new PCRE(regexp, PCRE::UTF8);
79 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
80 PCRE::FullMatch(text, *re);
81 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
82 delete re;
83 }
84
85 {
86 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
87
88 RE2 re(regexp);
89 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
90 RE2::FullMatch(text, re);
91 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
92 }
93
94 fprintf(stderr, "sizeof: PCRE=%d RE2=%d Prog=%d Inst=%d\n",
95 static_cast<int>(sizeof(PCRE)),
96 static_cast<int>(sizeof(RE2)),
97 static_cast<int>(sizeof(Prog)),
98 static_cast<int>(sizeof(Prog::Inst)));
99 }
100
101 // Regular expression implementation wrappers.
102 // Defined at bottom of file, but they are repetitive
103 // and not interesting.
104
105 typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
106 Prog::Anchor anchor, bool expect_match);
107
108 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,
109 SearchPCRE, SearchRE2,
110 SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
111 SearchCachedPCRE, SearchCachedRE2;
112
113 typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);
114
115 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
116 Parse1PCRE, Parse1RE2,
117 Parse1Backtrack,
118 Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
119 Parse1CachedPCRE, Parse1CachedRE2,
120 Parse1CachedBacktrack;
121
122 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
123 Parse3PCRE, Parse3RE2,
124 Parse3Backtrack,
125 Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
126 Parse3CachedPCRE, Parse3CachedRE2,
127 Parse3CachedBacktrack;
128
129 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
130
131 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
132
133 // Benchmark: failed search for regexp in random text.
134
135 // Generate random text that won't contain the search string,
136 // to test worst-case search behavior.
MakeText(string * text,int nbytes)137 void MakeText(string* text, int nbytes) {
138 text->resize(nbytes);
139 srand(0);
140 for (int i = 0; i < nbytes; i++) {
141 if (!rand()%30)
142 (*text)[i] = '\n';
143 else
144 (*text)[i] = rand()%(0x7E + 1 - 0x20)+0x20;
145 }
146 }
147
148 // Makes text of size nbytes, then calls run to search
149 // the text for regexp iters times.
Search(int iters,int nbytes,const char * regexp,SearchImpl * search)150 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
151 StopBenchmarkTiming();
152 string s;
153 MakeText(&s, nbytes);
154 BenchmarkMemoryUsage();
155 StartBenchmarkTiming();
156 search(iters, regexp, s, Prog::kUnanchored, false);
157 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
158 }
159
160 // These two are easy because they start with an A,
161 // giving the search loop something to memchr for.
162 #define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
163 #define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
164
165 // This is a little harder, since it starts with a character class
166 // and thus can't be memchr'ed. Could look for ABC and work backward,
167 // but no one does that.
168 #define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
169
170 // This is a fair amount harder, because of the leading [ -~]*.
171 // A bad backtracking implementation will take O(text^2) time to
172 // figure out there's no match.
173 #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
174
175 // This stresses engines that are trying to track parentheses.
176 #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
177 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
178
Search_Easy0_CachedDFA(int i,int n)179 void Search_Easy0_CachedDFA(int i, int n) { Search(i, n, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(int i,int n)180 void Search_Easy0_CachedNFA(int i, int n) { Search(i, n, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(int i,int n)181 void Search_Easy0_CachedPCRE(int i, int n) { Search(i, n, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(int i,int n)182 void Search_Easy0_CachedRE2(int i, int n) { Search(i, n, EASY0, SearchCachedRE2); }
183
184 BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
185 BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
186 #ifdef USEPCRE
187 BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
188 #endif
189 BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
190
Search_Easy1_CachedDFA(int i,int n)191 void Search_Easy1_CachedDFA(int i, int n) { Search(i, n, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(int i,int n)192 void Search_Easy1_CachedNFA(int i, int n) { Search(i, n, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(int i,int n)193 void Search_Easy1_CachedPCRE(int i, int n) { Search(i, n, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(int i,int n)194 void Search_Easy1_CachedRE2(int i, int n) { Search(i, n, EASY1, SearchCachedRE2); }
195
196 BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
197 BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
198 #ifdef USEPCRE
199 BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
200 #endif
201 BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
202
Search_Medium_CachedDFA(int i,int n)203 void Search_Medium_CachedDFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(int i,int n)204 void Search_Medium_CachedNFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(int i,int n)205 void Search_Medium_CachedPCRE(int i, int n) { Search(i, n, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(int i,int n)206 void Search_Medium_CachedRE2(int i, int n) { Search(i, n, MEDIUM, SearchCachedRE2); }
207
208 BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
209 BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
210 #ifdef USEPCRE
211 BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
212 #endif
213 BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
214
Search_Hard_CachedDFA(int i,int n)215 void Search_Hard_CachedDFA(int i, int n) { Search(i, n, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(int i,int n)216 void Search_Hard_CachedNFA(int i, int n) { Search(i, n, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(int i,int n)217 void Search_Hard_CachedPCRE(int i, int n) { Search(i, n, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(int i,int n)218 void Search_Hard_CachedRE2(int i, int n) { Search(i, n, HARD, SearchCachedRE2); }
219
220 BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
221 BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
222 #ifdef USEPCRE
223 BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
224 #endif
225 BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
226
Search_Parens_CachedDFA(int i,int n)227 void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(int i,int n)228 void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(int i,int n)229 void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(int i,int n)230 void Search_Parens_CachedRE2(int i, int n) { Search(i, n, PARENS, SearchCachedRE2); }
231
232 BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
233 BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
234 #ifdef USEPCRE
235 BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
236 #endif
237 BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
238
SearchBigFixed(int iters,int nbytes,SearchImpl * search)239 void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
240 StopBenchmarkTiming();
241 string s;
242 s.append(nbytes/2, 'x');
243 string regexp = "^" + s + ".*$";
244 string t;
245 MakeText(&t, nbytes/2);
246 s += t;
247 BenchmarkMemoryUsage();
248 StartBenchmarkTiming();
249 search(iters, regexp.c_str(), s, Prog::kUnanchored, true);
250 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
251 }
252
Search_BigFixed_CachedDFA(int i,int n)253 void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCachedDFA); }
Search_BigFixed_CachedNFA(int i,int n)254 void Search_BigFixed_CachedNFA(int i, int n) { SearchBigFixed(i, n, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(int i,int n)255 void Search_BigFixed_CachedPCRE(int i, int n) { SearchBigFixed(i, n, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(int i,int n)256 void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCachedRE2); }
257
258 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
259 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
260 #ifdef USEPCRE
261 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
262 #endif
263 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
264
265 // Benchmark: FindAndConsume
FindAndConsume(int iters,int nbytes)266 void FindAndConsume(int iters, int nbytes) {
267 StopBenchmarkTiming();
268 string s;
269 MakeText(&s, nbytes);
270 s.append("Hello World");
271 StartBenchmarkTiming();
272 RE2 re("((Hello World))");
273 for (int i = 0; i < iters; i++) {
274 StringPiece t = s;
275 StringPiece u;
276 CHECK(RE2::FindAndConsume(&t, re, &u));
277 CHECK_EQ(u, "Hello World");
278 }
279 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
280 }
281
282 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
283
284 // Benchmark: successful anchored search.
285
SearchSuccess(int iters,int nbytes,const char * regexp,SearchImpl * search)286 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
287 string s;
288 MakeText(&s, nbytes);
289 BenchmarkMemoryUsage();
290 search(iters, regexp, s, Prog::kAnchored, true);
291 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
292 }
293
294 // Unambiguous search (RE2 can use OnePass).
295
Search_Success_DFA(int i,int n)296 void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); }
Search_Success_OnePass(int i,int n)297 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
Search_Success_PCRE(int i,int n)298 void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); }
Search_Success_RE2(int i,int n)299 void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); }
300
301 BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
302 #ifdef USEPCRE
303 BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
304 #endif
305 BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
306 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
307
Search_Success_CachedDFA(int i,int n)308 void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
Search_Success_CachedOnePass(int i,int n)309 void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
Search_Success_CachedPCRE(int i,int n)310 void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(int i,int n)311 void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
312
313 BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
314 #ifdef USEPCRE
315 BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
316 #endif
317 BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
318 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
319
320 // Ambiguous search (RE2 cannot use OnePass).
321
Search_Success1_DFA(int i,int n)322 void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchDFA); }
Search_Success1_PCRE(int i,int n)323 void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchPCRE); }
Search_Success1_RE2(int i,int n)324 void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchRE2); }
Search_Success1_BitState(int i,int n)325 void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*.$", SearchBitState); }
326
327 BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
328 #ifdef USEPCRE
329 BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
330 #endif
331 BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
332 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
333
Search_Success1_Cached_DFA(int i,int n)334 void Search_Success1_Cached_DFA(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedDFA); }
Search_Success1_Cached_PCRE(int i,int n)335 void Search_Success1_Cached_PCRE(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedPCRE); }
Search_Success1_Cached_RE2(int i,int n)336 void Search_Success1_Cached_RE2(int i, int n) { SearchSuccess(i, n, ".*.$", SearchCachedRE2); }
337
338 BENCHMARK_RANGE(Search_Success1_Cached_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
339 #ifdef USEPCRE
340 BENCHMARK_RANGE(Search_Success1_Cached_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
341 #endif
342 BENCHMARK_RANGE(Search_Success1_Cached_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
343
344 // Benchmark: use regexp to find phone number.
345
SearchDigits(int iters,SearchImpl * search)346 void SearchDigits(int iters, SearchImpl* search) {
347 const char *text = "650-253-0001";
348 int len = strlen(text);
349 BenchmarkMemoryUsage();
350 search(iters, "([0-9]+)-([0-9]+)-([0-9]+)",
351 StringPiece(text, len), Prog::kAnchored, true);
352 SetBenchmarkItemsProcessed(iters);
353 }
354
Search_Digits_DFA(int i)355 void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); }
Search_Digits_NFA(int i)356 void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); }
Search_Digits_OnePass(int i)357 void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); }
Search_Digits_PCRE(int i)358 void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); }
Search_Digits_RE2(int i)359 void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); }
Search_Digits_BitState(int i)360 void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); }
361
362 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
363 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
364 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
365 #ifdef USEPCRE
366 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
367 #endif
368 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
369 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
370
371 // Benchmark: use regexp to parse digit fields in phone number.
372
Parse3Digits(int iters,void (* parse3)(int,const char *,const StringPiece &))373 void Parse3Digits(int iters,
374 void (*parse3)(int, const char*, const StringPiece&)) {
375 BenchmarkMemoryUsage();
376 parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
377 SetBenchmarkItemsProcessed(iters);
378 }
379
Parse_Digits_NFA(int i)380 void Parse_Digits_NFA(int i) { Parse3Digits(i, Parse3NFA); }
Parse_Digits_OnePass(int i)381 void Parse_Digits_OnePass(int i) { Parse3Digits(i, Parse3OnePass); }
Parse_Digits_PCRE(int i)382 void Parse_Digits_PCRE(int i) { Parse3Digits(i, Parse3PCRE); }
Parse_Digits_RE2(int i)383 void Parse_Digits_RE2(int i) { Parse3Digits(i, Parse3RE2); }
Parse_Digits_Backtrack(int i)384 void Parse_Digits_Backtrack(int i) { Parse3Digits(i, Parse3Backtrack); }
Parse_Digits_BitState(int i)385 void Parse_Digits_BitState(int i) { Parse3Digits(i, Parse3BitState); }
386
387 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
388 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
389 #ifdef USEPCRE
390 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
391 #endif
392 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
393 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
394 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
395
Parse_CachedDigits_NFA(int i)396 void Parse_CachedDigits_NFA(int i) { Parse3Digits(i, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(int i)397 void Parse_CachedDigits_OnePass(int i) { Parse3Digits(i, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(int i)398 void Parse_CachedDigits_PCRE(int i) { Parse3Digits(i, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(int i)399 void Parse_CachedDigits_RE2(int i) { Parse3Digits(i, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(int i)400 void Parse_CachedDigits_Backtrack(int i) { Parse3Digits(i, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(int i)401 void Parse_CachedDigits_BitState(int i) { Parse3Digits(i, Parse3CachedBitState); }
402
403 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
404 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
405 #ifdef USEPCRE
406 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
407 #endif
408 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
409 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
410 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
411
Parse3DigitDs(int iters,void (* parse3)(int,const char *,const StringPiece &))412 void Parse3DigitDs(int iters,
413 void (*parse3)(int, const char*, const StringPiece&)) {
414 BenchmarkMemoryUsage();
415 parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
416 SetBenchmarkItemsProcessed(iters);
417 }
418
Parse_DigitDs_NFA(int i)419 void Parse_DigitDs_NFA(int i) { Parse3DigitDs(i, Parse3NFA); }
Parse_DigitDs_OnePass(int i)420 void Parse_DigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3OnePass); }
Parse_DigitDs_PCRE(int i)421 void Parse_DigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3PCRE); }
Parse_DigitDs_RE2(int i)422 void Parse_DigitDs_RE2(int i) { Parse3DigitDs(i, Parse3RE2); }
Parse_DigitDs_Backtrack(int i)423 void Parse_DigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(int i)424 void Parse_DigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
425
426 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
427 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
428 #ifdef USEPCRE
429 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
430 #endif
431 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
432 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
433 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
434
Parse_CachedDigitDs_NFA(int i)435 void Parse_CachedDigitDs_NFA(int i) { Parse3DigitDs(i, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(int i)436 void Parse_CachedDigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(int i)437 void Parse_CachedDigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(int i)438 void Parse_CachedDigitDs_RE2(int i) { Parse3DigitDs(i, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(int i)439 void Parse_CachedDigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(int i)440 void Parse_CachedDigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
441
442 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
443 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
444 #ifdef USEPCRE
445 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
446 #endif
447 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
448 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
449 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
450
451 // Benchmark: splitting off leading number field.
452
Parse1Split(int iters,void (* parse1)(int,const char *,const StringPiece &))453 void Parse1Split(int iters,
454 void (*parse1)(int, const char*, const StringPiece&)) {
455 BenchmarkMemoryUsage();
456 parse1(iters, "[0-9]+-(.*)", "650-253-0001");
457 SetBenchmarkItemsProcessed(iters);
458 }
459
Parse_Split_NFA(int i)460 void Parse_Split_NFA(int i) { Parse1Split(i, Parse1NFA); }
Parse_Split_OnePass(int i)461 void Parse_Split_OnePass(int i) { Parse1Split(i, Parse1OnePass); }
Parse_Split_PCRE(int i)462 void Parse_Split_PCRE(int i) { Parse1Split(i, Parse1PCRE); }
Parse_Split_RE2(int i)463 void Parse_Split_RE2(int i) { Parse1Split(i, Parse1RE2); }
Parse_Split_BitState(int i)464 void Parse_Split_BitState(int i) { Parse1Split(i, Parse1BitState); }
465
466 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
467 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
468 #ifdef USEPCRE
469 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
470 #endif
471 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
472 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
473
Parse_CachedSplit_NFA(int i)474 void Parse_CachedSplit_NFA(int i) { Parse1Split(i, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(int i)475 void Parse_CachedSplit_OnePass(int i) { Parse1Split(i, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(int i)476 void Parse_CachedSplit_PCRE(int i) { Parse1Split(i, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(int i)477 void Parse_CachedSplit_RE2(int i) { Parse1Split(i, Parse1CachedRE2); }
Parse_CachedSplit_BitState(int i)478 void Parse_CachedSplit_BitState(int i) { Parse1Split(i, Parse1CachedBitState); }
479
480 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
481 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
482 #ifdef USEPCRE
483 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
484 #endif
485 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
486 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
487
488 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
489
Parse1SplitHard(int iters,void (* run)(int,const char *,const StringPiece &))490 void Parse1SplitHard(int iters,
491 void (*run)(int, const char*, const StringPiece&)) {
492 BenchmarkMemoryUsage();
493 run(iters, "[0-9]+.(.*)", "650-253-0001");
494 SetBenchmarkItemsProcessed(iters);
495 }
496
Parse_SplitHard_NFA(int i)497 void Parse_SplitHard_NFA(int i) { Parse1SplitHard(i, Parse1NFA); }
Parse_SplitHard_PCRE(int i)498 void Parse_SplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1PCRE); }
Parse_SplitHard_RE2(int i)499 void Parse_SplitHard_RE2(int i) { Parse1SplitHard(i, Parse1RE2); }
Parse_SplitHard_BitState(int i)500 void Parse_SplitHard_BitState(int i) { Parse1SplitHard(i, Parse1BitState); }
501
502 #ifdef USEPCRE
503 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
504 #endif
505 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
506 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
507 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
508
Parse_CachedSplitHard_NFA(int i)509 void Parse_CachedSplitHard_NFA(int i) { Parse1SplitHard(i, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(int i)510 void Parse_CachedSplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(int i)511 void Parse_CachedSplitHard_RE2(int i) { Parse1SplitHard(i, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(int i)512 void Parse_CachedSplitHard_BitState(int i) { Parse1SplitHard(i, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(int i)513 void Parse_CachedSplitHard_Backtrack(int i) { Parse1SplitHard(i, Parse1CachedBacktrack); }
514
515 #ifdef USEPCRE
516 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
517 #endif
518 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
519 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
520 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
521 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
522
523 // Benchmark: Parse1SplitHard, big text, small match.
524
Parse1SplitBig1(int iters,void (* run)(int,const char *,const StringPiece &))525 void Parse1SplitBig1(int iters,
526 void (*run)(int, const char*, const StringPiece&)) {
527 string s;
528 s.append(100000, 'x');
529 s.append("650-253-0001");
530 BenchmarkMemoryUsage();
531 run(iters, "[0-9]+.(.*)", s);
532 SetBenchmarkItemsProcessed(iters);
533 }
534
Parse_CachedSplitBig1_PCRE(int i)535 void Parse_CachedSplitBig1_PCRE(int i) { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(int i)536 void Parse_CachedSplitBig1_RE2(int i) { Parse1SplitBig1(i, SearchParse1CachedRE2); }
537
538 #ifdef USEPCRE
539 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
540 #endif
541 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
542
543 // Benchmark: Parse1SplitHard, big text, big match.
544
Parse1SplitBig2(int iters,void (* run)(int,const char *,const StringPiece &))545 void Parse1SplitBig2(int iters,
546 void (*run)(int, const char*, const StringPiece&)) {
547 string s;
548 s.append("650-253-");
549 s.append(100000, '0');
550 BenchmarkMemoryUsage();
551 run(iters, "[0-9]+.(.*)", s);
552 SetBenchmarkItemsProcessed(iters);
553 }
554
Parse_CachedSplitBig2_PCRE(int i)555 void Parse_CachedSplitBig2_PCRE(int i) { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(int i)556 void Parse_CachedSplitBig2_RE2(int i) { Parse1SplitBig2(i, SearchParse1CachedRE2); }
557
558 #ifdef USEPCRE
559 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
560 #endif
561 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
562
563 // Benchmark: measure time required to parse (but not execute)
564 // a simple regular expression.
565
ParseRegexp(int iters,const string & regexp)566 void ParseRegexp(int iters, const string& regexp) {
567 for (int i = 0; i < iters; i++) {
568 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
569 CHECK(re);
570 re->Decref();
571 }
572 }
573
SimplifyRegexp(int iters,const string & regexp)574 void SimplifyRegexp(int iters, const string& regexp) {
575 for (int i = 0; i < iters; i++) {
576 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
577 CHECK(re);
578 Regexp* sre = re->Simplify();
579 CHECK(sre);
580 sre->Decref();
581 re->Decref();
582 }
583 }
584
NullWalkRegexp(int iters,const string & regexp)585 void NullWalkRegexp(int iters, const string& regexp) {
586 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
587 CHECK(re);
588 for (int i = 0; i < iters; i++) {
589 re->NullWalk();
590 }
591 re->Decref();
592 }
593
SimplifyCompileRegexp(int iters,const string & regexp)594 void SimplifyCompileRegexp(int iters, const string& regexp) {
595 for (int i = 0; i < iters; i++) {
596 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
597 CHECK(re);
598 Regexp* sre = re->Simplify();
599 CHECK(sre);
600 Prog* prog = sre->CompileToProg(0);
601 CHECK(prog);
602 delete prog;
603 sre->Decref();
604 re->Decref();
605 }
606 }
607
CompileRegexp(int iters,const string & regexp)608 void CompileRegexp(int iters, const string& regexp) {
609 for (int i = 0; i < iters; i++) {
610 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
611 CHECK(re);
612 Prog* prog = re->CompileToProg(0);
613 CHECK(prog);
614 delete prog;
615 re->Decref();
616 }
617 }
618
CompileToProg(int iters,const string & regexp)619 void CompileToProg(int iters, const string& regexp) {
620 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
621 CHECK(re);
622 for (int i = 0; i < iters; i++) {
623 Prog* prog = re->CompileToProg(0);
624 CHECK(prog);
625 delete prog;
626 }
627 re->Decref();
628 }
629
CompileByteMap(int iters,const string & regexp)630 void CompileByteMap(int iters, const string& regexp) {
631 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
632 CHECK(re);
633 Prog* prog = re->CompileToProg(0);
634 CHECK(prog);
635 for (int i = 0; i < iters; i++) {
636 prog->ComputeByteMap();
637 }
638 delete prog;
639 re->Decref();
640 }
641
CompilePCRE(int iters,const string & regexp)642 void CompilePCRE(int iters, const string& regexp) {
643 for (int i = 0; i < iters; i++) {
644 PCRE re(regexp, PCRE::UTF8);
645 CHECK_EQ(re.error(), "");
646 }
647 }
648
CompileRE2(int iters,const string & regexp)649 void CompileRE2(int iters, const string& regexp) {
650 for (int i = 0; i < iters; i++) {
651 RE2 re(regexp);
652 CHECK_EQ(re.error(), "");
653 }
654 }
655
RunBuild(int iters,const string & regexp,void (* run)(int,const string &))656 void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
657 run(iters, regexp);
658 SetBenchmarkItemsProcessed(iters);
659 }
660
661 } // namespace re2
662
663 DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
664
665 namespace re2 {
666
BM_PCRE_Compile(int i)667 void BM_PCRE_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
BM_Regexp_Parse(int i)668 void BM_Regexp_Parse(int i) { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
BM_Regexp_Simplify(int i)669 void BM_Regexp_Simplify(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
BM_CompileToProg(int i)670 void BM_CompileToProg(int i) { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
BM_CompileByteMap(int i)671 void BM_CompileByteMap(int i) { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
BM_Regexp_Compile(int i)672 void BM_Regexp_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
BM_Regexp_SimplifyCompile(int i)673 void BM_Regexp_SimplifyCompile(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
BM_Regexp_NullWalk(int i)674 void BM_Regexp_NullWalk(int i) { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
BM_RE2_Compile(int i)675 void BM_RE2_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
676
677 #ifdef USEPCRE
678 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
679 #endif
680 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
681 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
682 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
683 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
684 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
685 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
686 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
687 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
688
689
690 // Makes text of size nbytes, then calls run to search
691 // the text for regexp iters times.
SearchPhone(int iters,int nbytes,ParseImpl * search)692 void SearchPhone(int iters, int nbytes, ParseImpl* search) {
693 StopBenchmarkTiming();
694 string s;
695 MakeText(&s, nbytes);
696 s.append("(650) 253-0001");
697 BenchmarkMemoryUsage();
698 StartBenchmarkTiming();
699 search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
700 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*nbytes);
701 }
702
SearchPhone_CachedPCRE(int i,int n)703 void SearchPhone_CachedPCRE(int i, int n) {
704 SearchPhone(i, n, SearchParse2CachedPCRE);
705 }
SearchPhone_CachedRE2(int i,int n)706 void SearchPhone_CachedRE2(int i, int n) {
707 SearchPhone(i, n, SearchParse2CachedRE2);
708 }
709
710 #ifdef USEPCRE
711 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
712 #endif
713 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
714
715 /*
716 TODO(rsc): Make this work again.
717
718 // Generates and returns a string over binary alphabet {0,1} that contains
719 // all possible binary sequences of length n as subsequences. The obvious
720 // brute force method would generate a string of length n * 2^n, but this
721 // generates a string of length n + 2^n - 1 called a De Bruijn cycle.
722 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
723 static string DeBruijnString(int n) {
724 CHECK_LT(n, 8*sizeof(int));
725 CHECK_GT(n, 0);
726
727 vector<bool> did(1<<n);
728 for (int i = 0; i < 1<<n; i++)
729 did[i] = false;
730
731 string s;
732 for (int i = 0; i < n-1; i++)
733 s.append("0");
734 int bits = 0;
735 int mask = (1<<n) - 1;
736 for (int i = 0; i < (1<<n); i++) {
737 bits <<= 1;
738 bits &= mask;
739 if (!did[bits|1]) {
740 bits |= 1;
741 s.append("1");
742 } else {
743 s.append("0");
744 }
745 CHECK(!did[bits]);
746 did[bits] = true;
747 }
748 return s;
749 }
750
751 void CacheFill(int iters, int n, SearchImpl *srch) {
752 string s = DeBruijnString(n+1);
753 string t;
754 for (int i = n+1; i < 20; i++) {
755 t = s + s;
756 swap(s, t);
757 }
758 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
759 Prog::kUnanchored, true);
760 SetBenchmarkBytesProcessed(static_cast<int64>(iters)*s.size());
761 }
762
763 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
764 void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
765 void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
766 void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
767
768 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
769 // for the static BenchmarkRegisterer, which makes it unusable inside
770 // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
771 // to make the identifiers distinct (only possible when 'a' is a simple
772 // expression like 2, not like 1+1).
773 #define MY_BENCHMARK_WITH_ARG(n, a) \
774 bool __benchmark_ ## n ## a = \
775 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
776
777 #define DO24(A, B) \
778 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
779 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
780 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
781 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
782
783 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
784 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
785 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
786 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
787
788 #undef DO24
789 #undef MY_BENCHMARK_WITH_ARG
790 */
791
792 ////////////////////////////////////////////////////////////////////////
793 //
794 // Implementation routines. Sad that there are so many,
795 // but all the interfaces are slightly different.
796
797 // Runs implementation to search for regexp in text, iters times.
798 // Expect_match says whether the regexp should be found.
799 // Anchored says whether to run an anchored search.
800
SearchDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)801 void SearchDFA(int iters, const char* regexp, const StringPiece& text,
802 Prog::Anchor anchor, bool expect_match) {
803 for (int i = 0; i < iters; i++) {
804 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
805 CHECK(re);
806 Prog* prog = re->CompileToProg(0);
807 CHECK(prog);
808 bool failed = false;
809 CHECK_EQ(prog->SearchDFA(text, NULL, anchor, Prog::kFirstMatch,
810 NULL, &failed, NULL),
811 expect_match);
812 CHECK(!failed);
813 delete prog;
814 re->Decref();
815 }
816 }
817
SearchNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)818 void SearchNFA(int iters, const char* regexp, const StringPiece& text,
819 Prog::Anchor anchor, bool expect_match) {
820 for (int i = 0; i < iters; i++) {
821 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
822 CHECK(re);
823 Prog* prog = re->CompileToProg(0);
824 CHECK(prog);
825 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
826 expect_match);
827 delete prog;
828 re->Decref();
829 }
830 }
831
SearchOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)832 void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
833 Prog::Anchor anchor, bool expect_match) {
834 for (int i = 0; i < iters; i++) {
835 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
836 CHECK(re);
837 Prog* prog = re->CompileToProg(0);
838 CHECK(prog);
839 CHECK(prog->IsOnePass());
840 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
841 expect_match);
842 delete prog;
843 re->Decref();
844 }
845 }
846
SearchBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)847 void SearchBitState(int iters, const char* regexp, const StringPiece& text,
848 Prog::Anchor anchor, bool expect_match) {
849 for (int i = 0; i < iters; i++) {
850 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
851 CHECK(re);
852 Prog* prog = re->CompileToProg(0);
853 CHECK(prog);
854 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
855 expect_match);
856 delete prog;
857 re->Decref();
858 }
859 }
860
SearchPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)861 void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
862 Prog::Anchor anchor, bool expect_match) {
863 for (int i = 0; i < iters; i++) {
864 PCRE re(regexp, PCRE::UTF8);
865 CHECK_EQ(re.error(), "");
866 if (anchor == Prog::kAnchored)
867 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
868 else
869 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
870 }
871 }
872
SearchRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)873 void SearchRE2(int iters, const char* regexp, const StringPiece& text,
874 Prog::Anchor anchor, bool expect_match) {
875 for (int i = 0; i < iters; i++) {
876 RE2 re(regexp);
877 CHECK_EQ(re.error(), "");
878 if (anchor == Prog::kAnchored)
879 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
880 else
881 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
882 }
883 }
884
885 // SearchCachedXXX is like SearchXXX but only does the
886 // regexp parsing and compiling once. This lets us measure
887 // search time without the per-regexp overhead.
888
SearchCachedDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)889 void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
890 Prog::Anchor anchor, bool expect_match) {
891 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
892 CHECK(re);
893 Prog* prog = re->CompileToProg(1LL<<31);
894 CHECK(prog);
895 for (int i = 0; i < iters; i++) {
896 bool failed = false;
897 CHECK_EQ(prog->SearchDFA(text, NULL, anchor,
898 Prog::kFirstMatch, NULL, &failed, NULL),
899 expect_match);
900 CHECK(!failed);
901 }
902 delete prog;
903 re->Decref();
904 }
905
SearchCachedNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)906 void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
907 Prog::Anchor anchor, bool expect_match) {
908 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
909 CHECK(re);
910 Prog* prog = re->CompileToProg(0);
911 CHECK(prog);
912 for (int i = 0; i < iters; i++) {
913 CHECK_EQ(prog->SearchNFA(text, NULL, anchor, Prog::kFirstMatch, NULL, 0),
914 expect_match);
915 }
916 delete prog;
917 re->Decref();
918 }
919
SearchCachedOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)920 void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
921 Prog::Anchor anchor, bool expect_match) {
922 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
923 CHECK(re);
924 Prog* prog = re->CompileToProg(0);
925 CHECK(prog);
926 CHECK(prog->IsOnePass());
927 for (int i = 0; i < iters; i++)
928 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
929 expect_match);
930 delete prog;
931 re->Decref();
932 }
933
SearchCachedBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)934 void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
935 Prog::Anchor anchor, bool expect_match) {
936 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
937 CHECK(re);
938 Prog* prog = re->CompileToProg(0);
939 CHECK(prog);
940 for (int i = 0; i < iters; i++)
941 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
942 expect_match);
943 delete prog;
944 re->Decref();
945 }
946
SearchCachedPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)947 void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
948 Prog::Anchor anchor, bool expect_match) {
949 PCRE re(regexp, PCRE::UTF8);
950 CHECK_EQ(re.error(), "");
951 for (int i = 0; i < iters; i++) {
952 if (anchor == Prog::kAnchored)
953 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
954 else
955 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
956 }
957 }
958
SearchCachedRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)959 void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
960 Prog::Anchor anchor, bool expect_match) {
961 RE2 re(regexp);
962 CHECK_EQ(re.error(), "");
963 for (int i = 0; i < iters; i++) {
964 if (anchor == Prog::kAnchored)
965 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
966 else
967 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
968 }
969 }
970
971
972 // Runs implementation to full match regexp against text,
973 // extracting three submatches. Expects match always.
974
Parse3NFA(int iters,const char * regexp,const StringPiece & text)975 void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
976 for (int i = 0; i < iters; i++) {
977 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
978 CHECK(re);
979 Prog* prog = re->CompileToProg(0);
980 CHECK(prog);
981 StringPiece sp[4]; // 4 because sp[0] is whole match.
982 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
983 delete prog;
984 re->Decref();
985 }
986 }
987
Parse3OnePass(int iters,const char * regexp,const StringPiece & text)988 void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
989 for (int i = 0; i < iters; i++) {
990 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
991 CHECK(re);
992 Prog* prog = re->CompileToProg(0);
993 CHECK(prog);
994 CHECK(prog->IsOnePass());
995 StringPiece sp[4]; // 4 because sp[0] is whole match.
996 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
997 delete prog;
998 re->Decref();
999 }
1000 }
1001
Parse3BitState(int iters,const char * regexp,const StringPiece & text)1002 void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
1003 for (int i = 0; i < iters; i++) {
1004 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1005 CHECK(re);
1006 Prog* prog = re->CompileToProg(0);
1007 CHECK(prog);
1008 StringPiece sp[4]; // 4 because sp[0] is whole match.
1009 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1010 delete prog;
1011 re->Decref();
1012 }
1013 }
1014
Parse3Backtrack(int iters,const char * regexp,const StringPiece & text)1015 void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
1016 for (int i = 0; i < iters; i++) {
1017 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1018 CHECK(re);
1019 Prog* prog = re->CompileToProg(0);
1020 CHECK(prog);
1021 StringPiece sp[4]; // 4 because sp[0] is whole match.
1022 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1023 delete prog;
1024 re->Decref();
1025 }
1026 }
1027
Parse3PCRE(int iters,const char * regexp,const StringPiece & text)1028 void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
1029 for (int i = 0; i < iters; i++) {
1030 PCRE re(regexp, PCRE::UTF8);
1031 CHECK_EQ(re.error(), "");
1032 StringPiece sp1, sp2, sp3;
1033 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1034 }
1035 }
1036
Parse3RE2(int iters,const char * regexp,const StringPiece & text)1037 void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
1038 for (int i = 0; i < iters; i++) {
1039 RE2 re(regexp);
1040 CHECK_EQ(re.error(), "");
1041 StringPiece sp1, sp2, sp3;
1042 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1043 }
1044 }
1045
Parse3CachedNFA(int iters,const char * regexp,const StringPiece & text)1046 void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1047 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1048 CHECK(re);
1049 Prog* prog = re->CompileToProg(0);
1050 CHECK(prog);
1051 StringPiece sp[4]; // 4 because sp[0] is whole match.
1052 for (int i = 0; i < iters; i++) {
1053 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1054 }
1055 delete prog;
1056 re->Decref();
1057 }
1058
Parse3CachedOnePass(int iters,const char * regexp,const StringPiece & text)1059 void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1060 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1061 CHECK(re);
1062 Prog* prog = re->CompileToProg(0);
1063 CHECK(prog);
1064 CHECK(prog->IsOnePass());
1065 StringPiece sp[4]; // 4 because sp[0] is whole match.
1066 for (int i = 0; i < iters; i++)
1067 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1068 delete prog;
1069 re->Decref();
1070 }
1071
Parse3CachedBitState(int iters,const char * regexp,const StringPiece & text)1072 void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1073 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1074 CHECK(re);
1075 Prog* prog = re->CompileToProg(0);
1076 CHECK(prog);
1077 StringPiece sp[4]; // 4 because sp[0] is whole match.
1078 for (int i = 0; i < iters; i++)
1079 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1080 delete prog;
1081 re->Decref();
1082 }
1083
Parse3CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1084 void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1085 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1086 CHECK(re);
1087 Prog* prog = re->CompileToProg(0);
1088 CHECK(prog);
1089 StringPiece sp[4]; // 4 because sp[0] is whole match.
1090 for (int i = 0; i < iters; i++)
1091 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1092 delete prog;
1093 re->Decref();
1094 }
1095
Parse3CachedPCRE(int iters,const char * regexp,const StringPiece & text)1096 void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1097 PCRE re(regexp, PCRE::UTF8);
1098 CHECK_EQ(re.error(), "");
1099 StringPiece sp1, sp2, sp3;
1100 for (int i = 0; i < iters; i++) {
1101 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1102 }
1103 }
1104
Parse3CachedRE2(int iters,const char * regexp,const StringPiece & text)1105 void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1106 RE2 re(regexp);
1107 CHECK_EQ(re.error(), "");
1108 StringPiece sp1, sp2, sp3;
1109 for (int i = 0; i < iters; i++) {
1110 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1111 }
1112 }
1113
1114
1115 // Runs implementation to full match regexp against text,
1116 // extracting three submatches. Expects match always.
1117
Parse1NFA(int iters,const char * regexp,const StringPiece & text)1118 void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
1119 for (int i = 0; i < iters; i++) {
1120 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1121 CHECK(re);
1122 Prog* prog = re->CompileToProg(0);
1123 CHECK(prog);
1124 StringPiece sp[2]; // 2 because sp[0] is whole match.
1125 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1126 delete prog;
1127 re->Decref();
1128 }
1129 }
1130
Parse1OnePass(int iters,const char * regexp,const StringPiece & text)1131 void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
1132 for (int i = 0; i < iters; i++) {
1133 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1134 CHECK(re);
1135 Prog* prog = re->CompileToProg(0);
1136 CHECK(prog);
1137 CHECK(prog->IsOnePass());
1138 StringPiece sp[2]; // 2 because sp[0] is whole match.
1139 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1140 delete prog;
1141 re->Decref();
1142 }
1143 }
1144
Parse1BitState(int iters,const char * regexp,const StringPiece & text)1145 void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
1146 for (int i = 0; i < iters; i++) {
1147 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1148 CHECK(re);
1149 Prog* prog = re->CompileToProg(0);
1150 CHECK(prog);
1151 StringPiece sp[2]; // 2 because sp[0] is whole match.
1152 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1153 delete prog;
1154 re->Decref();
1155 }
1156 }
1157
Parse1PCRE(int iters,const char * regexp,const StringPiece & text)1158 void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
1159 for (int i = 0; i < iters; i++) {
1160 PCRE re(regexp, PCRE::UTF8);
1161 CHECK_EQ(re.error(), "");
1162 StringPiece sp1;
1163 CHECK(PCRE::FullMatch(text, re, &sp1));
1164 }
1165 }
1166
Parse1RE2(int iters,const char * regexp,const StringPiece & text)1167 void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
1168 for (int i = 0; i < iters; i++) {
1169 RE2 re(regexp);
1170 CHECK_EQ(re.error(), "");
1171 StringPiece sp1;
1172 CHECK(RE2::FullMatch(text, re, &sp1));
1173 }
1174 }
1175
Parse1CachedNFA(int iters,const char * regexp,const StringPiece & text)1176 void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1177 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1178 CHECK(re);
1179 Prog* prog = re->CompileToProg(0);
1180 CHECK(prog);
1181 StringPiece sp[2]; // 2 because sp[0] is whole match.
1182 for (int i = 0; i < iters; i++) {
1183 CHECK(prog->SearchNFA(text, NULL, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1184 }
1185 delete prog;
1186 re->Decref();
1187 }
1188
Parse1CachedOnePass(int iters,const char * regexp,const StringPiece & text)1189 void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1190 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1191 CHECK(re);
1192 Prog* prog = re->CompileToProg(0);
1193 CHECK(prog);
1194 CHECK(prog->IsOnePass());
1195 StringPiece sp[2]; // 2 because sp[0] is whole match.
1196 for (int i = 0; i < iters; i++)
1197 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1198 delete prog;
1199 re->Decref();
1200 }
1201
Parse1CachedBitState(int iters,const char * regexp,const StringPiece & text)1202 void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1203 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1204 CHECK(re);
1205 Prog* prog = re->CompileToProg(0);
1206 CHECK(prog);
1207 StringPiece sp[2]; // 2 because sp[0] is whole match.
1208 for (int i = 0; i < iters; i++)
1209 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1210 delete prog;
1211 re->Decref();
1212 }
1213
Parse1CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1214 void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1215 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1216 CHECK(re);
1217 Prog* prog = re->CompileToProg(0);
1218 CHECK(prog);
1219 StringPiece sp[2]; // 2 because sp[0] is whole match.
1220 for (int i = 0; i < iters; i++)
1221 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1222 delete prog;
1223 re->Decref();
1224 }
1225
Parse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1226 void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1227 PCRE re(regexp, PCRE::UTF8);
1228 CHECK_EQ(re.error(), "");
1229 StringPiece sp1;
1230 for (int i = 0; i < iters; i++) {
1231 CHECK(PCRE::FullMatch(text, re, &sp1));
1232 }
1233 }
1234
Parse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1235 void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1236 RE2 re(regexp);
1237 CHECK_EQ(re.error(), "");
1238 StringPiece sp1;
1239 for (int i = 0; i < iters; i++) {
1240 CHECK(RE2::FullMatch(text, re, &sp1));
1241 }
1242 }
1243
SearchParse2CachedPCRE(int iters,const char * regexp,const StringPiece & text)1244 void SearchParse2CachedPCRE(int iters, const char* regexp,
1245 const StringPiece& text) {
1246 PCRE re(regexp, PCRE::UTF8);
1247 CHECK_EQ(re.error(), "");
1248 for (int i = 0; i < iters; i++) {
1249 StringPiece sp1, sp2;
1250 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1251 }
1252 }
1253
SearchParse2CachedRE2(int iters,const char * regexp,const StringPiece & text)1254 void SearchParse2CachedRE2(int iters, const char* regexp,
1255 const StringPiece& text) {
1256 RE2 re(regexp);
1257 CHECK_EQ(re.error(), "");
1258 for (int i = 0; i < iters; i++) {
1259 StringPiece sp1, sp2;
1260 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1261 }
1262 }
1263
SearchParse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1264 void SearchParse1CachedPCRE(int iters, const char* regexp,
1265 const StringPiece& text) {
1266 PCRE re(regexp, PCRE::UTF8);
1267 CHECK_EQ(re.error(), "");
1268 for (int i = 0; i < iters; i++) {
1269 StringPiece sp1;
1270 CHECK(PCRE::PartialMatch(text, re, &sp1));
1271 }
1272 }
1273
SearchParse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1274 void SearchParse1CachedRE2(int iters, const char* regexp,
1275 const StringPiece& text) {
1276 RE2 re(regexp);
1277 CHECK_EQ(re.error(), "");
1278 for (int i = 0; i < iters; i++) {
1279 StringPiece sp1;
1280 CHECK(RE2::PartialMatch(text, re, &sp1));
1281 }
1282 }
1283
EmptyPartialMatchPCRE(int n)1284 void EmptyPartialMatchPCRE(int n) {
1285 PCRE re("");
1286 for (int i = 0; i < n; i++) {
1287 PCRE::PartialMatch("", re);
1288 }
1289 }
1290
EmptyPartialMatchRE2(int n)1291 void EmptyPartialMatchRE2(int n) {
1292 RE2 re("");
1293 for (int i = 0; i < n; i++) {
1294 RE2::PartialMatch("", re);
1295 }
1296 }
1297 #ifdef USEPCRE
1298 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1299 #endif
1300 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1301
SimplePartialMatchPCRE(int n)1302 void SimplePartialMatchPCRE(int n) {
1303 PCRE re("abcdefg");
1304 for (int i = 0; i < n; i++) {
1305 PCRE::PartialMatch("abcdefg", re);
1306 }
1307 }
1308
SimplePartialMatchRE2(int n)1309 void SimplePartialMatchRE2(int n) {
1310 RE2 re("abcdefg");
1311 for (int i = 0; i < n; i++) {
1312 RE2::PartialMatch("abcdefg", re);
1313 }
1314 }
1315 #ifdef USEPCRE
1316 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1317 #endif
1318 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1319
1320 static string http_text =
1321 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1322 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1323
HTTPPartialMatchPCRE(int n)1324 void HTTPPartialMatchPCRE(int n) {
1325 StringPiece a;
1326 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1327 for (int i = 0; i < n; i++) {
1328 PCRE::PartialMatch(http_text, re, &a);
1329 }
1330 }
1331
HTTPPartialMatchRE2(int n)1332 void HTTPPartialMatchRE2(int n) {
1333 StringPiece a;
1334 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1335 for (int i = 0; i < n; i++) {
1336 RE2::PartialMatch(http_text, re, &a);
1337 }
1338 }
1339
1340 #ifdef USEPCRE
1341 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1342 #endif
1343 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1344
1345 static string http_smalltext =
1346 "GET /abc HTTP/1.1";
1347
SmallHTTPPartialMatchPCRE(int n)1348 void SmallHTTPPartialMatchPCRE(int n) {
1349 StringPiece a;
1350 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1351 for (int i = 0; i < n; i++) {
1352 PCRE::PartialMatch(http_text, re, &a);
1353 }
1354 }
1355
SmallHTTPPartialMatchRE2(int n)1356 void SmallHTTPPartialMatchRE2(int n) {
1357 StringPiece a;
1358 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1359 for (int i = 0; i < n; i++) {
1360 RE2::PartialMatch(http_text, re, &a);
1361 }
1362 }
1363
1364 #ifdef USEPCRE
1365 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1366 #endif
1367 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1368
DotMatchPCRE(int n)1369 void DotMatchPCRE(int n) {
1370 StringPiece a;
1371 PCRE re("(?-s)^(.+)");
1372 for (int i = 0; i < n; i++) {
1373 PCRE::PartialMatch(http_text, re, &a);
1374 }
1375 }
1376
DotMatchRE2(int n)1377 void DotMatchRE2(int n) {
1378 StringPiece a;
1379 RE2 re("(?-s)^(.+)");
1380 for (int i = 0; i < n; i++) {
1381 RE2::PartialMatch(http_text, re, &a);
1382 }
1383 }
1384
1385 #ifdef USEPCRE
1386 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1387 #endif
1388 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1389
ASCIIMatchPCRE(int n)1390 void ASCIIMatchPCRE(int n) {
1391 StringPiece a;
1392 PCRE re("(?-s)^([ -~]+)");
1393 for (int i = 0; i < n; i++) {
1394 PCRE::PartialMatch(http_text, re, &a);
1395 }
1396 }
1397
ASCIIMatchRE2(int n)1398 void ASCIIMatchRE2(int n) {
1399 StringPiece a;
1400 RE2 re("(?-s)^([ -~]+)");
1401 for (int i = 0; i < n; i++) {
1402 RE2::PartialMatch(http_text, re, &a);
1403 }
1404 }
1405
1406 #ifdef USEPCRE
1407 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1408 #endif
1409 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1410
FullMatchPCRE(int iter,int n,const char * regexp)1411 void FullMatchPCRE(int iter, int n, const char *regexp) {
1412 StopBenchmarkTiming();
1413 string s;
1414 MakeText(&s, n);
1415 s += "ABCDEFGHIJ";
1416 BenchmarkMemoryUsage();
1417 PCRE re(regexp);
1418 StartBenchmarkTiming();
1419 for (int i = 0; i < iter; i++)
1420 CHECK(PCRE::FullMatch(s, re));
1421 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1422 }
1423
FullMatchRE2(int iter,int n,const char * regexp)1424 void FullMatchRE2(int iter, int n, const char *regexp) {
1425 StopBenchmarkTiming();
1426 string s;
1427 MakeText(&s, n);
1428 s += "ABCDEFGHIJ";
1429 BenchmarkMemoryUsage();
1430 RE2 re(regexp, RE2::Latin1);
1431 StartBenchmarkTiming();
1432 for (int i = 0; i < iter; i++)
1433 CHECK(RE2::FullMatch(s, re));
1434 SetBenchmarkBytesProcessed(static_cast<int64>(iter)*n);
1435 }
1436
FullMatch_DotStar_CachedPCRE(int i,int n)1437 void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); }
FullMatch_DotStar_CachedRE2(int i,int n)1438 void FullMatch_DotStar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*"); }
1439
FullMatch_DotStarDollar_CachedPCRE(int i,int n)1440 void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(int i,int n)1441 void FullMatch_DotStarDollar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*$"); }
1442
FullMatch_DotStarCapture_CachedPCRE(int i,int n)1443 void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(int i,int n)1444 void FullMatch_DotStarCapture_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s)((.*)()()($))"); }
1445
1446 #ifdef USEPCRE
1447 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1448 #endif
1449 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
1450
1451 #ifdef USEPCRE
1452 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1453 #endif
1454 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
1455
1456 #ifdef USEPCRE
1457 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1458 #endif
1459 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
1460
1461 } // namespace re2
1462