1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Benchmarks for regular expression implementations.
6
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string>
11 #include <thread>
12 #include <utility>
13
14 #include "util/benchmark.h"
15 #include "util/test.h"
16 #include "util/flags.h"
17 #include "util/logging.h"
18 #include "util/malloc_counter.h"
19 #include "util/strutil.h"
20 #include "re2/prog.h"
21 #include "re2/re2.h"
22 #include "re2/regexp.h"
23 #include "util/pcre.h"
24
25 namespace re2 {
26 void Test();
27 void MemoryUsage();
28 } // namespace re2
29
30 typedef testing::MallocCounter MallocCounter;
31
32 namespace re2 {
33
Test()34 void Test() {
35 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
36 CHECK(re);
37 Prog* prog = re->CompileToProg(0);
38 CHECK(prog);
39 CHECK(prog->IsOnePass());
40 CHECK(prog->CanBitState());
41 const char* text = "650-253-0001";
42 StringPiece sp[4];
43 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
44 CHECK_EQ(sp[0], "650-253-0001");
45 CHECK_EQ(sp[1], "650");
46 CHECK_EQ(sp[2], "253");
47 CHECK_EQ(sp[3], "0001");
48 delete prog;
49 re->Decref();
50 LOG(INFO) << "test passed\n";
51 }
52
MemoryUsage()53 void MemoryUsage() {
54 const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
55 const char* text = "650-253-0001";
56 {
57 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
58 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
59 CHECK(re);
60 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
61 // because LOG(INFO) might do a big allocation before they get evaluated.
62 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
63 mc.HeapGrowth(), mc.PeakHeapGrowth());
64 mc.Reset();
65
66 Prog* prog = re->CompileToProg(0);
67 CHECK(prog);
68 CHECK(prog->IsOnePass());
69 CHECK(prog->CanBitState());
70 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n",
71 mc.HeapGrowth(), mc.PeakHeapGrowth());
72 mc.Reset();
73
74 StringPiece sp[4];
75 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
76 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
77 mc.HeapGrowth(), mc.PeakHeapGrowth());
78 delete prog;
79 re->Decref();
80 }
81
82 {
83 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
84
85 PCRE re(regexp, PCRE::UTF8);
86 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
87 mc.HeapGrowth(), mc.PeakHeapGrowth());
88 PCRE::FullMatch(text, re);
89 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
90 mc.HeapGrowth(), mc.PeakHeapGrowth());
91 }
92
93 {
94 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
95
96 PCRE* re = new PCRE(regexp, PCRE::UTF8);
97 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
98 mc.HeapGrowth(), mc.PeakHeapGrowth());
99 PCRE::FullMatch(text, *re);
100 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
101 mc.HeapGrowth(), mc.PeakHeapGrowth());
102 delete re;
103 }
104
105 {
106 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
107
108 RE2 re(regexp);
109 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
110 mc.HeapGrowth(), mc.PeakHeapGrowth());
111 RE2::FullMatch(text, re);
112 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
113 mc.HeapGrowth(), mc.PeakHeapGrowth());
114 }
115
116 fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
117 sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
118 }
119
NumCPUs()120 int NumCPUs() {
121 return static_cast<int>(std::thread::hardware_concurrency());
122 }
123
124 // Regular expression implementation wrappers.
125 // Defined at bottom of file, but they are repetitive
126 // and not interesting.
127
128 typedef void SearchImpl(benchmark::State& state, const char* regexp,
129 const StringPiece& text, Prog::Anchor anchor,
130 bool expect_match);
131
132 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
133 SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass,
134 SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
135
136 typedef void ParseImpl(benchmark::State& state, const char* regexp,
137 const StringPiece& text);
138
139 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
140 Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
141 Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack;
142
143 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2,
144 Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
145 Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack;
146
147 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
148
149 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
150
151 // Benchmark: failed search for regexp in random text.
152
153 // Generate random text that won't contain the search string,
154 // to test worst-case search behavior.
MakeText(std::string * text,int64_t nbytes)155 void MakeText(std::string* text, int64_t nbytes) {
156 srand(1);
157 text->resize(nbytes);
158 for (int64_t i = 0; i < nbytes; i++) {
159 // Generate a one-byte rune that isn't a control character (e.g. '\n').
160 // Clipping to 0x20 introduces some bias, but we don't need uniformity.
161 int byte = rand() & 0x7F;
162 if (byte < 0x20)
163 byte = 0x20;
164 (*text)[i] = byte;
165 }
166 }
167
168 // Makes text of size nbytes, then calls run to search
169 // the text for regexp iters times.
Search(benchmark::State & state,const char * regexp,SearchImpl * search)170 void Search(benchmark::State& state, const char* regexp, SearchImpl* search) {
171 std::string s;
172 MakeText(&s, state.range(0));
173 search(state, regexp, s, Prog::kUnanchored, false);
174 state.SetBytesProcessed(state.iterations() * state.range(0));
175 }
176
177 // These two are easy because they start with an A,
178 // giving the search loop something to memchr for.
179 #define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
180 #define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
181
182 // This is a little harder, since it starts with a character class
183 // and thus can't be memchr'ed. Could look for ABC and work backward,
184 // but no one does that.
185 #define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
186
187 // This is a fair amount harder, because of the leading [ -~]*.
188 // A bad backtracking implementation will take O(text^2) time to
189 // figure out there's no match.
190 #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
191
192 // This has quite a high degree of fanout.
193 // NFA execution will be particularly slow.
194 #define FANOUT "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
195
196 // This stresses engines that are trying to track parentheses.
197 #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
198 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
199
Search_Easy0_CachedDFA(benchmark::State & state)200 void Search_Easy0_CachedDFA(benchmark::State& state) { Search(state, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(benchmark::State & state)201 void Search_Easy0_CachedNFA(benchmark::State& state) { Search(state, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(benchmark::State & state)202 void Search_Easy0_CachedPCRE(benchmark::State& state) { Search(state, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(benchmark::State & state)203 void Search_Easy0_CachedRE2(benchmark::State& state) { Search(state, EASY0, SearchCachedRE2); }
204
205 BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
206 BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
207 #ifdef USEPCRE
208 BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
209 #endif
210 BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
211
Search_Easy1_CachedDFA(benchmark::State & state)212 void Search_Easy1_CachedDFA(benchmark::State& state) { Search(state, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(benchmark::State & state)213 void Search_Easy1_CachedNFA(benchmark::State& state) { Search(state, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(benchmark::State & state)214 void Search_Easy1_CachedPCRE(benchmark::State& state) { Search(state, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(benchmark::State & state)215 void Search_Easy1_CachedRE2(benchmark::State& state) { Search(state, EASY1, SearchCachedRE2); }
216
217 BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
218 BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
219 #ifdef USEPCRE
220 BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
221 #endif
222 BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
223
Search_Medium_CachedDFA(benchmark::State & state)224 void Search_Medium_CachedDFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(benchmark::State & state)225 void Search_Medium_CachedNFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(benchmark::State & state)226 void Search_Medium_CachedPCRE(benchmark::State& state) { Search(state, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(benchmark::State & state)227 void Search_Medium_CachedRE2(benchmark::State& state) { Search(state, MEDIUM, SearchCachedRE2); }
228
229 BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
230 BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
231 #ifdef USEPCRE
232 BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
233 #endif
234 BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
235
Search_Hard_CachedDFA(benchmark::State & state)236 void Search_Hard_CachedDFA(benchmark::State& state) { Search(state, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(benchmark::State & state)237 void Search_Hard_CachedNFA(benchmark::State& state) { Search(state, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(benchmark::State & state)238 void Search_Hard_CachedPCRE(benchmark::State& state) { Search(state, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(benchmark::State & state)239 void Search_Hard_CachedRE2(benchmark::State& state) { Search(state, HARD, SearchCachedRE2); }
240
241 BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
242 BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
243 #ifdef USEPCRE
244 BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
245 #endif
246 BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
247
Search_Fanout_CachedDFA(benchmark::State & state)248 void Search_Fanout_CachedDFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedDFA); }
Search_Fanout_CachedNFA(benchmark::State & state)249 void Search_Fanout_CachedNFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedNFA); }
Search_Fanout_CachedPCRE(benchmark::State & state)250 void Search_Fanout_CachedPCRE(benchmark::State& state) { Search(state, FANOUT, SearchCachedPCRE); }
Search_Fanout_CachedRE2(benchmark::State & state)251 void Search_Fanout_CachedRE2(benchmark::State& state) { Search(state, FANOUT, SearchCachedRE2); }
252
253 BENCHMARK_RANGE(Search_Fanout_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
254 BENCHMARK_RANGE(Search_Fanout_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
255 #ifdef USEPCRE
256 BENCHMARK_RANGE(Search_Fanout_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
257 #endif
258 BENCHMARK_RANGE(Search_Fanout_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
259
Search_Parens_CachedDFA(benchmark::State & state)260 void Search_Parens_CachedDFA(benchmark::State& state) { Search(state, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(benchmark::State & state)261 void Search_Parens_CachedNFA(benchmark::State& state) { Search(state, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(benchmark::State & state)262 void Search_Parens_CachedPCRE(benchmark::State& state) { Search(state, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(benchmark::State & state)263 void Search_Parens_CachedRE2(benchmark::State& state) { Search(state, PARENS, SearchCachedRE2); }
264
265 BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
266 BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
267 #ifdef USEPCRE
268 BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
269 #endif
270 BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
271
SearchBigFixed(benchmark::State & state,SearchImpl * search)272 void SearchBigFixed(benchmark::State& state, SearchImpl* search) {
273 std::string s;
274 s.append(state.range(0)/2, 'x');
275 std::string regexp = "^" + s + ".*$";
276 std::string t;
277 MakeText(&t, state.range(0)/2);
278 s += t;
279 search(state, regexp.c_str(), s, Prog::kUnanchored, true);
280 state.SetBytesProcessed(state.iterations() * state.range(0));
281 }
282
Search_BigFixed_CachedDFA(benchmark::State & state)283 void Search_BigFixed_CachedDFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedDFA); }
Search_BigFixed_CachedNFA(benchmark::State & state)284 void Search_BigFixed_CachedNFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(benchmark::State & state)285 void Search_BigFixed_CachedPCRE(benchmark::State& state) { SearchBigFixed(state, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(benchmark::State & state)286 void Search_BigFixed_CachedRE2(benchmark::State& state) { SearchBigFixed(state, SearchCachedRE2); }
287
288 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
289 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
290 #ifdef USEPCRE
291 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
292 #endif
293 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
294
295 // Benchmark: FindAndConsume
296
FindAndConsume(benchmark::State & state)297 void FindAndConsume(benchmark::State& state) {
298 std::string s;
299 MakeText(&s, state.range(0));
300 s.append("Hello World");
301 RE2 re("((Hello World))");
302 for (auto _ : state) {
303 StringPiece t = s;
304 StringPiece u;
305 CHECK(RE2::FindAndConsume(&t, re, &u));
306 CHECK_EQ(u, "Hello World");
307 }
308 state.SetBytesProcessed(state.iterations() * state.range(0));
309 }
310
311 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
312
313 // Benchmark: successful anchored search.
314
SearchSuccess(benchmark::State & state,const char * regexp,SearchImpl * search)315 void SearchSuccess(benchmark::State& state, const char* regexp,
316 SearchImpl* search) {
317 std::string s;
318 MakeText(&s, state.range(0));
319 search(state, regexp, s, Prog::kAnchored, true);
320 state.SetBytesProcessed(state.iterations() * state.range(0));
321 }
322
323 // Unambiguous search (RE2 can use OnePass).
324
Search_Success_DFA(benchmark::State & state)325 void Search_Success_DFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchDFA); }
Search_Success_NFA(benchmark::State & state)326 void Search_Success_NFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchNFA); }
Search_Success_PCRE(benchmark::State & state)327 void Search_Success_PCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchPCRE); }
Search_Success_RE2(benchmark::State & state)328 void Search_Success_RE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchRE2); }
Search_Success_OnePass(benchmark::State & state)329 void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchOnePass); }
330
331 BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
332 BENCHMARK_RANGE(Search_Success_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
333 #ifdef USEPCRE
334 BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
335 #endif
336 BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
337 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
338
Search_Success_CachedDFA(benchmark::State & state)339 void Search_Success_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedDFA); }
Search_Success_CachedNFA(benchmark::State & state)340 void Search_Success_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedNFA); }
Search_Success_CachedPCRE(benchmark::State & state)341 void Search_Success_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(benchmark::State & state)342 void Search_Success_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedRE2); }
Search_Success_CachedOnePass(benchmark::State & state)343 void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedOnePass); }
344
345 BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
346 BENCHMARK_RANGE(Search_Success_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
347 #ifdef USEPCRE
348 BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
349 #endif
350 BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
351 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
352
353 // Ambiguous search (RE2 cannot use OnePass).
354 // Used to be ".*.$", but that is coalesced to ".+$" these days.
355
Search_Success1_DFA(benchmark::State & state)356 void Search_Success1_DFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchDFA); }
Search_Success1_NFA(benchmark::State & state)357 void Search_Success1_NFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchNFA); }
Search_Success1_PCRE(benchmark::State & state)358 void Search_Success1_PCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchPCRE); }
Search_Success1_RE2(benchmark::State & state)359 void Search_Success1_RE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchRE2); }
Search_Success1_BitState(benchmark::State & state)360 void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchBitState); }
361
362 BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
363 BENCHMARK_RANGE(Search_Success1_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
364 #ifdef USEPCRE
365 BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
366 #endif
367 BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
368 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
369
Search_Success1_CachedDFA(benchmark::State & state)370 void Search_Success1_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedDFA); }
Search_Success1_CachedNFA(benchmark::State & state)371 void Search_Success1_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedNFA); }
Search_Success1_CachedPCRE(benchmark::State & state)372 void Search_Success1_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedPCRE); }
Search_Success1_CachedRE2(benchmark::State & state)373 void Search_Success1_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedRE2); }
Search_Success1_CachedBitState(benchmark::State & state)374 void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedBitState); }
375
376 BENCHMARK_RANGE(Search_Success1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
377 BENCHMARK_RANGE(Search_Success1_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
378 #ifdef USEPCRE
379 BENCHMARK_RANGE(Search_Success1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
380 #endif
381 BENCHMARK_RANGE(Search_Success1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
382 BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
383
384 // Benchmark: AltMatch optimisation (just to verify that it works)
385 // Note that OnePass doesn't implement it!
386
SearchAltMatch(benchmark::State & state,SearchImpl * search)387 void SearchAltMatch(benchmark::State& state, SearchImpl* search) {
388 std::string s;
389 MakeText(&s, state.range(0));
390 search(state, "\\C*", s, Prog::kAnchored, true);
391 state.SetBytesProcessed(state.iterations() * state.range(0));
392 }
393
Search_AltMatch_DFA(benchmark::State & state)394 void Search_AltMatch_DFA(benchmark::State& state) { SearchAltMatch(state, SearchDFA); }
Search_AltMatch_NFA(benchmark::State & state)395 void Search_AltMatch_NFA(benchmark::State& state) { SearchAltMatch(state, SearchNFA); }
Search_AltMatch_OnePass(benchmark::State & state)396 void Search_AltMatch_OnePass(benchmark::State& state) { SearchAltMatch(state, SearchOnePass); }
Search_AltMatch_BitState(benchmark::State & state)397 void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); }
Search_AltMatch_PCRE(benchmark::State & state)398 void Search_AltMatch_PCRE(benchmark::State& state) { SearchAltMatch(state, SearchPCRE); }
Search_AltMatch_RE2(benchmark::State & state)399 void Search_AltMatch_RE2(benchmark::State& state) { SearchAltMatch(state, SearchRE2); }
400
401 BENCHMARK_RANGE(Search_AltMatch_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
402 BENCHMARK_RANGE(Search_AltMatch_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
403 BENCHMARK_RANGE(Search_AltMatch_OnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
404 BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
405 #ifdef USEPCRE
406 BENCHMARK_RANGE(Search_AltMatch_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
407 #endif
408 BENCHMARK_RANGE(Search_AltMatch_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
409
Search_AltMatch_CachedDFA(benchmark::State & state)410 void Search_AltMatch_CachedDFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedDFA); }
Search_AltMatch_CachedNFA(benchmark::State & state)411 void Search_AltMatch_CachedNFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedNFA); }
Search_AltMatch_CachedOnePass(benchmark::State & state)412 void Search_AltMatch_CachedOnePass(benchmark::State& state) { SearchAltMatch(state, SearchCachedOnePass); }
Search_AltMatch_CachedBitState(benchmark::State & state)413 void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); }
Search_AltMatch_CachedPCRE(benchmark::State & state)414 void Search_AltMatch_CachedPCRE(benchmark::State& state) { SearchAltMatch(state, SearchCachedPCRE); }
Search_AltMatch_CachedRE2(benchmark::State & state)415 void Search_AltMatch_CachedRE2(benchmark::State& state) { SearchAltMatch(state, SearchCachedRE2); }
416
417 BENCHMARK_RANGE(Search_AltMatch_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
418 BENCHMARK_RANGE(Search_AltMatch_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
419 BENCHMARK_RANGE(Search_AltMatch_CachedOnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
420 BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
421 #ifdef USEPCRE
422 BENCHMARK_RANGE(Search_AltMatch_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
423 #endif
424 BENCHMARK_RANGE(Search_AltMatch_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
425
426 // Benchmark: use regexp to find phone number.
427
SearchDigits(benchmark::State & state,SearchImpl * search)428 void SearchDigits(benchmark::State& state, SearchImpl* search) {
429 StringPiece s("650-253-0001");
430 search(state, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
431 state.SetItemsProcessed(state.iterations());
432 }
433
Search_Digits_DFA(benchmark::State & state)434 void Search_Digits_DFA(benchmark::State& state) { SearchDigits(state, SearchDFA); }
Search_Digits_NFA(benchmark::State & state)435 void Search_Digits_NFA(benchmark::State& state) { SearchDigits(state, SearchNFA); }
Search_Digits_OnePass(benchmark::State & state)436 void Search_Digits_OnePass(benchmark::State& state) { SearchDigits(state, SearchOnePass); }
Search_Digits_PCRE(benchmark::State & state)437 void Search_Digits_PCRE(benchmark::State& state) { SearchDigits(state, SearchPCRE); }
Search_Digits_RE2(benchmark::State & state)438 void Search_Digits_RE2(benchmark::State& state) { SearchDigits(state, SearchRE2); }
Search_Digits_BitState(benchmark::State & state)439 void Search_Digits_BitState(benchmark::State& state) { SearchDigits(state, SearchBitState); }
440
441 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
442 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
443 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
444 #ifdef USEPCRE
445 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
446 #endif
447 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
448 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
449
450 // Benchmark: use regexp to parse digit fields in phone number.
451
Parse3Digits(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))452 void Parse3Digits(benchmark::State& state,
453 void (*parse3)(benchmark::State&, const char*,
454 const StringPiece&)) {
455 parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
456 state.SetItemsProcessed(state.iterations());
457 }
458
Parse_Digits_NFA(benchmark::State & state)459 void Parse_Digits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3NFA); }
Parse_Digits_OnePass(benchmark::State & state)460 void Parse_Digits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3OnePass); }
Parse_Digits_PCRE(benchmark::State & state)461 void Parse_Digits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3PCRE); }
Parse_Digits_RE2(benchmark::State & state)462 void Parse_Digits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3RE2); }
Parse_Digits_Backtrack(benchmark::State & state)463 void Parse_Digits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3Backtrack); }
Parse_Digits_BitState(benchmark::State & state)464 void Parse_Digits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3BitState); }
465
466 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
467 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
468 #ifdef USEPCRE
469 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
470 #endif
471 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
472 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
473 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
474
Parse_CachedDigits_NFA(benchmark::State & state)475 void Parse_CachedDigits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(benchmark::State & state)476 void Parse_CachedDigits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(benchmark::State & state)477 void Parse_CachedDigits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(benchmark::State & state)478 void Parse_CachedDigits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(benchmark::State & state)479 void Parse_CachedDigits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(benchmark::State & state)480 void Parse_CachedDigits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3CachedBitState); }
481
482 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
483 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
484 #ifdef USEPCRE
485 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
486 #endif
487 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
488 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
489 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
490
Parse3DigitDs(benchmark::State & state,void (* parse3)(benchmark::State &,const char *,const StringPiece &))491 void Parse3DigitDs(benchmark::State& state,
492 void (*parse3)(benchmark::State&, const char*,
493 const StringPiece&)) {
494 parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
495 state.SetItemsProcessed(state.iterations());
496 }
497
Parse_DigitDs_NFA(benchmark::State & state)498 void Parse_DigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3NFA); }
Parse_DigitDs_OnePass(benchmark::State & state)499 void Parse_DigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3OnePass); }
Parse_DigitDs_PCRE(benchmark::State & state)500 void Parse_DigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3PCRE); }
Parse_DigitDs_RE2(benchmark::State & state)501 void Parse_DigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3RE2); }
Parse_DigitDs_Backtrack(benchmark::State & state)502 void Parse_DigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(benchmark::State & state)503 void Parse_DigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
504
505 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
506 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
507 #ifdef USEPCRE
508 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
509 #endif
510 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
511 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
512 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
513
Parse_CachedDigitDs_NFA(benchmark::State & state)514 void Parse_CachedDigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(benchmark::State & state)515 void Parse_CachedDigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(benchmark::State & state)516 void Parse_CachedDigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(benchmark::State & state)517 void Parse_CachedDigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(benchmark::State & state)518 void Parse_CachedDigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(benchmark::State & state)519 void Parse_CachedDigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
520
521 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
522 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
523 #ifdef USEPCRE
524 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
525 #endif
526 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
527 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
528 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
529
530 // Benchmark: splitting off leading number field.
531
Parse1Split(benchmark::State & state,void (* parse1)(benchmark::State &,const char *,const StringPiece &))532 void Parse1Split(benchmark::State& state,
533 void (*parse1)(benchmark::State&, const char*,
534 const StringPiece&)) {
535 parse1(state, "[0-9]+-(.*)", "650-253-0001");
536 state.SetItemsProcessed(state.iterations());
537 }
538
Parse_Split_NFA(benchmark::State & state)539 void Parse_Split_NFA(benchmark::State& state) { Parse1Split(state, Parse1NFA); }
Parse_Split_OnePass(benchmark::State & state)540 void Parse_Split_OnePass(benchmark::State& state) { Parse1Split(state, Parse1OnePass); }
Parse_Split_PCRE(benchmark::State & state)541 void Parse_Split_PCRE(benchmark::State& state) { Parse1Split(state, Parse1PCRE); }
Parse_Split_RE2(benchmark::State & state)542 void Parse_Split_RE2(benchmark::State& state) { Parse1Split(state, Parse1RE2); }
Parse_Split_BitState(benchmark::State & state)543 void Parse_Split_BitState(benchmark::State& state) { Parse1Split(state, Parse1BitState); }
544
545 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
546 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
547 #ifdef USEPCRE
548 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
549 #endif
550 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
551 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
552
Parse_CachedSplit_NFA(benchmark::State & state)553 void Parse_CachedSplit_NFA(benchmark::State& state) { Parse1Split(state, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(benchmark::State & state)554 void Parse_CachedSplit_OnePass(benchmark::State& state) { Parse1Split(state, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(benchmark::State & state)555 void Parse_CachedSplit_PCRE(benchmark::State& state) { Parse1Split(state, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(benchmark::State & state)556 void Parse_CachedSplit_RE2(benchmark::State& state) { Parse1Split(state, Parse1CachedRE2); }
Parse_CachedSplit_BitState(benchmark::State & state)557 void Parse_CachedSplit_BitState(benchmark::State& state) { Parse1Split(state, Parse1CachedBitState); }
558
559 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
560 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
561 #ifdef USEPCRE
562 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
563 #endif
564 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
565 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
566
567 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
568
Parse1SplitHard(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))569 void Parse1SplitHard(benchmark::State& state,
570 void (*run)(benchmark::State&, const char*,
571 const StringPiece&)) {
572 run(state, "[0-9]+.(.*)", "650-253-0001");
573 state.SetItemsProcessed(state.iterations());
574 }
575
Parse_SplitHard_NFA(benchmark::State & state)576 void Parse_SplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1NFA); }
Parse_SplitHard_PCRE(benchmark::State & state)577 void Parse_SplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1PCRE); }
Parse_SplitHard_RE2(benchmark::State & state)578 void Parse_SplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1RE2); }
Parse_SplitHard_BitState(benchmark::State & state)579 void Parse_SplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1BitState); }
580
581 #ifdef USEPCRE
582 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
583 #endif
584 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
585 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
586 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
587
Parse_CachedSplitHard_NFA(benchmark::State & state)588 void Parse_CachedSplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(benchmark::State & state)589 void Parse_CachedSplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(benchmark::State & state)590 void Parse_CachedSplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(benchmark::State & state)591 void Parse_CachedSplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(benchmark::State & state)592 void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); }
593
594 #ifdef USEPCRE
595 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
596 #endif
597 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
598 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
599 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
600 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
601
602 // Benchmark: Parse1SplitHard, big text, small match.
603
Parse1SplitBig1(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))604 void Parse1SplitBig1(benchmark::State& state,
605 void (*run)(benchmark::State&, const char*,
606 const StringPiece&)) {
607 std::string s;
608 s.append(100000, 'x');
609 s.append("650-253-0001");
610 run(state, "[0-9]+.(.*)", s);
611 state.SetItemsProcessed(state.iterations());
612 }
613
Parse_CachedSplitBig1_PCRE(benchmark::State & state)614 void Parse_CachedSplitBig1_PCRE(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(benchmark::State & state)615 void Parse_CachedSplitBig1_RE2(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedRE2); }
616
617 #ifdef USEPCRE
618 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
619 #endif
620 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
621
622 // Benchmark: Parse1SplitHard, big text, big match.
623
Parse1SplitBig2(benchmark::State & state,void (* run)(benchmark::State &,const char *,const StringPiece &))624 void Parse1SplitBig2(benchmark::State& state,
625 void (*run)(benchmark::State&, const char*,
626 const StringPiece&)) {
627 std::string s;
628 s.append("650-253-");
629 s.append(100000, '0');
630 run(state, "[0-9]+.(.*)", s);
631 state.SetItemsProcessed(state.iterations());
632 }
633
Parse_CachedSplitBig2_PCRE(benchmark::State & state)634 void Parse_CachedSplitBig2_PCRE(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(benchmark::State & state)635 void Parse_CachedSplitBig2_RE2(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedRE2); }
636
637 #ifdef USEPCRE
638 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
639 #endif
640 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
641
642 // Benchmark: measure time required to parse (but not execute)
643 // a simple regular expression.
644
ParseRegexp(benchmark::State & state,const std::string & regexp)645 void ParseRegexp(benchmark::State& state, const std::string& regexp) {
646 for (auto _ : state) {
647 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
648 CHECK(re);
649 re->Decref();
650 }
651 }
652
SimplifyRegexp(benchmark::State & state,const std::string & regexp)653 void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
654 for (auto _ : state) {
655 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
656 CHECK(re);
657 Regexp* sre = re->Simplify();
658 CHECK(sre);
659 sre->Decref();
660 re->Decref();
661 }
662 }
663
NullWalkRegexp(benchmark::State & state,const std::string & regexp)664 void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
665 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
666 CHECK(re);
667 for (auto _ : state) {
668 re->NullWalk();
669 }
670 re->Decref();
671 }
672
SimplifyCompileRegexp(benchmark::State & state,const std::string & regexp)673 void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
674 for (auto _ : state) {
675 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
676 CHECK(re);
677 Regexp* sre = re->Simplify();
678 CHECK(sre);
679 Prog* prog = sre->CompileToProg(0);
680 CHECK(prog);
681 delete prog;
682 sre->Decref();
683 re->Decref();
684 }
685 }
686
CompileRegexp(benchmark::State & state,const std::string & regexp)687 void CompileRegexp(benchmark::State& state, const std::string& regexp) {
688 for (auto _ : state) {
689 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
690 CHECK(re);
691 Prog* prog = re->CompileToProg(0);
692 CHECK(prog);
693 delete prog;
694 re->Decref();
695 }
696 }
697
CompileToProg(benchmark::State & state,const std::string & regexp)698 void CompileToProg(benchmark::State& state, const std::string& regexp) {
699 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
700 CHECK(re);
701 for (auto _ : state) {
702 Prog* prog = re->CompileToProg(0);
703 CHECK(prog);
704 delete prog;
705 }
706 re->Decref();
707 }
708
CompileByteMap(benchmark::State & state,const std::string & regexp)709 void CompileByteMap(benchmark::State& state, const std::string& regexp) {
710 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
711 CHECK(re);
712 Prog* prog = re->CompileToProg(0);
713 CHECK(prog);
714 for (auto _ : state) {
715 prog->ComputeByteMap();
716 }
717 delete prog;
718 re->Decref();
719 }
720
CompilePCRE(benchmark::State & state,const std::string & regexp)721 void CompilePCRE(benchmark::State& state, const std::string& regexp) {
722 for (auto _ : state) {
723 PCRE re(regexp, PCRE::UTF8);
724 CHECK_EQ(re.error(), "");
725 }
726 }
727
CompileRE2(benchmark::State & state,const std::string & regexp)728 void CompileRE2(benchmark::State& state, const std::string& regexp) {
729 for (auto _ : state) {
730 RE2 re(regexp);
731 CHECK_EQ(re.error(), "");
732 }
733 }
734
RunBuild(benchmark::State & state,const std::string & regexp,void (* run)(benchmark::State &,const std::string &))735 void RunBuild(benchmark::State& state, const std::string& regexp,
736 void (*run)(benchmark::State&, const std::string&)) {
737 run(state, regexp);
738 state.SetItemsProcessed(state.iterations());
739 }
740
741 } // namespace re2
742
743 DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
744 "regexp for compile benchmarks");
745
746 namespace re2 {
747
BM_PCRE_Compile(benchmark::State & state)748 void BM_PCRE_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
BM_Regexp_Parse(benchmark::State & state)749 void BM_Regexp_Parse(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
BM_Regexp_Simplify(benchmark::State & state)750 void BM_Regexp_Simplify(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
BM_CompileToProg(benchmark::State & state)751 void BM_CompileToProg(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
BM_CompileByteMap(benchmark::State & state)752 void BM_CompileByteMap(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
BM_Regexp_Compile(benchmark::State & state)753 void BM_Regexp_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
BM_Regexp_SimplifyCompile(benchmark::State & state)754 void BM_Regexp_SimplifyCompile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
BM_Regexp_NullWalk(benchmark::State & state)755 void BM_Regexp_NullWalk(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
BM_RE2_Compile(benchmark::State & state)756 void BM_RE2_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
757
758 #ifdef USEPCRE
759 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
760 #endif
761 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
762 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
763 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
764 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
765 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
766 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
767 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
768 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
769
770 // Makes text of size nbytes, then calls run to search
771 // the text for regexp iters times.
SearchPhone(benchmark::State & state,ParseImpl * search)772 void SearchPhone(benchmark::State& state, ParseImpl* search) {
773 std::string s;
774 MakeText(&s, state.range(0));
775 s.append("(650) 253-0001");
776 search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
777 state.SetBytesProcessed(state.iterations() * state.range(0));
778 }
779
SearchPhone_CachedPCRE(benchmark::State & state)780 void SearchPhone_CachedPCRE(benchmark::State& state) {
781 SearchPhone(state, SearchParse2CachedPCRE);
782 }
783
SearchPhone_CachedRE2(benchmark::State & state)784 void SearchPhone_CachedRE2(benchmark::State& state) {
785 SearchPhone(state, SearchParse2CachedRE2);
786 }
787
788 #ifdef USEPCRE
789 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
790 #endif
791 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
792
793 /*
794 TODO(rsc): Make this work again.
795 void CacheFill(int iters, int n, SearchImpl *srch) {
796 std::string s = DeBruijnString(n+1);
797 std::string t;
798 for (int i = n+1; i < 20; i++) {
799 t = s + s;
800 using std::swap;
801 swap(s, t);
802 }
803 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
804 Prog::kUnanchored, true);
805 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
806 }
807
808 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
809 void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
810 void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
811 void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
812
813 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
814 // for the static BenchmarkRegisterer, which makes it unusable inside
815 // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
816 // to make the identifiers distinct (only possible when 'a' is a simple
817 // expression like 2, not like 1+1).
818 #define MY_BENCHMARK_WITH_ARG(n, a) \
819 bool __benchmark_ ## n ## a = \
820 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
821
822 #define DO24(A, B) \
823 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
824 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
825 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
826 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
827
828 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
829 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
830 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
831 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
832
833 #undef DO24
834 #undef MY_BENCHMARK_WITH_ARG
835 */
836
837 ////////////////////////////////////////////////////////////////////////
838 //
839 // Implementation routines. Sad that there are so many,
840 // but all the interfaces are slightly different.
841
842 // Runs implementation to search for regexp in text, iters times.
843 // Expect_match says whether the regexp should be found.
844 // Anchored says whether to run an anchored search.
845
SearchDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)846 void SearchDFA(benchmark::State& state, const char* regexp,
847 const StringPiece& text, Prog::Anchor anchor,
848 bool expect_match) {
849 for (auto _ : state) {
850 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
851 CHECK(re);
852 Prog* prog = re->CompileToProg(0);
853 CHECK(prog);
854 bool failed = false;
855 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
856 NULL, &failed, NULL),
857 expect_match);
858 CHECK(!failed);
859 delete prog;
860 re->Decref();
861 }
862 }
863
SearchNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)864 void SearchNFA(benchmark::State& state, const char* regexp,
865 const StringPiece& text, Prog::Anchor anchor,
866 bool expect_match) {
867 for (auto _ : state) {
868 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
869 CHECK(re);
870 Prog* prog = re->CompileToProg(0);
871 CHECK(prog);
872 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
873 NULL, 0),
874 expect_match);
875 delete prog;
876 re->Decref();
877 }
878 }
879
SearchOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)880 void SearchOnePass(benchmark::State& state, const char* regexp,
881 const StringPiece& text, Prog::Anchor anchor,
882 bool expect_match) {
883 for (auto _ : state) {
884 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
885 CHECK(re);
886 Prog* prog = re->CompileToProg(0);
887 CHECK(prog);
888 CHECK(prog->IsOnePass());
889 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
890 expect_match);
891 delete prog;
892 re->Decref();
893 }
894 }
895
SearchBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)896 void SearchBitState(benchmark::State& state, const char* regexp,
897 const StringPiece& text, Prog::Anchor anchor,
898 bool expect_match) {
899 for (auto _ : state) {
900 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
901 CHECK(re);
902 Prog* prog = re->CompileToProg(0);
903 CHECK(prog);
904 CHECK(prog->CanBitState());
905 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
906 expect_match);
907 delete prog;
908 re->Decref();
909 }
910 }
911
SearchPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)912 void SearchPCRE(benchmark::State& state, const char* regexp,
913 const StringPiece& text, Prog::Anchor anchor,
914 bool expect_match) {
915 for (auto _ : state) {
916 PCRE re(regexp, PCRE::UTF8);
917 CHECK_EQ(re.error(), "");
918 if (anchor == Prog::kAnchored)
919 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
920 else
921 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
922 }
923 }
924
SearchRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)925 void SearchRE2(benchmark::State& state, const char* regexp,
926 const StringPiece& text, Prog::Anchor anchor,
927 bool expect_match) {
928 for (auto _ : state) {
929 RE2 re(regexp);
930 CHECK_EQ(re.error(), "");
931 if (anchor == Prog::kAnchored)
932 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
933 else
934 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
935 }
936 }
937
938 // SearchCachedXXX is like SearchXXX but only does the
939 // regexp parsing and compiling once. This lets us measure
940 // search time without the per-regexp overhead.
941
SearchCachedDFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)942 void SearchCachedDFA(benchmark::State& state, const char* regexp,
943 const StringPiece& text, Prog::Anchor anchor,
944 bool expect_match) {
945 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
946 CHECK(re);
947 Prog* prog = re->CompileToProg(int64_t{1}<<31);
948 CHECK(prog);
949 for (auto _ : state) {
950 bool failed = false;
951 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
952 NULL, &failed, NULL),
953 expect_match);
954 CHECK(!failed);
955 }
956 delete prog;
957 re->Decref();
958 }
959
SearchCachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)960 void SearchCachedNFA(benchmark::State& state, const char* regexp,
961 const StringPiece& text, Prog::Anchor anchor,
962 bool expect_match) {
963 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
964 CHECK(re);
965 Prog* prog = re->CompileToProg(0);
966 CHECK(prog);
967 for (auto _ : state) {
968 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
969 NULL, 0),
970 expect_match);
971 }
972 delete prog;
973 re->Decref();
974 }
975
SearchCachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)976 void SearchCachedOnePass(benchmark::State& state, const char* regexp,
977 const StringPiece& text, Prog::Anchor anchor,
978 bool expect_match) {
979 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
980 CHECK(re);
981 Prog* prog = re->CompileToProg(0);
982 CHECK(prog);
983 CHECK(prog->IsOnePass());
984 for (auto _ : state) {
985 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
986 expect_match);
987 }
988 delete prog;
989 re->Decref();
990 }
991
SearchCachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)992 void SearchCachedBitState(benchmark::State& state, const char* regexp,
993 const StringPiece& text, Prog::Anchor anchor,
994 bool expect_match) {
995 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
996 CHECK(re);
997 Prog* prog = re->CompileToProg(0);
998 CHECK(prog);
999 CHECK(prog->CanBitState());
1000 for (auto _ : state) {
1001 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1002 expect_match);
1003 }
1004 delete prog;
1005 re->Decref();
1006 }
1007
SearchCachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1008 void SearchCachedPCRE(benchmark::State& state, const char* regexp,
1009 const StringPiece& text, Prog::Anchor anchor,
1010 bool expect_match) {
1011 PCRE re(regexp, PCRE::UTF8);
1012 CHECK_EQ(re.error(), "");
1013 for (auto _ : state) {
1014 if (anchor == Prog::kAnchored)
1015 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1016 else
1017 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1018 }
1019 }
1020
SearchCachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1021 void SearchCachedRE2(benchmark::State& state, const char* regexp,
1022 const StringPiece& text, Prog::Anchor anchor,
1023 bool expect_match) {
1024 RE2 re(regexp);
1025 CHECK_EQ(re.error(), "");
1026 for (auto _ : state) {
1027 if (anchor == Prog::kAnchored)
1028 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1029 else
1030 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1031 }
1032 }
1033
1034 // Runs implementation to full match regexp against text,
1035 // extracting three submatches. Expects match always.
1036
Parse3NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1037 void Parse3NFA(benchmark::State& state, const char* regexp,
1038 const StringPiece& text) {
1039 for (auto _ : state) {
1040 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1041 CHECK(re);
1042 Prog* prog = re->CompileToProg(0);
1043 CHECK(prog);
1044 StringPiece sp[4]; // 4 because sp[0] is whole match.
1045 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1046 Prog::kFullMatch, sp, 4));
1047 delete prog;
1048 re->Decref();
1049 }
1050 }
1051
Parse3OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1052 void Parse3OnePass(benchmark::State& state, const char* regexp,
1053 const StringPiece& text) {
1054 for (auto _ : state) {
1055 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1056 CHECK(re);
1057 Prog* prog = re->CompileToProg(0);
1058 CHECK(prog);
1059 CHECK(prog->IsOnePass());
1060 StringPiece sp[4]; // 4 because sp[0] is whole match.
1061 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1062 delete prog;
1063 re->Decref();
1064 }
1065 }
1066
Parse3BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1067 void Parse3BitState(benchmark::State& state, const char* regexp,
1068 const StringPiece& text) {
1069 for (auto _ : state) {
1070 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1071 CHECK(re);
1072 Prog* prog = re->CompileToProg(0);
1073 CHECK(prog);
1074 CHECK(prog->CanBitState());
1075 StringPiece sp[4]; // 4 because sp[0] is whole match.
1076 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1077 delete prog;
1078 re->Decref();
1079 }
1080 }
1081
Parse3Backtrack(benchmark::State & state,const char * regexp,const StringPiece & text)1082 void Parse3Backtrack(benchmark::State& state, const char* regexp,
1083 const StringPiece& text) {
1084 for (auto _ : state) {
1085 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1086 CHECK(re);
1087 Prog* prog = re->CompileToProg(0);
1088 CHECK(prog);
1089 StringPiece sp[4]; // 4 because sp[0] is whole match.
1090 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1091 delete prog;
1092 re->Decref();
1093 }
1094 }
1095
Parse3PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1096 void Parse3PCRE(benchmark::State& state, const char* regexp,
1097 const StringPiece& text) {
1098 for (auto _ : state) {
1099 PCRE re(regexp, PCRE::UTF8);
1100 CHECK_EQ(re.error(), "");
1101 StringPiece sp1, sp2, sp3;
1102 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1103 }
1104 }
1105
Parse3RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1106 void Parse3RE2(benchmark::State& state, const char* regexp,
1107 const StringPiece& text) {
1108 for (auto _ : state) {
1109 RE2 re(regexp);
1110 CHECK_EQ(re.error(), "");
1111 StringPiece sp1, sp2, sp3;
1112 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1113 }
1114 }
1115
Parse3CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1116 void Parse3CachedNFA(benchmark::State& state, const char* regexp,
1117 const StringPiece& text) {
1118 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1119 CHECK(re);
1120 Prog* prog = re->CompileToProg(0);
1121 CHECK(prog);
1122 StringPiece sp[4]; // 4 because sp[0] is whole match.
1123 for (auto _ : state) {
1124 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1125 Prog::kFullMatch, sp, 4));
1126 }
1127 delete prog;
1128 re->Decref();
1129 }
1130
Parse3CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1131 void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
1132 const StringPiece& text) {
1133 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1134 CHECK(re);
1135 Prog* prog = re->CompileToProg(0);
1136 CHECK(prog);
1137 CHECK(prog->IsOnePass());
1138 StringPiece sp[4]; // 4 because sp[0] is whole match.
1139 for (auto _ : state) {
1140 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1141 }
1142 delete prog;
1143 re->Decref();
1144 }
1145
Parse3CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1146 void Parse3CachedBitState(benchmark::State& state, const char* regexp,
1147 const StringPiece& text) {
1148 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1149 CHECK(re);
1150 Prog* prog = re->CompileToProg(0);
1151 CHECK(prog);
1152 CHECK(prog->CanBitState());
1153 StringPiece sp[4]; // 4 because sp[0] is whole match.
1154 for (auto _ : state) {
1155 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1156 }
1157 delete prog;
1158 re->Decref();
1159 }
1160
Parse3CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1161 void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
1162 const StringPiece& text) {
1163 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1164 CHECK(re);
1165 Prog* prog = re->CompileToProg(0);
1166 CHECK(prog);
1167 StringPiece sp[4]; // 4 because sp[0] is whole match.
1168 for (auto _ : state) {
1169 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1170 }
1171 delete prog;
1172 re->Decref();
1173 }
1174
Parse3CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1175 void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
1176 const StringPiece& text) {
1177 PCRE re(regexp, PCRE::UTF8);
1178 CHECK_EQ(re.error(), "");
1179 StringPiece sp1, sp2, sp3;
1180 for (auto _ : state) {
1181 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1182 }
1183 }
1184
Parse3CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1185 void Parse3CachedRE2(benchmark::State& state, const char* regexp,
1186 const StringPiece& text) {
1187 RE2 re(regexp);
1188 CHECK_EQ(re.error(), "");
1189 StringPiece sp1, sp2, sp3;
1190 for (auto _ : state) {
1191 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1192 }
1193 }
1194
1195 // Runs implementation to full match regexp against text,
1196 // extracting three submatches. Expects match always.
1197
Parse1NFA(benchmark::State & state,const char * regexp,const StringPiece & text)1198 void Parse1NFA(benchmark::State& state, const char* regexp,
1199 const StringPiece& text) {
1200 for (auto _ : state) {
1201 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1202 CHECK(re);
1203 Prog* prog = re->CompileToProg(0);
1204 CHECK(prog);
1205 StringPiece sp[2]; // 2 because sp[0] is whole match.
1206 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1207 Prog::kFullMatch, sp, 2));
1208 delete prog;
1209 re->Decref();
1210 }
1211 }
1212
Parse1OnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1213 void Parse1OnePass(benchmark::State& state, const char* regexp,
1214 const StringPiece& text) {
1215 for (auto _ : state) {
1216 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1217 CHECK(re);
1218 Prog* prog = re->CompileToProg(0);
1219 CHECK(prog);
1220 CHECK(prog->IsOnePass());
1221 StringPiece sp[2]; // 2 because sp[0] is whole match.
1222 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1223 delete prog;
1224 re->Decref();
1225 }
1226 }
1227
Parse1BitState(benchmark::State & state,const char * regexp,const StringPiece & text)1228 void Parse1BitState(benchmark::State& state, const char* regexp,
1229 const StringPiece& text) {
1230 for (auto _ : state) {
1231 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1232 CHECK(re);
1233 Prog* prog = re->CompileToProg(0);
1234 CHECK(prog);
1235 CHECK(prog->CanBitState());
1236 StringPiece sp[2]; // 2 because sp[0] is whole match.
1237 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1238 delete prog;
1239 re->Decref();
1240 }
1241 }
1242
Parse1PCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1243 void Parse1PCRE(benchmark::State& state, const char* regexp,
1244 const StringPiece& text) {
1245 for (auto _ : state) {
1246 PCRE re(regexp, PCRE::UTF8);
1247 CHECK_EQ(re.error(), "");
1248 StringPiece sp1;
1249 CHECK(PCRE::FullMatch(text, re, &sp1));
1250 }
1251 }
1252
Parse1RE2(benchmark::State & state,const char * regexp,const StringPiece & text)1253 void Parse1RE2(benchmark::State& state, const char* regexp,
1254 const StringPiece& text) {
1255 for (auto _ : state) {
1256 RE2 re(regexp);
1257 CHECK_EQ(re.error(), "");
1258 StringPiece sp1;
1259 CHECK(RE2::FullMatch(text, re, &sp1));
1260 }
1261 }
1262
Parse1CachedNFA(benchmark::State & state,const char * regexp,const StringPiece & text)1263 void Parse1CachedNFA(benchmark::State& state, const char* regexp,
1264 const StringPiece& text) {
1265 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1266 CHECK(re);
1267 Prog* prog = re->CompileToProg(0);
1268 CHECK(prog);
1269 StringPiece sp[2]; // 2 because sp[0] is whole match.
1270 for (auto _ : state) {
1271 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1272 Prog::kFullMatch, sp, 2));
1273 }
1274 delete prog;
1275 re->Decref();
1276 }
1277
Parse1CachedOnePass(benchmark::State & state,const char * regexp,const StringPiece & text)1278 void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
1279 const StringPiece& text) {
1280 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1281 CHECK(re);
1282 Prog* prog = re->CompileToProg(0);
1283 CHECK(prog);
1284 CHECK(prog->IsOnePass());
1285 StringPiece sp[2]; // 2 because sp[0] is whole match.
1286 for (auto _ : state) {
1287 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1288 }
1289 delete prog;
1290 re->Decref();
1291 }
1292
Parse1CachedBitState(benchmark::State & state,const char * regexp,const StringPiece & text)1293 void Parse1CachedBitState(benchmark::State& state, const char* regexp,
1294 const StringPiece& text) {
1295 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1296 CHECK(re);
1297 Prog* prog = re->CompileToProg(0);
1298 CHECK(prog);
1299 CHECK(prog->CanBitState());
1300 StringPiece sp[2]; // 2 because sp[0] is whole match.
1301 for (auto _ : state) {
1302 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1303 }
1304 delete prog;
1305 re->Decref();
1306 }
1307
Parse1CachedBacktrack(benchmark::State & state,const char * regexp,const StringPiece & text)1308 void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
1309 const StringPiece& text) {
1310 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1311 CHECK(re);
1312 Prog* prog = re->CompileToProg(0);
1313 CHECK(prog);
1314 StringPiece sp[2]; // 2 because sp[0] is whole match.
1315 for (auto _ : state) {
1316 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1317 }
1318 delete prog;
1319 re->Decref();
1320 }
1321
Parse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1322 void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
1323 const StringPiece& text) {
1324 PCRE re(regexp, PCRE::UTF8);
1325 CHECK_EQ(re.error(), "");
1326 StringPiece sp1;
1327 for (auto _ : state) {
1328 CHECK(PCRE::FullMatch(text, re, &sp1));
1329 }
1330 }
1331
Parse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1332 void Parse1CachedRE2(benchmark::State& state, const char* regexp,
1333 const StringPiece& text) {
1334 RE2 re(regexp);
1335 CHECK_EQ(re.error(), "");
1336 StringPiece sp1;
1337 for (auto _ : state) {
1338 CHECK(RE2::FullMatch(text, re, &sp1));
1339 }
1340 }
1341
SearchParse2CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1342 void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
1343 const StringPiece& text) {
1344 PCRE re(regexp, PCRE::UTF8);
1345 CHECK_EQ(re.error(), "");
1346 for (auto _ : state) {
1347 StringPiece sp1, sp2;
1348 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1349 }
1350 }
1351
SearchParse2CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1352 void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
1353 const StringPiece& text) {
1354 RE2 re(regexp);
1355 CHECK_EQ(re.error(), "");
1356 for (auto _ : state) {
1357 StringPiece sp1, sp2;
1358 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1359 }
1360 }
1361
SearchParse1CachedPCRE(benchmark::State & state,const char * regexp,const StringPiece & text)1362 void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
1363 const StringPiece& text) {
1364 PCRE re(regexp, PCRE::UTF8);
1365 CHECK_EQ(re.error(), "");
1366 for (auto _ : state) {
1367 StringPiece sp1;
1368 CHECK(PCRE::PartialMatch(text, re, &sp1));
1369 }
1370 }
1371
SearchParse1CachedRE2(benchmark::State & state,const char * regexp,const StringPiece & text)1372 void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
1373 const StringPiece& text) {
1374 RE2 re(regexp);
1375 CHECK_EQ(re.error(), "");
1376 for (auto _ : state) {
1377 StringPiece sp1;
1378 CHECK(RE2::PartialMatch(text, re, &sp1));
1379 }
1380 }
1381
EmptyPartialMatchPCRE(benchmark::State & state)1382 void EmptyPartialMatchPCRE(benchmark::State& state) {
1383 PCRE re("");
1384 for (auto _ : state) {
1385 PCRE::PartialMatch("", re);
1386 }
1387 }
1388
EmptyPartialMatchRE2(benchmark::State & state)1389 void EmptyPartialMatchRE2(benchmark::State& state) {
1390 RE2 re("");
1391 for (auto _ : state) {
1392 RE2::PartialMatch("", re);
1393 }
1394 }
1395 #ifdef USEPCRE
1396 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1397 #endif
1398 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1399
SimplePartialMatchPCRE(benchmark::State & state)1400 void SimplePartialMatchPCRE(benchmark::State& state) {
1401 PCRE re("abcdefg");
1402 for (auto _ : state) {
1403 PCRE::PartialMatch("abcdefg", re);
1404 }
1405 }
1406
SimplePartialMatchRE2(benchmark::State & state)1407 void SimplePartialMatchRE2(benchmark::State& state) {
1408 RE2 re("abcdefg");
1409 for (auto _ : state) {
1410 RE2::PartialMatch("abcdefg", re);
1411 }
1412 }
1413 #ifdef USEPCRE
1414 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1415 #endif
1416 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1417
1418 static std::string http_text =
1419 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1420 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1421
HTTPPartialMatchPCRE(benchmark::State & state)1422 void HTTPPartialMatchPCRE(benchmark::State& state) {
1423 StringPiece a;
1424 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1425 for (auto _ : state) {
1426 PCRE::PartialMatch(http_text, re, &a);
1427 }
1428 }
1429
HTTPPartialMatchRE2(benchmark::State & state)1430 void HTTPPartialMatchRE2(benchmark::State& state) {
1431 StringPiece a;
1432 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1433 for (auto _ : state) {
1434 RE2::PartialMatch(http_text, re, &a);
1435 }
1436 }
1437
1438 #ifdef USEPCRE
1439 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1440 #endif
1441 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1442
1443 static std::string smallhttp_text =
1444 "GET /abc HTTP/1.1";
1445
SmallHTTPPartialMatchPCRE(benchmark::State & state)1446 void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
1447 StringPiece a;
1448 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1449 for (auto _ : state) {
1450 PCRE::PartialMatch(smallhttp_text, re, &a);
1451 }
1452 }
1453
SmallHTTPPartialMatchRE2(benchmark::State & state)1454 void SmallHTTPPartialMatchRE2(benchmark::State& state) {
1455 StringPiece a;
1456 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1457 for (auto _ : state) {
1458 RE2::PartialMatch(smallhttp_text, re, &a);
1459 }
1460 }
1461
1462 #ifdef USEPCRE
1463 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1464 #endif
1465 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1466
DotMatchPCRE(benchmark::State & state)1467 void DotMatchPCRE(benchmark::State& state) {
1468 StringPiece a;
1469 PCRE re("(?-s)^(.+)");
1470 for (auto _ : state) {
1471 PCRE::PartialMatch(http_text, re, &a);
1472 }
1473 }
1474
DotMatchRE2(benchmark::State & state)1475 void DotMatchRE2(benchmark::State& state) {
1476 StringPiece a;
1477 RE2 re("(?-s)^(.+)");
1478 for (auto _ : state) {
1479 RE2::PartialMatch(http_text, re, &a);
1480 }
1481 }
1482
1483 #ifdef USEPCRE
1484 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1485 #endif
1486 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1487
ASCIIMatchPCRE(benchmark::State & state)1488 void ASCIIMatchPCRE(benchmark::State& state) {
1489 StringPiece a;
1490 PCRE re("(?-s)^([ -~]+)");
1491 for (auto _ : state) {
1492 PCRE::PartialMatch(http_text, re, &a);
1493 }
1494 }
1495
ASCIIMatchRE2(benchmark::State & state)1496 void ASCIIMatchRE2(benchmark::State& state) {
1497 StringPiece a;
1498 RE2 re("(?-s)^([ -~]+)");
1499 for (auto _ : state) {
1500 RE2::PartialMatch(http_text, re, &a);
1501 }
1502 }
1503
1504 #ifdef USEPCRE
1505 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1506 #endif
1507 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1508
FullMatchPCRE(benchmark::State & state,const char * regexp)1509 void FullMatchPCRE(benchmark::State& state, const char *regexp) {
1510 std::string s;
1511 MakeText(&s, state.range(0));
1512 s += "ABCDEFGHIJ";
1513 PCRE re(regexp);
1514 for (auto _ : state) {
1515 CHECK(PCRE::FullMatch(s, re));
1516 }
1517 state.SetBytesProcessed(state.iterations() * state.range(0));
1518 }
1519
FullMatchRE2(benchmark::State & state,const char * regexp)1520 void FullMatchRE2(benchmark::State& state, const char *regexp) {
1521 std::string s;
1522 MakeText(&s, state.range(0));
1523 s += "ABCDEFGHIJ";
1524 RE2 re(regexp, RE2::Latin1);
1525 for (auto _ : state) {
1526 CHECK(RE2::FullMatch(s, re));
1527 }
1528 state.SetBytesProcessed(state.iterations() * state.range(0));
1529 }
1530
FullMatch_DotStar_CachedPCRE(benchmark::State & state)1531 void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
FullMatch_DotStar_CachedRE2(benchmark::State & state)1532 void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); }
1533
FullMatch_DotStarDollar_CachedPCRE(benchmark::State & state)1534 void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(benchmark::State & state)1535 void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$"); }
1536
FullMatch_DotStarCapture_CachedPCRE(benchmark::State & state)1537 void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(benchmark::State & state)1538 void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))"); }
1539
1540 #ifdef USEPCRE
1541 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1542 #endif
1543 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
1544
1545 #ifdef USEPCRE
1546 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1547 #endif
1548 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
1549
1550 #ifdef USEPCRE
1551 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1552 #endif
1553 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
1554
PossibleMatchRangeCommon(benchmark::State & state,const char * regexp)1555 void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
1556 RE2 re(regexp);
1557 std::string min;
1558 std::string max;
1559 const int kMaxLen = 16;
1560 for (auto _ : state) {
1561 CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1562 }
1563 }
1564
PossibleMatchRange_Trivial(benchmark::State & state)1565 void PossibleMatchRange_Trivial(benchmark::State& state) {
1566 PossibleMatchRangeCommon(state, ".*");
1567 }
PossibleMatchRange_Complex(benchmark::State & state)1568 void PossibleMatchRange_Complex(benchmark::State& state) {
1569 PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*");
1570 }
PossibleMatchRange_Prefix(benchmark::State & state)1571 void PossibleMatchRange_Prefix(benchmark::State& state) {
1572 PossibleMatchRangeCommon(state, "^some_random_prefix.*");
1573 }
PossibleMatchRange_NoProg(benchmark::State & state)1574 void PossibleMatchRange_NoProg(benchmark::State& state) {
1575 PossibleMatchRangeCommon(state, "^some_random_string$");
1576 }
1577
1578 BENCHMARK(PossibleMatchRange_Trivial);
1579 BENCHMARK(PossibleMatchRange_Complex);
1580 BENCHMARK(PossibleMatchRange_Prefix);
1581 BENCHMARK(PossibleMatchRange_NoProg);
1582
1583 } // namespace re2
1584