1 #![allow(non_snake_case)]
2
3 use std::iter::repeat;
4
5 use test::Bencher;
6
7 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
8 use crate::RegexSet;
9 use crate::{Regex, Text};
10
11 #[cfg(not(feature = "re-onig"))]
12 #[cfg(not(feature = "re-pcre1"))]
13 #[cfg(not(feature = "re-pcre2"))]
14 bench_match!(
15 no_exponential,
16 {
17 format!(
18 "{}{}",
19 repeat("a?").take(100).collect::<String>(),
20 repeat("a").take(100).collect::<String>()
21 )
22 },
23 repeat("a").take(100).collect()
24 );
25
26 bench_match!(literal, r"y", {
27 format!("{}y", repeat("x").take(50).collect::<String>())
28 });
29
30 bench_match!(not_literal, r".y", {
31 format!("{}y", repeat("x").take(50).collect::<String>())
32 });
33
34 bench_match!(match_class, "[abcdw]", {
35 format!("{}w", repeat("xxxx").take(20).collect::<String>())
36 });
37
38 bench_match!(match_class_in_range, "[ac]", {
39 format!("{}c", repeat("bbbb").take(20).collect::<String>())
40 });
41
42 #[cfg(not(feature = "re-rust-bytes"))]
43 #[cfg(not(feature = "re-tcl"))]
44 bench_match!(match_class_unicode, r"\p{L}", {
45 format!("{}a", repeat("☃5☃5").take(20).collect::<String>())
46 });
47
48 bench_not_match!(anchored_literal_short_non_match, r"^zbc(d|e)", {
49 "abcdefghijklmnopqrstuvwxyz".to_owned()
50 });
51
52 bench_not_match!(anchored_literal_long_non_match, r"^zbc(d|e)", {
53 repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>()
54 });
55
56 bench_match!(anchored_literal_short_match, r"^.bc(d|e)", {
57 "abcdefghijklmnopqrstuvwxyz".to_owned()
58 });
59
60 bench_match!(anchored_literal_long_match, r"^.bc(d|e)", {
61 repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>()
62 });
63
64 bench_match!(one_pass_short, r"^.bc(d|e)*$", {
65 "abcddddddeeeededd".to_owned()
66 });
67
68 bench_match!(one_pass_short_not, r".bc(d|e)*$", {
69 "abcddddddeeeededd".to_owned()
70 });
71
72 bench_match!(one_pass_long_prefix, r"^abcdefghijklmnopqrstuvwxyz.*$", {
73 "abcdefghijklmnopqrstuvwxyz".to_owned()
74 });
75
76 bench_match!(one_pass_long_prefix_not, r"^.bcdefghijklmnopqrstuvwxyz.*$", {
77 "abcdefghijklmnopqrstuvwxyz".to_owned()
78 });
79
80 bench_match!(long_needle1, r"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", {
81 repeat("a").take(100_000).collect::<String>() + "b"
82 });
83
84 bench_match!(long_needle2, r"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbba", {
85 repeat("b").take(100_000).collect::<String>() + "a"
86 });
87
88 // This benchmark specifically targets the "reverse suffix literal"
89 // optimization. In particular, it is easy for a naive implementation to
90 // take quadratic worst case time. This benchmark provides a case for such
91 // a scenario.
92 bench_not_match!(reverse_suffix_no_quadratic, r"[r-z].*bcdefghijklmnopq", {
93 repeat("bcdefghijklmnopq").take(500).collect::<String>()
94 });
95
96 #[cfg(feature = "re-rust")]
97 #[bench]
replace_all(b: &mut Bencher)98 fn replace_all(b: &mut Bencher) {
99 let re = regex!("[cjrw]");
100 let text = "abcdefghijklmnopqrstuvwxyz";
101 b.iter(|| re.replace_all(text, ""));
102 }
103
104 const TXT_32: &'static str = include_str!("data/32.txt");
105 const TXT_1K: &'static str = include_str!("data/1K.txt");
106 const TXT_32K: &'static str = include_str!("data/32K.txt");
107 const TXT_1MB: &'static str = include_str!("data/1MB.txt");
108
get_text(corpus: &str, suffix: String) -> String109 fn get_text(corpus: &str, suffix: String) -> String {
110 let mut corpus = corpus.to_string();
111 corpus.push_str(&suffix);
112 corpus
113 }
114
easy0_suffix() -> String115 fn easy0_suffix() -> String {
116 "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
117 }
118
119 macro_rules! easy0 {
120 () => {
121 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
122 };
123 }
124
125 bench_match!(easy0_32, easy0!(), get_text(TXT_32, easy0_suffix()));
126 bench_match!(easy0_1K, easy0!(), get_text(TXT_1K, easy0_suffix()));
127 bench_match!(easy0_32K, easy0!(), get_text(TXT_32K, easy0_suffix()));
128 bench_match!(easy0_1MB, easy0!(), get_text(TXT_1MB, easy0_suffix()));
129
easy1_suffix() -> String130 fn easy1_suffix() -> String {
131 "AABCCCDEEEFGGHHHIJJ".to_string()
132 }
133
134 macro_rules! easy1 {
135 () => {
136 r"A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
137 };
138 }
139
140 bench_match!(easy1_32, easy1!(), get_text(TXT_32, easy1_suffix()));
141 bench_match!(easy1_1K, easy1!(), get_text(TXT_1K, easy1_suffix()));
142 bench_match!(easy1_32K, easy1!(), get_text(TXT_32K, easy1_suffix()));
143 bench_match!(easy1_1MB, easy1!(), get_text(TXT_1MB, easy1_suffix()));
144
medium_suffix() -> String145 fn medium_suffix() -> String {
146 "XABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
147 }
148
149 macro_rules! medium {
150 () => {
151 r"[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
152 };
153 }
154
155 bench_match!(medium_32, medium!(), get_text(TXT_32, medium_suffix()));
156 bench_match!(medium_1K, medium!(), get_text(TXT_1K, medium_suffix()));
157 bench_match!(medium_32K, medium!(), get_text(TXT_32K, medium_suffix()));
158 bench_match!(medium_1MB, medium!(), get_text(TXT_1MB, medium_suffix()));
159
hard_suffix() -> String160 fn hard_suffix() -> String {
161 "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
162 }
163
164 macro_rules! hard {
165 () => {
166 r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
167 };
168 }
169
170 bench_match!(hard_32, hard!(), get_text(TXT_32, hard_suffix()));
171 bench_match!(hard_1K, hard!(), get_text(TXT_1K, hard_suffix()));
172 bench_match!(hard_32K, hard!(), get_text(TXT_32K, hard_suffix()));
173 bench_match!(hard_1MB, hard!(), get_text(TXT_1MB, hard_suffix()));
174
reallyhard_suffix() -> String175 fn reallyhard_suffix() -> String {
176 "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
177 }
178
179 macro_rules! reallyhard {
180 () => {
181 // The point of this being "really" hard is that it should completely
182 // thwart any prefix or suffix literal optimizations.
183 r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ.*"
184 };
185 }
186
187 bench_match!(
188 reallyhard_32,
189 reallyhard!(),
190 get_text(TXT_32, reallyhard_suffix())
191 );
192 bench_match!(
193 reallyhard_1K,
194 reallyhard!(),
195 get_text(TXT_1K, reallyhard_suffix())
196 );
197 bench_match!(
198 reallyhard_32K,
199 reallyhard!(),
200 get_text(TXT_32K, reallyhard_suffix())
201 );
202 bench_match!(
203 reallyhard_1MB,
204 reallyhard!(),
205 get_text(TXT_1MB, reallyhard_suffix())
206 );
207
reallyhard2_suffix() -> String208 fn reallyhard2_suffix() -> String {
209 "Sherlock Holmes".to_string()
210 }
211
212 macro_rules! reallyhard2 {
213 () => {
214 r"\w+\s+Holmes"
215 };
216 }
217
218 bench_match!(
219 reallyhard2_1K,
220 reallyhard2!(),
221 get_text(TXT_1K, reallyhard2_suffix())
222 );
223
224 //
225 // Benchmarks to justify the short-haystack NFA fallthrough optimization
226 // implemented by `read_captures_at` in regex/src/exec.rs. See github issue
227 // #348.
228 //
229 // The procedure used to try to determine the right hardcoded cutoff
230 // for the short-haystack optimization in issue #348 is as follows.
231 //
232 // ```
233 // > cd bench
234 // > cargo bench --features re-rust short_hay | tee dfa-nfa.res
235 // > # modify the `MatchType::Dfa` branch in exec.rs:read_captures_at
236 // > # to just execute the nfa
237 // > cargo bench --features re-rust short_hay | tee nfa-only.res
238 // > cargo benchcmp dfa-nfa.res nfa-only.res
239 // ```
240 //
241 // The expected result is that short inputs will go faster under
242 // the nfa-only mode, but at some turnover point the dfa-nfa mode
243 // will start to win again. Unfortunately, that is not what happened.
244 // Instead there was no noticeable change in the bench results, so
245 // I've opted to just do the more conservative anchor optimization.
246 //
247 bench_captures!(
248 short_haystack_1x,
249 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
250 2,
251 String::from("aaaabbbbccccbbbdddd")
252 );
253 bench_captures!(
254 short_haystack_2x,
255 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
256 2,
257 format!(
258 "{}bbbbccccbbb{}",
259 repeat("aaaa").take(2).collect::<String>(),
260 repeat("dddd").take(2).collect::<String>(),
261 )
262 );
263 bench_captures!(
264 short_haystack_3x,
265 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
266 2,
267 format!(
268 "{}bbbbccccbbb{}",
269 repeat("aaaa").take(3).collect::<String>(),
270 repeat("dddd").take(3).collect::<String>(),
271 )
272 );
273 bench_captures!(
274 short_haystack_4x,
275 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
276 2,
277 format!(
278 "{}bbbbccccbbb{}",
279 repeat("aaaa").take(4).collect::<String>(),
280 repeat("dddd").take(4).collect::<String>(),
281 )
282 );
283 bench_captures!(
284 short_haystack_10x,
285 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
286 2,
287 format!(
288 "{}bbbbccccbbb{}",
289 repeat("aaaa").take(10).collect::<String>(),
290 repeat("dddd").take(10).collect::<String>(),
291 )
292 );
293 bench_captures!(
294 short_haystack_100x,
295 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
296 2,
297 format!(
298 "{}bbbbccccbbb{}",
299 repeat("aaaa").take(100).collect::<String>(),
300 repeat("dddd").take(100).collect::<String>(),
301 )
302 );
303 bench_captures!(
304 short_haystack_1000x,
305 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
306 2,
307 format!(
308 "{}bbbbccccbbb{}",
309 repeat("aaaa").take(1000).collect::<String>(),
310 repeat("dddd").take(1000).collect::<String>(),
311 )
312 );
313 bench_captures!(
314 short_haystack_10000x,
315 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
316 2,
317 format!(
318 "{}bbbbccccbbb{}",
319 repeat("aaaa").take(10000).collect::<String>(),
320 repeat("dddd").take(10000).collect::<String>(),
321 )
322 );
323 bench_captures!(
324 short_haystack_100000x,
325 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
326 2,
327 format!(
328 "{}bbbbccccbbb{}",
329 repeat("aaaa").take(100000).collect::<String>(),
330 repeat("dddd").take(100000).collect::<String>(),
331 )
332 );
333 bench_captures!(
334 short_haystack_1000000x,
335 Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
336 2,
337 format!(
338 "{}bbbbccccbbb{}",
339 repeat("aaaa").take(1000000).collect::<String>(),
340 repeat("dddd").take(1000000).collect::<String>(),
341 )
342 );
343
344 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
345 bench_is_match_set!(
346 is_match_set,
347 true,
348 RegexSet::new(vec![
349 "aaaaaaaaaaaaaaaaaaa",
350 "abc579",
351 "def.+",
352 "e24fg",
353 "a.*2c",
354 "23.",
355 ])
356 .unwrap(),
357 format!(
358 "{}a482c{}",
359 repeat('a').take(10).collect::<String>(),
360 repeat('b').take(10).collect::<String>()
361 )
362 );
363
364 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
365 bench_matches_set!(
366 matches_set,
367 true,
368 RegexSet::new(vec![
369 "aaaaaaaaaaaaaaaaaaa",
370 "abc579",
371 "def.+",
372 "e24fg",
373 "a.*2c",
374 "23.",
375 ])
376 .unwrap(),
377 format!(
378 "{}a482c{}",
379 repeat('a').take(10).collect::<String>(),
380 repeat('b').take(10).collect::<String>()
381 )
382 );
383