• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #![allow(non_snake_case)]
2 
3 use std::iter::repeat;
4 
5 use test::Bencher;
6 
7 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
8 use crate::RegexSet;
9 use crate::{Regex, Text};
10 
11 #[cfg(not(feature = "re-onig"))]
12 #[cfg(not(feature = "re-pcre1"))]
13 #[cfg(not(feature = "re-pcre2"))]
14 bench_match!(
15     no_exponential,
16     {
17         format!(
18             "{}{}",
19             repeat("a?").take(100).collect::<String>(),
20             repeat("a").take(100).collect::<String>()
21         )
22     },
23     repeat("a").take(100).collect()
24 );
25 
26 bench_match!(literal, r"y", {
27     format!("{}y", repeat("x").take(50).collect::<String>())
28 });
29 
30 bench_match!(not_literal, r".y", {
31     format!("{}y", repeat("x").take(50).collect::<String>())
32 });
33 
34 bench_match!(match_class, "[abcdw]", {
35     format!("{}w", repeat("xxxx").take(20).collect::<String>())
36 });
37 
38 bench_match!(match_class_in_range, "[ac]", {
39     format!("{}c", repeat("bbbb").take(20).collect::<String>())
40 });
41 
42 #[cfg(not(feature = "re-rust-bytes"))]
43 #[cfg(not(feature = "re-tcl"))]
44 bench_match!(match_class_unicode, r"\p{L}", {
45     format!("{}a", repeat("☃5☃5").take(20).collect::<String>())
46 });
47 
48 bench_not_match!(anchored_literal_short_non_match, r"^zbc(d|e)", {
49     "abcdefghijklmnopqrstuvwxyz".to_owned()
50 });
51 
52 bench_not_match!(anchored_literal_long_non_match, r"^zbc(d|e)", {
53     repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>()
54 });
55 
56 bench_match!(anchored_literal_short_match, r"^.bc(d|e)", {
57     "abcdefghijklmnopqrstuvwxyz".to_owned()
58 });
59 
60 bench_match!(anchored_literal_long_match, r"^.bc(d|e)", {
61     repeat("abcdefghijklmnopqrstuvwxyz").take(15).collect::<String>()
62 });
63 
64 bench_match!(one_pass_short, r"^.bc(d|e)*$", {
65     "abcddddddeeeededd".to_owned()
66 });
67 
68 bench_match!(one_pass_short_not, r".bc(d|e)*$", {
69     "abcddddddeeeededd".to_owned()
70 });
71 
72 bench_match!(one_pass_long_prefix, r"^abcdefghijklmnopqrstuvwxyz.*$", {
73     "abcdefghijklmnopqrstuvwxyz".to_owned()
74 });
75 
76 bench_match!(one_pass_long_prefix_not, r"^.bcdefghijklmnopqrstuvwxyz.*$", {
77     "abcdefghijklmnopqrstuvwxyz".to_owned()
78 });
79 
80 bench_match!(long_needle1, r"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", {
81     repeat("a").take(100_000).collect::<String>() + "b"
82 });
83 
84 bench_match!(long_needle2, r"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbba", {
85     repeat("b").take(100_000).collect::<String>() + "a"
86 });
87 
88 // This benchmark specifically targets the "reverse suffix literal"
89 // optimization. In particular, it is easy for a naive implementation to
90 // take quadratic worst case time. This benchmark provides a case for such
91 // a scenario.
92 bench_not_match!(reverse_suffix_no_quadratic, r"[r-z].*bcdefghijklmnopq", {
93     repeat("bcdefghijklmnopq").take(500).collect::<String>()
94 });
95 
96 #[cfg(feature = "re-rust")]
97 #[bench]
replace_all(b: &mut Bencher)98 fn replace_all(b: &mut Bencher) {
99     let re = regex!("[cjrw]");
100     let text = "abcdefghijklmnopqrstuvwxyz";
101     b.iter(|| re.replace_all(text, ""));
102 }
103 
104 const TXT_32: &'static str = include_str!("data/32.txt");
105 const TXT_1K: &'static str = include_str!("data/1K.txt");
106 const TXT_32K: &'static str = include_str!("data/32K.txt");
107 const TXT_1MB: &'static str = include_str!("data/1MB.txt");
108 
get_text(corpus: &str, suffix: String) -> String109 fn get_text(corpus: &str, suffix: String) -> String {
110     let mut corpus = corpus.to_string();
111     corpus.push_str(&suffix);
112     corpus
113 }
114 
easy0_suffix() -> String115 fn easy0_suffix() -> String {
116     "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
117 }
118 
119 macro_rules! easy0 {
120     () => {
121         "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
122     };
123 }
124 
125 bench_match!(easy0_32, easy0!(), get_text(TXT_32, easy0_suffix()));
126 bench_match!(easy0_1K, easy0!(), get_text(TXT_1K, easy0_suffix()));
127 bench_match!(easy0_32K, easy0!(), get_text(TXT_32K, easy0_suffix()));
128 bench_match!(easy0_1MB, easy0!(), get_text(TXT_1MB, easy0_suffix()));
129 
easy1_suffix() -> String130 fn easy1_suffix() -> String {
131     "AABCCCDEEEFGGHHHIJJ".to_string()
132 }
133 
134 macro_rules! easy1 {
135     () => {
136         r"A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
137     };
138 }
139 
140 bench_match!(easy1_32, easy1!(), get_text(TXT_32, easy1_suffix()));
141 bench_match!(easy1_1K, easy1!(), get_text(TXT_1K, easy1_suffix()));
142 bench_match!(easy1_32K, easy1!(), get_text(TXT_32K, easy1_suffix()));
143 bench_match!(easy1_1MB, easy1!(), get_text(TXT_1MB, easy1_suffix()));
144 
medium_suffix() -> String145 fn medium_suffix() -> String {
146     "XABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
147 }
148 
149 macro_rules! medium {
150     () => {
151         r"[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
152     };
153 }
154 
155 bench_match!(medium_32, medium!(), get_text(TXT_32, medium_suffix()));
156 bench_match!(medium_1K, medium!(), get_text(TXT_1K, medium_suffix()));
157 bench_match!(medium_32K, medium!(), get_text(TXT_32K, medium_suffix()));
158 bench_match!(medium_1MB, medium!(), get_text(TXT_1MB, medium_suffix()));
159 
hard_suffix() -> String160 fn hard_suffix() -> String {
161     "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
162 }
163 
164 macro_rules! hard {
165     () => {
166         r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
167     };
168 }
169 
170 bench_match!(hard_32, hard!(), get_text(TXT_32, hard_suffix()));
171 bench_match!(hard_1K, hard!(), get_text(TXT_1K, hard_suffix()));
172 bench_match!(hard_32K, hard!(), get_text(TXT_32K, hard_suffix()));
173 bench_match!(hard_1MB, hard!(), get_text(TXT_1MB, hard_suffix()));
174 
reallyhard_suffix() -> String175 fn reallyhard_suffix() -> String {
176     "ABCDEFGHIJKLMNOPQRSTUVWXYZ".to_string()
177 }
178 
179 macro_rules! reallyhard {
180     () => {
181         // The point of this being "really" hard is that it should completely
182         // thwart any prefix or suffix literal optimizations.
183         r"[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ.*"
184     };
185 }
186 
187 bench_match!(
188     reallyhard_32,
189     reallyhard!(),
190     get_text(TXT_32, reallyhard_suffix())
191 );
192 bench_match!(
193     reallyhard_1K,
194     reallyhard!(),
195     get_text(TXT_1K, reallyhard_suffix())
196 );
197 bench_match!(
198     reallyhard_32K,
199     reallyhard!(),
200     get_text(TXT_32K, reallyhard_suffix())
201 );
202 bench_match!(
203     reallyhard_1MB,
204     reallyhard!(),
205     get_text(TXT_1MB, reallyhard_suffix())
206 );
207 
reallyhard2_suffix() -> String208 fn reallyhard2_suffix() -> String {
209     "Sherlock Holmes".to_string()
210 }
211 
212 macro_rules! reallyhard2 {
213     () => {
214         r"\w+\s+Holmes"
215     };
216 }
217 
218 bench_match!(
219     reallyhard2_1K,
220     reallyhard2!(),
221     get_text(TXT_1K, reallyhard2_suffix())
222 );
223 
224 //
225 // Benchmarks to justify the short-haystack NFA fallthrough optimization
226 // implemented by `read_captures_at` in regex/src/exec.rs. See github issue
227 // #348.
228 //
229 // The procedure used to try to determine the right hardcoded cutoff
230 // for the short-haystack optimization in issue #348 is as follows.
231 //
232 // ```
233 // > cd bench
234 // > cargo bench --features re-rust short_hay | tee dfa-nfa.res
235 // > # modify the `MatchType::Dfa` branch in exec.rs:read_captures_at
236 // > # to just execute the nfa
237 // > cargo bench --features re-rust short_hay | tee nfa-only.res
238 // > cargo benchcmp dfa-nfa.res nfa-only.res
239 // ```
240 //
241 // The expected result is that short inputs will go faster under
242 // the nfa-only mode, but at some turnover point the dfa-nfa mode
243 // will start to win again. Unfortunately, that is not what happened.
244 // Instead there was no noticeable change in the bench results, so
245 // I've opted to just do the more conservative anchor optimization.
246 //
247 bench_captures!(
248     short_haystack_1x,
249     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
250     2,
251     String::from("aaaabbbbccccbbbdddd")
252 );
253 bench_captures!(
254     short_haystack_2x,
255     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
256     2,
257     format!(
258         "{}bbbbccccbbb{}",
259         repeat("aaaa").take(2).collect::<String>(),
260         repeat("dddd").take(2).collect::<String>(),
261     )
262 );
263 bench_captures!(
264     short_haystack_3x,
265     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
266     2,
267     format!(
268         "{}bbbbccccbbb{}",
269         repeat("aaaa").take(3).collect::<String>(),
270         repeat("dddd").take(3).collect::<String>(),
271     )
272 );
273 bench_captures!(
274     short_haystack_4x,
275     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
276     2,
277     format!(
278         "{}bbbbccccbbb{}",
279         repeat("aaaa").take(4).collect::<String>(),
280         repeat("dddd").take(4).collect::<String>(),
281     )
282 );
283 bench_captures!(
284     short_haystack_10x,
285     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
286     2,
287     format!(
288         "{}bbbbccccbbb{}",
289         repeat("aaaa").take(10).collect::<String>(),
290         repeat("dddd").take(10).collect::<String>(),
291     )
292 );
293 bench_captures!(
294     short_haystack_100x,
295     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
296     2,
297     format!(
298         "{}bbbbccccbbb{}",
299         repeat("aaaa").take(100).collect::<String>(),
300         repeat("dddd").take(100).collect::<String>(),
301     )
302 );
303 bench_captures!(
304     short_haystack_1000x,
305     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
306     2,
307     format!(
308         "{}bbbbccccbbb{}",
309         repeat("aaaa").take(1000).collect::<String>(),
310         repeat("dddd").take(1000).collect::<String>(),
311     )
312 );
313 bench_captures!(
314     short_haystack_10000x,
315     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
316     2,
317     format!(
318         "{}bbbbccccbbb{}",
319         repeat("aaaa").take(10000).collect::<String>(),
320         repeat("dddd").take(10000).collect::<String>(),
321     )
322 );
323 bench_captures!(
324     short_haystack_100000x,
325     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
326     2,
327     format!(
328         "{}bbbbccccbbb{}",
329         repeat("aaaa").take(100000).collect::<String>(),
330         repeat("dddd").take(100000).collect::<String>(),
331     )
332 );
333 bench_captures!(
334     short_haystack_1000000x,
335     Regex::new(r"(bbbb)cccc(bbb)").unwrap(),
336     2,
337     format!(
338         "{}bbbbccccbbb{}",
339         repeat("aaaa").take(1000000).collect::<String>(),
340         repeat("dddd").take(1000000).collect::<String>(),
341     )
342 );
343 
344 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
345 bench_is_match_set!(
346     is_match_set,
347     true,
348     RegexSet::new(vec![
349         "aaaaaaaaaaaaaaaaaaa",
350         "abc579",
351         "def.+",
352         "e24fg",
353         "a.*2c",
354         "23.",
355     ])
356     .unwrap(),
357     format!(
358         "{}a482c{}",
359         repeat('a').take(10).collect::<String>(),
360         repeat('b').take(10).collect::<String>()
361     )
362 );
363 
364 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
365 bench_matches_set!(
366     matches_set,
367     true,
368     RegexSet::new(vec![
369         "aaaaaaaaaaaaaaaaaaa",
370         "abc579",
371         "def.+",
372         "e24fg",
373         "a.*2c",
374         "23.",
375     ])
376     .unwrap(),
377     format!(
378         "{}a482c{}",
379         repeat('a').take(10).collect::<String>(),
380         repeat('b').take(10).collect::<String>()
381     )
382 );
383