• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Enable the benchmarking harness.
2 #![feature(test)]
3 // It's too annoying to carefully define macros based on which regex engines
4 // have which benchmarks, so just ignore these warnings.
5 #![allow(unused_macros)]
6 
7 extern crate test;
8 
9 use cfg_if::cfg_if;
10 
11 cfg_if! {
12     if #[cfg(feature = "re-pcre1")] {
13         pub use ffi::pcre1::Regex;
14     } else if #[cfg(feature = "re-onig")] {
15         pub use ffi::onig::Regex;
16     } else if #[cfg(any(feature = "re-rust"))] {
17         pub use regex::{Regex, RegexSet};
18     } else if #[cfg(feature = "re-rust-bytes")] {
19         pub use regex::bytes::{Regex, RegexSet};
20     } else if #[cfg(feature = "re-re2")] {
21         pub use ffi::re2::Regex;
22     } else if #[cfg(feature = "re-pcre2")] {
23         pub use ffi::pcre2::Regex;
24     } else if #[cfg(feature = "re-tcl")] {
25         pub use ffi::tcl::Regex;
26     } else {
27         compile_error!(
28             "To run the benchmarks, see `./run -h` or the HACKING.md document"
29         );
30     }
31 }
32 
// Usage: regex!(pattern)
//
// Compiles `pattern` into the crate-level `Regex` (whichever engine was
// selected by the enabled `re-*` feature) and panics if compilation fails.
//
// The pattern is first copied with `to_owned()`, and the copy is handed to
// `Regex::new` by reference.
//
// `macro_rules!` macros are scoped textually, so this definition is only
// visible to the modules declared further down in this file. That is what
// lets the same benchmark definitions be reused for every engine.
macro_rules! regex {
    ($pattern:expr) => {{
        let owned_pattern = $pattern.to_owned();
        crate::Regex::new(&owned_pattern).unwrap()
    }};
}
45 
46 cfg_if! {
47     if #[cfg(feature = "re-tcl")] {
48         // Usage: text!(haystack)
49         //
50         // Builds a ::Text from an owned string.
51         //
52         // This macro is called on every input searched in every benchmark. It is
53         // called exactly once per benchmark and its time is not included in the
54         // benchmark timing.
55         //
56         // The text given to the macro is always a String, which is guaranteed to be
57         // valid UTF-8.
58         //
59         // The return type should be an owned value that can deref to whatever the
60         // regex accepts in its `is_match` and `find_iter` methods.
61         macro_rules! text {
62             ($text:expr) => {{
63                 use crate::ffi::tcl::Text;
64                 Text::new($text)
65             }}
66         }
67         type Text = ffi::tcl::Text;
68     } else if #[cfg(feature = "re-rust-bytes")] {
69         macro_rules! text {
70             ($text:expr) => {{
71                 let text: String = $text;
72                 text.into_bytes()
73             }}
74         }
75         type Text = Vec<u8>;
76     } else {
77         macro_rules! text {
78             ($text:expr) => { $text }
79         }
80         type Text = String;
81     }
82 }
83 
84 // Macros for writing benchmarks easily. We provide macros for benchmarking
85 // matches, non-matches and for finding all successive non-overlapping matches
86 // in a string (including a check that the count is correct).
87 
// USAGE: bench_match!(name, pattern, haystack)
//
// Defines a benchmark measuring how fast a regular expression reports that
// it matches a haystack. The benchmark fails if the regex does not match.
// The regex is compiled exactly once.
//
// This is `bench_is_match!` with the expected result fixed to `true` and the
// regex built from `pattern` by `regex!`.
//
//   name      identifier for the benchmark
//   pattern   a &'static str holding the regular expression
//   haystack  a String to search
macro_rules! bench_match {
    ($bench:ident, $pat:expr, $input:expr) => {
        bench_is_match!($bench, true, regex!($pat), $input);
    };
}
104 
// USAGE: bench_not_match!(name, pattern, haystack)
//
// Defines a benchmark measuring how fast a regular expression reports that
// it does NOT match a haystack. The benchmark fails if the regex matches.
// The regex is compiled exactly once.
//
// This is `bench_is_match!` with the expected result fixed to `false` and
// the regex built from `pattern` by `regex!`.
//
//   name      identifier for the benchmark
//   pattern   a &'static str holding the regular expression
//   haystack  a String to search
macro_rules! bench_not_match {
    ($bench:ident, $pat:expr, $input:expr) => {
        bench_is_match!($bench, false, regex!($pat), $input);
    };
}
121 
// USAGE: bench_is_match!(name, is_match, regex, haystack)
//
// Defines a benchmark measuring how fast a regular expression can report
// whether it matches a haystack. The benchmark fails (panics) when the
// reported match status differs from `is_match`. The regex is compiled
// exactly once.
//
//   name      identifier for the benchmark
//   is_match  whether the regex is expected to match the haystack
//   regex     a ::Regex
//   haystack  a String to search
macro_rules! bench_is_match {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // The regex and haystack live in lazily initialised statics
            // rather than in locals built before `iter()`. Rust's benchmark
            // harness seems to call the entire benchmark function multiple
            // times; that does not show up in the reported timings, but
            // recompiling the regex on every call makes the benchmarks take
            // substantially longer to run.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            };
            let compiled = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            bencher.bytes = input.len() as u64;
            bencher.iter(|| {
                if compiled.is_match(&input) == $is_match {
                    return;
                }
                // Mismatch: say which direction the expectation failed in.
                if $is_match {
                    panic!("expected match, got not match");
                } else {
                    panic!("expected no match, got match");
                }
            });
        }
    };
}
168 
// USAGE: bench_find!(name, pattern, count, haystack)
//
// Defines a benchmark measuring how fast a regular expression can count all
// successive non-overlapping matches in a haystack. The benchmark fails if
// the number of matches found differs from `count`.
//
//   name      identifier for the benchmark
//   pattern   a &'static str holding the regular expression
//   count     the expected number of matches
//   haystack  a String to search
macro_rules! bench_find {
    ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and shared across repeated calls of this function.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern));
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            };
            let compiled = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            bencher.bytes = input.len() as u64;
            bencher.iter(|| {
                let found = compiled.find_iter(&input).count();
                assert_eq!($count, found);
            });
        }
    };
}
201 
// USAGE: bench_captures!(name, regex, groups, haystack);
//
// Defines a benchmark measuring how fast `re.captures()` produces the
// capture groups for a haystack. The generated function only exists when the
// `re-rust` feature is enabled.
//
//   name      identifier for the benchmark
//   regex     a ::Regex (already compiled; it is NOT passed through
//             `regex!`)
//   groups    usize, the number of explicit capture groups in the regex
//   haystack  a String to search
//
// The benchmark fails if the regex does not match at all, or if the number
// of groups reported by the match differs from `groups + 1`. The extra one
// accounts for the implicit group covering the whole match, which
// `Captures::len` includes.
macro_rules! bench_captures {
    ($name:ident, $re:expr, $groups:expr, $haystack:expr) => {
        #[cfg(feature = "re-rust")]
        #[bench]
        fn $name(b: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and shared across repeated calls of this function.
            lazy_static! {
                static ref RE: Mutex<Regex> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            };
            let re = RE.lock().unwrap();
            let text = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            b.bytes = text.len() as u64;
            b.iter(|| match re.captures(&text) {
                None => panic!("no captures"),
                Some(caps) => assert_eq!($groups + 1, caps.len()),
            });
        }
    };
}
234 
// USAGE: bench_is_match_set!(name, is_match, regex, haystack)
//
// Defines a benchmark measuring how fast a regex set can report, through
// `is_match`, whether it matches a haystack. The benchmark fails (panics)
// when the reported status differs from `is_match`.
//
//   name      identifier for the benchmark
//   is_match  whether the set is expected to match the haystack
//   regex     a ::RegexSet
//   haystack  a String to search
macro_rules! bench_is_match_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and shared across repeated calls of this function.
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            };
            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            bencher.bytes = input.len() as u64;
            bencher.iter(|| {
                if set.is_match(&input) == $is_match {
                    return;
                }
                // Mismatch: say which direction the expectation failed in.
                if $is_match {
                    panic!("expected match, got not match");
                } else {
                    panic!("expected no match, got match");
                }
            });
        }
    };
}
262 
// USAGE: bench_matches_set!(name, is_match, regex, haystack)
//
// Defines a benchmark measuring how fast a regex set can compute its matches
// for a haystack via `matches`, checking afterwards with `matched_any`
// whether anything matched. The benchmark fails (panics) when that result
// differs from `is_match`.
//
//   name      identifier for the benchmark
//   is_match  whether at least one regex in the set is expected to match
//   regex     a ::RegexSet
//   haystack  a String to search
macro_rules! bench_matches_set {
    ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            use lazy_static::lazy_static;
            use std::sync::Mutex;

            // Built once and shared across repeated calls of this function.
            lazy_static! {
                static ref RE: Mutex<RegexSet> = Mutex::new($re);
                static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack));
            };
            let set = RE.lock().unwrap();
            let input = TEXT.lock().unwrap();
            // Report throughput relative to the haystack length.
            bencher.bytes = input.len() as u64;
            bencher.iter(|| {
                if set.matches(&input).matched_any() == $is_match {
                    return;
                }
                // Mismatch: say which direction the expectation failed in.
                if $is_match {
                    panic!("expected match, got not match");
                } else {
                    panic!("expected no match, got match");
                }
            });
        }
    };
}
290 
291 cfg_if! {
292     if #[cfg(any(
293         feature = "re-pcre1",
294         feature = "re-onig",
295         feature = "re-rust",
296         feature = "re-rust-bytes",
297         feature = "re-re2",
298         feature = "re-pcre2",
299         feature = "re-tcl"
300     ))] {
301         mod ffi;
302         mod misc;
303         mod regexdna;
304         mod sherlock;
305     }
306 }
307 
308 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))]
309 mod rust_compile;
310