1 // Enable the benchmarking harness. 2 #![feature(test)] 3 // It's too annoying to carefully define macros based on which regex engines 4 // have which benchmarks, so just ignore these warnings. 5 #![allow(unused_macros)] 6 7 extern crate test; 8 9 use cfg_if::cfg_if; 10 11 cfg_if! { 12 if #[cfg(feature = "re-pcre1")] { 13 pub use ffi::pcre1::Regex; 14 } else if #[cfg(feature = "re-onig")] { 15 pub use ffi::onig::Regex; 16 } else if #[cfg(any(feature = "re-rust"))] { 17 pub use regex::{Regex, RegexSet}; 18 } else if #[cfg(feature = "re-rust-bytes")] { 19 pub use regex::bytes::{Regex, RegexSet}; 20 } else if #[cfg(feature = "re-re2")] { 21 pub use ffi::re2::Regex; 22 } else if #[cfg(feature = "re-pcre2")] { 23 pub use ffi::pcre2::Regex; 24 } else if #[cfg(feature = "re-tcl")] { 25 pub use ffi::tcl::Regex; 26 } else { 27 compile_error!( 28 "To run the benchmarks, see `./run -h` or the HACKING.md document" 29 ); 30 } 31 } 32 33 // Usage: regex!(pattern) 34 // 35 // Builds a ::Regex from a borrowed string. 36 // 37 // Due to macro scoping rules, this definition only applies for the modules 38 // defined below. Effectively, it allows us to use the same tests for both 39 // native and dynamic regexes. 40 macro_rules! regex { 41 ($re:expr) => { 42 crate::Regex::new(&$re.to_owned()).unwrap() 43 }; 44 } 45 46 cfg_if! { 47 if #[cfg(feature = "re-tcl")] { 48 // Usage: text!(haystack) 49 // 50 // Builds a ::Text from an owned string. 51 // 52 // This macro is called on every input searched in every benchmark. It is 53 // called exactly once per benchmark and its time is not included in the 54 // benchmark timing. 55 // 56 // The text given to the macro is always a String, which is guaranteed to be 57 // valid UTF-8. 58 // 59 // The return type should be an owned value that can deref to whatever the 60 // regex accepts in its `is_match` and `find_iter` methods. 61 macro_rules! text { 62 ($text:expr) => {{ 63 use crate::ffi::tcl::Text; 64 Text::new($text) 65 }} 66 } 67 type Text = ffi::tcl::Text; 68 } else if #[cfg(feature = "re-rust-bytes")] { 69 macro_rules! text { 70 ($text:expr) => {{ 71 let text: String = $text; 72 text.into_bytes() 73 }} 74 } 75 type Text = Vec<u8>; 76 } else { 77 macro_rules! text { 78 ($text:expr) => { $text } 79 } 80 type Text = String; 81 } 82 } 83 84 // Macros for writing benchmarks easily. We provide macros for benchmarking 85 // matches, non-matches and for finding all successive non-overlapping matches 86 // in a string (including a check that the count is correct). 87 88 // USAGE: bench_match!(name, pattern, haystack) 89 // 90 // This benchmarks how fast a regular expression can report whether it matches 91 // a particular haystack. If the regex doesn't match, then the benchmark fails. 92 // Regexes are compiled exactly once. 93 // 94 // name is an identifier for the benchmark. 95 // 96 // pattern should be a &'static str representing the regular expression. 97 // 98 // haystack should be a String. 99 macro_rules! bench_match { 100 ($name:ident, $pattern:expr, $haystack:expr) => { 101 bench_is_match!($name, true, regex!($pattern), $haystack); 102 }; 103 } 104 105 // USAGE: bench_not_match!(name, pattern, haystack) 106 // 107 // This benchmarks how fast a regular expression can report whether it matches 108 // a particular haystack. If the regex matches, then the benchmark fails. 109 // Regexes are compiled exactly once. 110 // 111 // name is an identifier for the benchmark. 112 // 113 // pattern should be a &'static str representing the regular expression. 114 // 115 // haystack should be a String. 116 macro_rules! bench_not_match { 117 ($name:ident, $pattern:expr, $haystack:expr) => { 118 bench_is_match!($name, false, regex!($pattern), $haystack); 119 }; 120 } 121 122 // USAGE: bench_is_match!(name, is_match, regex, haystack) 123 // 124 // This benchmarks how fast a regular expression can report whether it matches 125 // a particular haystack. If the regex match status doesn't match is_match, 126 // then the benchmark fails. Regexes are compiled exactly once. 127 // 128 // name is an identifier for the benchmark. 129 // 130 // is_match reports whether the regex is expected to match the haystack or not. 131 // 132 // regex should be a ::Regex. 133 // 134 // haystack should be a String. 135 macro_rules! bench_is_match { 136 ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 137 #[bench] 138 fn $name(b: &mut Bencher) { 139 use lazy_static::lazy_static; 140 use std::sync::Mutex; 141 142 // Why do we use lazy_static here? It seems sensible to just 143 // compile a regex outside of the b.iter() call and be done with 144 // it. However, it seems like Rust's benchmark harness actually 145 // calls the entire benchmark function multiple times. This doesn't 146 // factor into the timings reported in the benchmarks, but it does 147 // make the benchmarks take substantially longer to run because 148 // they're spending a lot of time recompiling regexes. 149 lazy_static! { 150 static ref RE: Mutex<Regex> = Mutex::new($re); 151 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 152 }; 153 let re = RE.lock().unwrap(); 154 let text = TEXT.lock().unwrap(); 155 b.bytes = text.len() as u64; 156 b.iter(|| { 157 if re.is_match(&text) != $is_match { 158 if $is_match { 159 panic!("expected match, got not match"); 160 } else { 161 panic!("expected no match, got match"); 162 } 163 } 164 }); 165 } 166 }; 167 } 168 169 // USAGE: bench_find!(name, pattern, count, haystack) 170 // 171 // This benchmarks how fast a regular expression can count all successive 172 // non-overlapping matches in haystack. If the count reported does not match 173 // the count given, then the benchmark fails. 174 // 175 // name is an identifier for the benchmark. 176 // 177 // pattern should be a &'static str representing the regular expression. 178 // 179 // haystack should be a String. 180 macro_rules! bench_find { 181 ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { 182 #[bench] 183 fn $name(b: &mut Bencher) { 184 use lazy_static::lazy_static; 185 use std::sync::Mutex; 186 187 lazy_static! { 188 static ref RE: Mutex<Regex> = Mutex::new(regex!($pattern)); 189 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 190 }; 191 let re = RE.lock().unwrap(); 192 let text = TEXT.lock().unwrap(); 193 b.bytes = text.len() as u64; 194 b.iter(|| { 195 let count = re.find_iter(&text).count(); 196 assert_eq!($count, count) 197 }); 198 } 199 }; 200 } 201 202 // USAGE: bench_captures!(name, pattern, groups, haystack); 203 // 204 // CONTRACT: 205 // Given: 206 // ident, the desired benchmarking function name 207 // pattern : ::Regex, the regular expression to be executed 208 // groups : usize, the number of capture groups 209 // haystack : String, the string to search 210 // bench_captures will benchmark how fast re.captures() produces 211 // the capture groups in question. 212 macro_rules! bench_captures { 213 ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { 214 #[cfg(feature = "re-rust")] 215 #[bench] 216 fn $name(b: &mut Bencher) { 217 use lazy_static::lazy_static; 218 use std::sync::Mutex; 219 220 lazy_static! { 221 static ref RE: Mutex<Regex> = Mutex::new($pattern); 222 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 223 }; 224 let re = RE.lock().unwrap(); 225 let text = TEXT.lock().unwrap(); 226 b.bytes = text.len() as u64; 227 b.iter(|| match re.captures(&text) { 228 None => assert!(false, "no captures"), 229 Some(caps) => assert_eq!($count + 1, caps.len()), 230 }); 231 } 232 }; 233 } 234 235 // USAGE: bench_is_match_set!(name, is_match, regex, haystack) 236 macro_rules! bench_is_match_set { 237 ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 238 #[bench] 239 fn $name(b: &mut Bencher) { 240 use lazy_static::lazy_static; 241 use std::sync::Mutex; 242 243 lazy_static! { 244 static ref RE: Mutex<RegexSet> = Mutex::new($re); 245 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 246 }; 247 let re = RE.lock().unwrap(); 248 let text = TEXT.lock().unwrap(); 249 b.bytes = text.len() as u64; 250 b.iter(|| { 251 if re.is_match(&text) != $is_match { 252 if $is_match { 253 panic!("expected match, got not match"); 254 } else { 255 panic!("expected no match, got match"); 256 } 257 } 258 }); 259 } 260 }; 261 } 262 263 // USAGE: bench_matches_set!(name, is_match, regex, haystack) 264 macro_rules! bench_matches_set { 265 ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { 266 #[bench] 267 fn $name(b: &mut Bencher) { 268 use lazy_static::lazy_static; 269 use std::sync::Mutex; 270 271 lazy_static! { 272 static ref RE: Mutex<RegexSet> = Mutex::new($re); 273 static ref TEXT: Mutex<Text> = Mutex::new(text!($haystack)); 274 }; 275 let re = RE.lock().unwrap(); 276 let text = TEXT.lock().unwrap(); 277 b.bytes = text.len() as u64; 278 b.iter(|| { 279 if re.matches(&text).matched_any() != $is_match { 280 if $is_match { 281 panic!("expected match, got not match"); 282 } else { 283 panic!("expected no match, got match"); 284 } 285 } 286 }); 287 } 288 }; 289 } 290 291 cfg_if! { 292 if #[cfg(any( 293 feature = "re-pcre1", 294 feature = "re-onig", 295 feature = "re-rust", 296 feature = "re-rust-bytes", 297 feature = "re-re2", 298 feature = "re-pcre2", 299 feature = "re-tcl" 300 ))] { 301 mod ffi; 302 mod misc; 303 mod regexdna; 304 mod sherlock; 305 } 306 } 307 308 #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] 309 mod rust_compile; 310