// Enable the benchmarking harness. #![feature(test)] // It's too annoying to carefully define macros based on which regex engines // have which benchmarks, so just ignore these warnings. #![allow(unused_macros)] extern crate test; use cfg_if::cfg_if; cfg_if! { if #[cfg(feature = "re-pcre1")] { pub use ffi::pcre1::Regex; } else if #[cfg(feature = "re-onig")] { pub use ffi::onig::Regex; } else if #[cfg(any(feature = "re-rust"))] { pub use regex::{Regex, RegexSet}; } else if #[cfg(feature = "re-rust-bytes")] { pub use regex::bytes::{Regex, RegexSet}; } else if #[cfg(feature = "re-re2")] { pub use ffi::re2::Regex; } else if #[cfg(feature = "re-pcre2")] { pub use ffi::pcre2::Regex; } else if #[cfg(feature = "re-tcl")] { pub use ffi::tcl::Regex; } else { compile_error!( "To run the benchmarks, see `./run -h` or the HACKING.md document" ); } } // Usage: regex!(pattern) // // Builds a ::Regex from a borrowed string. // // Due to macro scoping rules, this definition only applies for the modules // defined below. Effectively, it allows us to use the same tests for both // native and dynamic regexes. macro_rules! regex { ($re:expr) => { crate::Regex::new(&$re.to_owned()).unwrap() }; } cfg_if! { if #[cfg(feature = "re-tcl")] { // Usage: text!(haystack) // // Builds a ::Text from an owned string. // // This macro is called on every input searched in every benchmark. It is // called exactly once per benchmark and its time is not included in the // benchmark timing. // // The text given to the macro is always a String, which is guaranteed to be // valid UTF-8. // // The return type should be an owned value that can deref to whatever the // regex accepts in its `is_match` and `find_iter` methods. macro_rules! text { ($text:expr) => {{ use crate::ffi::tcl::Text; Text::new($text) }} } type Text = ffi::tcl::Text; } else if #[cfg(feature = "re-rust-bytes")] { macro_rules! text { ($text:expr) => {{ let text: String = $text; text.into_bytes() }} } type Text = Vec; } else { macro_rules! text { ($text:expr) => { $text } } type Text = String; } } // Macros for writing benchmarks easily. We provide macros for benchmarking // matches, non-matches and for finding all successive non-overlapping matches // in a string (including a check that the count is correct). // USAGE: bench_match!(name, pattern, haystack) // // This benchmarks how fast a regular expression can report whether it matches // a particular haystack. If the regex doesn't match, then the benchmark fails. // Regexes are compiled exactly once. // // name is an identifier for the benchmark. // // pattern should be a &'static str representing the regular expression. // // haystack should be a String. macro_rules! bench_match { ($name:ident, $pattern:expr, $haystack:expr) => { bench_is_match!($name, true, regex!($pattern), $haystack); }; } // USAGE: bench_not_match!(name, pattern, haystack) // // This benchmarks how fast a regular expression can report whether it matches // a particular haystack. If the regex matches, then the benchmark fails. // Regexes are compiled exactly once. // // name is an identifier for the benchmark. // // pattern should be a &'static str representing the regular expression. // // haystack should be a String. macro_rules! bench_not_match { ($name:ident, $pattern:expr, $haystack:expr) => { bench_is_match!($name, false, regex!($pattern), $haystack); }; } // USAGE: bench_is_match!(name, is_match, regex, haystack) // // This benchmarks how fast a regular expression can report whether it matches // a particular haystack. If the regex match status doesn't match is_match, // then the benchmark fails. Regexes are compiled exactly once. // // name is an identifier for the benchmark. // // is_match reports whether the regex is expected to match the haystack or not. // // regex should be a ::Regex. // // haystack should be a String. macro_rules! bench_is_match { ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { use lazy_static::lazy_static; use std::sync::Mutex; // Why do we use lazy_static here? It seems sensible to just // compile a regex outside of the b.iter() call and be done with // it. However, it seems like Rust's benchmark harness actually // calls the entire benchmark function multiple times. This doesn't // factor into the timings reported in the benchmarks, but it does // make the benchmarks take substantially longer to run because // they're spending a lot of time recompiling regexes. lazy_static! { static ref RE: Mutex = Mutex::new($re); static ref TEXT: Mutex = Mutex::new(text!($haystack)); }; let re = RE.lock().unwrap(); let text = TEXT.lock().unwrap(); b.bytes = text.len() as u64; b.iter(|| { if re.is_match(&text) != $is_match { if $is_match { panic!("expected match, got not match"); } else { panic!("expected no match, got match"); } } }); } }; } // USAGE: bench_find!(name, pattern, count, haystack) // // This benchmarks how fast a regular expression can count all successive // non-overlapping matches in haystack. If the count reported does not match // the count given, then the benchmark fails. // // name is an identifier for the benchmark. // // pattern should be a &'static str representing the regular expression. // // haystack should be a String. macro_rules! bench_find { ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { use lazy_static::lazy_static; use std::sync::Mutex; lazy_static! { static ref RE: Mutex = Mutex::new(regex!($pattern)); static ref TEXT: Mutex = Mutex::new(text!($haystack)); }; let re = RE.lock().unwrap(); let text = TEXT.lock().unwrap(); b.bytes = text.len() as u64; b.iter(|| { let count = re.find_iter(&text).count(); assert_eq!($count, count) }); } }; } // USAGE: bench_captures!(name, pattern, groups, haystack); // // CONTRACT: // Given: // ident, the desired benchmarking function name // pattern : ::Regex, the regular expression to be executed // groups : usize, the number of capture groups // haystack : String, the string to search // bench_captures will benchmark how fast re.captures() produces // the capture groups in question. macro_rules! bench_captures { ($name:ident, $pattern:expr, $count:expr, $haystack:expr) => { #[cfg(feature = "re-rust")] #[bench] fn $name(b: &mut Bencher) { use lazy_static::lazy_static; use std::sync::Mutex; lazy_static! { static ref RE: Mutex = Mutex::new($pattern); static ref TEXT: Mutex = Mutex::new(text!($haystack)); }; let re = RE.lock().unwrap(); let text = TEXT.lock().unwrap(); b.bytes = text.len() as u64; b.iter(|| match re.captures(&text) { None => assert!(false, "no captures"), Some(caps) => assert_eq!($count + 1, caps.len()), }); } }; } // USAGE: bench_is_match_set!(name, is_match, regex, haystack) macro_rules! bench_is_match_set { ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { use lazy_static::lazy_static; use std::sync::Mutex; lazy_static! { static ref RE: Mutex = Mutex::new($re); static ref TEXT: Mutex = Mutex::new(text!($haystack)); }; let re = RE.lock().unwrap(); let text = TEXT.lock().unwrap(); b.bytes = text.len() as u64; b.iter(|| { if re.is_match(&text) != $is_match { if $is_match { panic!("expected match, got not match"); } else { panic!("expected no match, got match"); } } }); } }; } // USAGE: bench_matches_set!(name, is_match, regex, haystack) macro_rules! bench_matches_set { ($name:ident, $is_match:expr, $re:expr, $haystack:expr) => { #[bench] fn $name(b: &mut Bencher) { use lazy_static::lazy_static; use std::sync::Mutex; lazy_static! { static ref RE: Mutex = Mutex::new($re); static ref TEXT: Mutex = Mutex::new(text!($haystack)); }; let re = RE.lock().unwrap(); let text = TEXT.lock().unwrap(); b.bytes = text.len() as u64; b.iter(|| { if re.matches(&text).matched_any() != $is_match { if $is_match { panic!("expected match, got not match"); } else { panic!("expected no match, got match"); } } }); } }; } cfg_if! { if #[cfg(any( feature = "re-pcre1", feature = "re-onig", feature = "re-rust", feature = "re-rust-bytes", feature = "re-re2", feature = "re-pcre2", feature = "re-tcl" ))] { mod ffi; mod misc; mod regexdna; mod sherlock; } } #[cfg(any(feature = "re-rust", feature = "re-rust-bytes"))] mod rust_compile;