1 use std::fs::File;
2 use std::str;
3
4 use docopt::Docopt;
5 use memmap::Mmap;
6
7 mod ffi;
8
/// Usage text handed to `Docopt::new` in `main`.
///
/// The command names and the `<file>`/`<pattern>` placeholders that appear
/// here correspond to the `cmd_*` and `arg_*` fields of `Args`.
const USAGE: &'static str = "
Count the number of matches of <pattern> in <file>.

This compiles the pattern once and counts all successive non-overlapping
matches in <file>. <file> is memory mapped. Matching is done as if <file> were
a single string (it is not line oriented).

Since this tool includes compilation of the <pattern>, sufficiently large
haystacks should be used to amortize the cost of compilation. (e.g., >1MB.)

Usage:
    regex-run-one [options] [onig | pcre1 | pcre2 | re2 | rust | rust-bytes | tcl] <file> <pattern>
    regex-run-one [options] (-h | --help)

Options:
    -h, --help   Show this usage message.
";
26
/// Parsed command-line arguments, deserialized from `USAGE` in `main`.
///
/// The `arg_*` fields mirror the `<pattern>` and `<file>` placeholders of the
/// usage string and the `cmd_*` fields mirror its engine commands
/// (presumably docopt's field-naming convention — confirm against the docopt
/// documentation before renaming anything here).
#[derive(Debug, serde::Deserialize)]
struct Args {
    /// The regex pattern to compile and search with.
    arg_pattern: String,
    /// Path of the file that is memory mapped and searched.
    arg_file: String,
    /// Selects `count_onig`.
    cmd_onig: bool,
    /// Selects `count_pcre1`.
    cmd_pcre1: bool,
    /// Selects `count_pcre2`.
    cmd_pcre2: bool,
    /// Selects `count_re2`.
    cmd_re2: bool,
    /// Selects `count_rust`.
    cmd_rust: bool,
    /// Selects `count_rust_bytes`.
    cmd_rust_bytes: bool,
    /// Selects `count_tcl`.
    cmd_tcl: bool,
}
39
main()40 fn main() {
41 let args: Args = Docopt::new(USAGE)
42 .and_then(|d| d.deserialize())
43 .unwrap_or_else(|e| e.exit());
44
45 let mmap =
46 unsafe { Mmap::map(&File::open(&args.arg_file).unwrap()).unwrap() };
47 let haystack = unsafe { str::from_utf8_unchecked(&mmap) };
48
49 println!("{}", args.count(&haystack));
50 }
51
52 impl Args {
count(&self, haystack: &str) -> usize53 fn count(&self, haystack: &str) -> usize {
54 let pat = &self.arg_pattern;
55 if self.cmd_onig {
56 count_onig(pat, haystack)
57 } else if self.cmd_pcre1 {
58 count_pcre1(pat, haystack)
59 } else if self.cmd_pcre2 {
60 count_pcre2(pat, haystack)
61 } else if self.cmd_re2 {
62 count_re2(pat, haystack)
63 } else if self.cmd_rust {
64 count_rust(pat, haystack)
65 } else if self.cmd_rust_bytes {
66 count_rust_bytes(pat, haystack)
67 } else if self.cmd_tcl {
68 count_tcl(pat, haystack)
69 } else {
70 panic!("unreachable")
71 }
72 }
73 }
74
75 macro_rules! nada {
76 ($feature:expr, $name:ident) => {
77 #[cfg(not(feature = $feature))]
78 fn $name(_pat: &str, _haystack: &str) -> usize {
79 panic!(
80 "Support not enabled. Re-compile with '--features {}' \
81 to enable.",
82 $feature
83 )
84 }
85 };
86 }
87
88 nada!("re-onig", count_onig);
89 #[cfg(feature = "re-onig")]
count_onig(pat: &str, haystack: &str) -> usize90 fn count_onig(pat: &str, haystack: &str) -> usize {
91 use ffi::onig::Regex;
92 Regex::new(pat).unwrap().find_iter(haystack).count()
93 }
94
95 nada!("re-pcre1", count_pcre1);
96 #[cfg(feature = "re-pcre1")]
count_pcre1(pat: &str, haystack: &str) -> usize97 fn count_pcre1(pat: &str, haystack: &str) -> usize {
98 use ffi::pcre1::Regex;
99 Regex::new(pat).unwrap().find_iter(haystack).count()
100 }
101
102 nada!("re-pcre2", count_pcre2);
103 #[cfg(feature = "re-pcre2")]
count_pcre2(pat: &str, haystack: &str) -> usize104 fn count_pcre2(pat: &str, haystack: &str) -> usize {
105 use ffi::pcre2::Regex;
106 Regex::new(pat).unwrap().find_iter(haystack).count()
107 }
108
109 nada!("re-re2", count_re2);
110 #[cfg(feature = "re-re2")]
count_re2(pat: &str, haystack: &str) -> usize111 fn count_re2(pat: &str, haystack: &str) -> usize {
112 use ffi::re2::Regex;
113 Regex::new(pat).unwrap().find_iter(haystack).count()
114 }
115
116 nada!("re-rust", count_rust);
117 #[cfg(feature = "re-rust")]
count_rust(pat: &str, haystack: &str) -> usize118 fn count_rust(pat: &str, haystack: &str) -> usize {
119 use regex::Regex;
120 Regex::new(pat).unwrap().find_iter(haystack).count()
121 }
122
123 nada!("re-rust-bytes", count_rust_bytes);
124 #[cfg(feature = "re-rust-bytes")]
count_rust_bytes(pat: &str, haystack: &str) -> usize125 fn count_rust_bytes(pat: &str, haystack: &str) -> usize {
126 use regex::bytes::Regex;
127 Regex::new(pat).unwrap().find_iter(haystack.as_bytes()).count()
128 }
129
130 nada!("re-tcl", count_tcl);
131 #[cfg(feature = "re-tcl")]
count_tcl(pat: &str, haystack: &str) -> usize132 fn count_tcl(pat: &str, haystack: &str) -> usize {
133 use ffi::tcl::{Regex, Text};
134 Regex::new(pat).unwrap().find_iter(&Text::new(haystack.to_owned())).count()
135 }
136