• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::fs::File;
2 use std::str;
3 
4 use docopt::Docopt;
5 use memmap::Mmap;
6 
7 mod ffi;
8 
/// Docopt usage text for the `regex-run-one` binary.
///
/// This string is handed to `Docopt::new` in `main`, so its layout is part of
/// the program's behavior: it defines the accepted commands, positional
/// arguments and options, and is also what is shown for `-h`/`--help`.
const USAGE: &str = "
Count the number of matches of <pattern> in <file>.

This compiles the pattern once and counts all successive non-overlapping
matches in <file>. <file> is memory mapped. Matching is done as if <file> were
a single string (it is not line oriented).

Since this tool includes compilation of the <pattern>, sufficiently large
haystacks should be used to amortize the cost of compilation. (e.g., >1MB.)

Usage:
    regex-run-one [options] [onig | pcre1 | pcre2 | re2 | rust | rust-bytes | tcl] <file> <pattern>
    regex-run-one [options] (-h | --help)

Options:
    -h, --help   Show this usage message.
";
26 
/// Parsed command-line arguments, deserialized by docopt from `USAGE`.
///
/// The field names appear to follow docopt's deserialization convention
/// (`arg_*` for `<positional>` arguments, `cmd_*` for literal commands) and
/// therefore must stay in sync with the names used in `USAGE` — confirm
/// against the docopt documentation before renaming anything.
#[derive(Debug, serde::Deserialize)]
struct Args {
    /// The `<pattern>` argument: the regex handed to the selected engine.
    arg_pattern: String,
    /// The `<file>` argument: path of the file that is memory mapped and
    /// searched.
    arg_file: String,
    /// Set when the `onig` command was given.
    cmd_onig: bool,
    /// Set when the `pcre1` command was given.
    cmd_pcre1: bool,
    /// Set when the `pcre2` command was given.
    cmd_pcre2: bool,
    /// Set when the `re2` command was given.
    cmd_re2: bool,
    /// Set when the `rust` command was given.
    cmd_rust: bool,
    /// Set when the `rust-bytes` command was given.
    cmd_rust_bytes: bool,
    /// Set when the `tcl` command was given.
    cmd_tcl: bool,
}
39 
main()40 fn main() {
41     let args: Args = Docopt::new(USAGE)
42         .and_then(|d| d.deserialize())
43         .unwrap_or_else(|e| e.exit());
44 
45     let mmap =
46         unsafe { Mmap::map(&File::open(&args.arg_file).unwrap()).unwrap() };
47     let haystack = unsafe { str::from_utf8_unchecked(&mmap) };
48 
49     println!("{}", args.count(&haystack));
50 }
51 
52 impl Args {
count(&self, haystack: &str) -> usize53     fn count(&self, haystack: &str) -> usize {
54         let pat = &self.arg_pattern;
55         if self.cmd_onig {
56             count_onig(pat, haystack)
57         } else if self.cmd_pcre1 {
58             count_pcre1(pat, haystack)
59         } else if self.cmd_pcre2 {
60             count_pcre2(pat, haystack)
61         } else if self.cmd_re2 {
62             count_re2(pat, haystack)
63         } else if self.cmd_rust {
64             count_rust(pat, haystack)
65         } else if self.cmd_rust_bytes {
66             count_rust_bytes(pat, haystack)
67         } else if self.cmd_tcl {
68             count_tcl(pat, haystack)
69         } else {
70             panic!("unreachable")
71         }
72     }
73 }
74 
// Generates a stand-in for an engine's counting function.
//
// `nada!(feature, name)` expands to a function `name(&str, &str) -> usize`
// that is compiled only when the Cargo feature `feature` is NOT enabled. The
// stand-in never returns: it panics with a message telling the user which
// feature to enable.
macro_rules! nada {
    ($feat:expr, $func:ident) => {
        #[cfg(not(feature = $feat))]
        fn $func(_pat: &str, _haystack: &str) -> usize {
            panic!(
                "Support not enabled. \
                 Re-compile with '--features {}' to enable.",
                $feat
            )
        }
    };
}
87 
88 nada!("re-onig", count_onig);
89 #[cfg(feature = "re-onig")]
count_onig(pat: &str, haystack: &str) -> usize90 fn count_onig(pat: &str, haystack: &str) -> usize {
91     use ffi::onig::Regex;
92     Regex::new(pat).unwrap().find_iter(haystack).count()
93 }
94 
95 nada!("re-pcre1", count_pcre1);
96 #[cfg(feature = "re-pcre1")]
count_pcre1(pat: &str, haystack: &str) -> usize97 fn count_pcre1(pat: &str, haystack: &str) -> usize {
98     use ffi::pcre1::Regex;
99     Regex::new(pat).unwrap().find_iter(haystack).count()
100 }
101 
102 nada!("re-pcre2", count_pcre2);
103 #[cfg(feature = "re-pcre2")]
count_pcre2(pat: &str, haystack: &str) -> usize104 fn count_pcre2(pat: &str, haystack: &str) -> usize {
105     use ffi::pcre2::Regex;
106     Regex::new(pat).unwrap().find_iter(haystack).count()
107 }
108 
109 nada!("re-re2", count_re2);
110 #[cfg(feature = "re-re2")]
count_re2(pat: &str, haystack: &str) -> usize111 fn count_re2(pat: &str, haystack: &str) -> usize {
112     use ffi::re2::Regex;
113     Regex::new(pat).unwrap().find_iter(haystack).count()
114 }
115 
116 nada!("re-rust", count_rust);
117 #[cfg(feature = "re-rust")]
count_rust(pat: &str, haystack: &str) -> usize118 fn count_rust(pat: &str, haystack: &str) -> usize {
119     use regex::Regex;
120     Regex::new(pat).unwrap().find_iter(haystack).count()
121 }
122 
123 nada!("re-rust-bytes", count_rust_bytes);
124 #[cfg(feature = "re-rust-bytes")]
count_rust_bytes(pat: &str, haystack: &str) -> usize125 fn count_rust_bytes(pat: &str, haystack: &str) -> usize {
126     use regex::bytes::Regex;
127     Regex::new(pat).unwrap().find_iter(haystack.as_bytes()).count()
128 }
129 
130 nada!("re-tcl", count_tcl);
131 #[cfg(feature = "re-tcl")]
count_tcl(pat: &str, haystack: &str) -> usize132 fn count_tcl(pat: &str, haystack: &str) -> usize {
133     use ffi::tcl::{Regex, Text};
134     Regex::new(pat).unwrap().find_iter(&Text::new(haystack.to_owned())).count()
135 }
136