1 use std::error::Error;
2 use std::fs;
3 use std::path::PathBuf;
4 use std::process;
5 use std::result;
6 use std::time::Instant;
7
8 use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
9 use memmap::Mmap;
10
/// Result alias used by every fallible function in this program.
///
/// The error side is a boxed `dyn Error`, so any error type implementing
/// `std::error::Error` can be propagated with `?`.
type Result<T> = result::Result<T, Box<dyn Error>>;
12
/// Integer type used for state IDs in the automaton.
///
/// Change this to tweak the size of state IDs used in the automaton.
type Size = u32;
15
main()16 fn main() {
17 if let Err(err) = try_main() {
18 eprintln!("{}", err);
19 process::exit(1);
20 }
21 }
22
try_main() -> Result<()>23 fn try_main() -> Result<()> {
24 let args = Args::parse()?;
25 let ac = args.automaton()?;
26 let haystack = args.haystack()?;
27
28 eprintln!("automaton heap usage: {} bytes", ac.heap_bytes());
29 if args.no_search {
30 return Ok(());
31 }
32
33 let start = Instant::now();
34 let count = ac.find_iter(&haystack).count();
35 println!("match count: {}", count);
36
37 let count_time = Instant::now().duration_since(start);
38 eprintln!("count time: {:?}", count_time);
39 Ok(())
40 }
41
42 #[derive(Debug)]
43 struct Args {
44 dictionary: PathBuf,
45 haystack: PathBuf,
46 match_kind: MatchKind,
47 ascii_casei: bool,
48 dense_depth: usize,
49 dfa: bool,
50 prefilter: bool,
51 classes: bool,
52 premultiply: bool,
53 no_search: bool,
54 }
55
56 impl Args {
parse() -> Result<Args>57 fn parse() -> Result<Args> {
58 use clap::{crate_authors, crate_version, App, Arg};
59
60 let parsed = App::new("Search using aho-corasick")
61 .author(crate_authors!())
62 .version(crate_version!())
63 .max_term_width(100)
64 .arg(Arg::with_name("dictionary").required(true))
65 .arg(Arg::with_name("haystack").required(true))
66 .arg(
67 Arg::with_name("kind")
68 .long("kind")
69 .possible_values(&[
70 "standard",
71 "leftmost-first",
72 "leftmost-longest",
73 ])
74 .default_value("standard"),
75 )
76 .arg(
77 Arg::with_name("ascii-case-insensitive")
78 .long("ascii-case-insensitive")
79 .short("i"),
80 )
81 .arg(
82 Arg::with_name("dense-depth")
83 .long("dense-depth")
84 .default_value("2"),
85 )
86 .arg(Arg::with_name("dfa").long("dfa").short("d"))
87 .arg(Arg::with_name("prefilter").long("prefilter").short("f"))
88 .arg(Arg::with_name("classes").long("classes").short("c"))
89 .arg(Arg::with_name("premultiply").long("premultiply").short("p"))
90 .arg(Arg::with_name("no-search").long("no-search"))
91 .get_matches();
92
93 let dictionary =
94 PathBuf::from(parsed.value_of_os("dictionary").unwrap());
95 let haystack = PathBuf::from(parsed.value_of_os("haystack").unwrap());
96 let match_kind = match parsed.value_of("kind").unwrap() {
97 "standard" => MatchKind::Standard,
98 "leftmost-first" => MatchKind::LeftmostFirst,
99 "leftmost-longest" => MatchKind::LeftmostLongest,
100 _ => unreachable!(),
101 };
102 let dense_depth = parsed.value_of("dense-depth").unwrap().parse()?;
103
104 Ok(Args {
105 dictionary,
106 haystack,
107 match_kind,
108 dense_depth,
109 ascii_casei: parsed.is_present("ascii-case-insensitive"),
110 dfa: parsed.is_present("dfa"),
111 prefilter: parsed.is_present("prefilter"),
112 classes: parsed.is_present("classes"),
113 premultiply: parsed.is_present("premultiply"),
114 no_search: parsed.is_present("no-search"),
115 })
116 }
117
automaton(&self) -> Result<AhoCorasick<Size>>118 fn automaton(&self) -> Result<AhoCorasick<Size>> {
119 let start = Instant::now();
120 let patterns = fs::read_to_string(&self.dictionary)?;
121 let read_time = Instant::now().duration_since(start);
122 eprintln!("pattern read time: {:?}", read_time);
123
124 let start = Instant::now();
125 // TODO: remove when byte classes and premultiply options are removed.
126 #[allow(deprecated)]
127 let ac = AhoCorasickBuilder::new()
128 .match_kind(self.match_kind)
129 .ascii_case_insensitive(self.ascii_casei)
130 .dense_depth(self.dense_depth)
131 .dfa(self.dfa)
132 .prefilter(self.prefilter)
133 .byte_classes(self.classes)
134 .premultiply(self.premultiply)
135 .build_with_size::<Size, _, _>(patterns.lines())?;
136 let build_time = Instant::now().duration_since(start);
137 eprintln!("automaton build time: {:?}", build_time);
138
139 Ok(ac)
140 }
141
haystack(&self) -> Result<Mmap>142 fn haystack(&self) -> Result<Mmap> {
143 Ok(unsafe { Mmap::map(&fs::File::open(&self.haystack)?)? })
144 }
145 }
146