• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::error::Error;
2 use std::fs;
3 use std::path::PathBuf;
4 use std::process;
5 use std::result;
6 use std::time::Instant;
7 
8 use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
9 use memmap::Mmap;
10 
/// Result type used by every fallible function in this program: any error is
/// boxed as a `dyn Error` so that `?` can propagate unrelated error types.
type Result<T> = result::Result<T, Box<dyn Error>>;

// Change this to tweak the size of state IDs used in the automaton.
type Size = u32;
15 
main()16 fn main() {
17     if let Err(err) = try_main() {
18         eprintln!("{}", err);
19         process::exit(1);
20     }
21 }
22 
try_main() -> Result<()>23 fn try_main() -> Result<()> {
24     let args = Args::parse()?;
25     let ac = args.automaton()?;
26     let haystack = args.haystack()?;
27 
28     eprintln!("automaton heap usage: {} bytes", ac.heap_bytes());
29     if args.no_search {
30         return Ok(());
31     }
32 
33     let start = Instant::now();
34     let count = ac.find_iter(&haystack).count();
35     println!("match count: {}", count);
36 
37     let count_time = Instant::now().duration_since(start);
38     eprintln!("count time: {:?}", count_time);
39     Ok(())
40 }
41 
42 #[derive(Debug)]
43 struct Args {
44     dictionary: PathBuf,
45     haystack: PathBuf,
46     match_kind: MatchKind,
47     ascii_casei: bool,
48     dense_depth: usize,
49     dfa: bool,
50     prefilter: bool,
51     classes: bool,
52     premultiply: bool,
53     no_search: bool,
54 }
55 
56 impl Args {
parse() -> Result<Args>57     fn parse() -> Result<Args> {
58         use clap::{crate_authors, crate_version, App, Arg};
59 
60         let parsed = App::new("Search using aho-corasick")
61             .author(crate_authors!())
62             .version(crate_version!())
63             .max_term_width(100)
64             .arg(Arg::with_name("dictionary").required(true))
65             .arg(Arg::with_name("haystack").required(true))
66             .arg(
67                 Arg::with_name("kind")
68                     .long("kind")
69                     .possible_values(&[
70                         "standard",
71                         "leftmost-first",
72                         "leftmost-longest",
73                     ])
74                     .default_value("standard"),
75             )
76             .arg(
77                 Arg::with_name("ascii-case-insensitive")
78                     .long("ascii-case-insensitive")
79                     .short("i"),
80             )
81             .arg(
82                 Arg::with_name("dense-depth")
83                     .long("dense-depth")
84                     .default_value("2"),
85             )
86             .arg(Arg::with_name("dfa").long("dfa").short("d"))
87             .arg(Arg::with_name("prefilter").long("prefilter").short("f"))
88             .arg(Arg::with_name("classes").long("classes").short("c"))
89             .arg(Arg::with_name("premultiply").long("premultiply").short("p"))
90             .arg(Arg::with_name("no-search").long("no-search"))
91             .get_matches();
92 
93         let dictionary =
94             PathBuf::from(parsed.value_of_os("dictionary").unwrap());
95         let haystack = PathBuf::from(parsed.value_of_os("haystack").unwrap());
96         let match_kind = match parsed.value_of("kind").unwrap() {
97             "standard" => MatchKind::Standard,
98             "leftmost-first" => MatchKind::LeftmostFirst,
99             "leftmost-longest" => MatchKind::LeftmostLongest,
100             _ => unreachable!(),
101         };
102         let dense_depth = parsed.value_of("dense-depth").unwrap().parse()?;
103 
104         Ok(Args {
105             dictionary,
106             haystack,
107             match_kind,
108             dense_depth,
109             ascii_casei: parsed.is_present("ascii-case-insensitive"),
110             dfa: parsed.is_present("dfa"),
111             prefilter: parsed.is_present("prefilter"),
112             classes: parsed.is_present("classes"),
113             premultiply: parsed.is_present("premultiply"),
114             no_search: parsed.is_present("no-search"),
115         })
116     }
117 
automaton(&self) -> Result<AhoCorasick<Size>>118     fn automaton(&self) -> Result<AhoCorasick<Size>> {
119         let start = Instant::now();
120         let patterns = fs::read_to_string(&self.dictionary)?;
121         let read_time = Instant::now().duration_since(start);
122         eprintln!("pattern read time: {:?}", read_time);
123 
124         let start = Instant::now();
125         // TODO: remove when byte classes and premultiply options are removed.
126         #[allow(deprecated)]
127         let ac = AhoCorasickBuilder::new()
128             .match_kind(self.match_kind)
129             .ascii_case_insensitive(self.ascii_casei)
130             .dense_depth(self.dense_depth)
131             .dfa(self.dfa)
132             .prefilter(self.prefilter)
133             .byte_classes(self.classes)
134             .premultiply(self.premultiply)
135             .build_with_size::<Size, _, _>(patterns.lines())?;
136         let build_time = Instant::now().duration_since(start);
137         eprintln!("automaton build time: {:?}", build_time);
138 
139         Ok(ac)
140     }
141 
haystack(&self) -> Result<Mmap>142     fn haystack(&self) -> Result<Mmap> {
143         Ok(unsafe { Mmap::map(&fs::File::open(&self.haystack)?)? })
144     }
145 }
146