• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::collections::HashMap;
2 use std::io;
3 use std::usize;
4 
5 use crate::{AhoCorasickBuilder, Match, MatchKind};
6 
7 /// A description of a single test against an Aho-Corasick automaton.
8 ///
9 /// A single test may not necessarily pass on every configuration of an
10 /// Aho-Corasick automaton. The tests are categorized and grouped appropriately
11 /// below.
12 #[derive(Clone, Debug, Eq, PartialEq)]
13 struct SearchTest {
14     /// The name of this test, for debugging.
15     name: &'static str,
16     /// The patterns to search for.
17     patterns: &'static [&'static str],
18     /// The text to search.
19     haystack: &'static str,
20     /// Each match is a triple of (pattern_index, start, end), where
21     /// pattern_index is an index into `patterns` and `start`/`end` are indices
22     /// into `haystack`.
23     matches: &'static [(usize, usize, usize)],
24 }
25 
26 /// Short-hand constructor for SearchTest. We use it a lot below.
27 macro_rules! t {
28     ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
29         SearchTest {
30             name: stringify!($name),
31             patterns: $patterns,
32             haystack: $haystack,
33             matches: $matches,
34         }
35     };
36 }
37 
38 /// A collection of test groups.
39 type TestCollection = &'static [&'static [SearchTest]];
40 
41 // Define several collections corresponding to the different type of match
42 // semantics supported by Aho-Corasick. These collections have some overlap,
43 // but each collection should have some tests that no other collection has.
44 
45 /// Tests for Aho-Corasick's standard non-overlapping match semantics.
46 const AC_STANDARD_NON_OVERLAPPING: TestCollection =
47     &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION];
48 
49 /// Tests for Aho-Corasick's anchored standard non-overlapping match semantics.
50 const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection =
51     &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED];
52 
53 /// Tests for Aho-Corasick's standard overlapping match semantics.
54 const AC_STANDARD_OVERLAPPING: TestCollection =
55     &[BASICS, OVERLAPPING, REGRESSION];
56 
57 /// Tests for Aho-Corasick's anchored standard overlapping match semantics.
58 const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection =
59     &[ANCHORED_BASICS, ANCHORED_OVERLAPPING];
60 
61 /// Tests for Aho-Corasick's leftmost-first match semantics.
62 const AC_LEFTMOST_FIRST: TestCollection =
63     &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION];
64 
65 /// Tests for Aho-Corasick's anchored leftmost-first match semantics.
66 const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[
67     ANCHORED_BASICS,
68     ANCHORED_NON_OVERLAPPING,
69     ANCHORED_LEFTMOST,
70     ANCHORED_LEFTMOST_FIRST,
71 ];
72 
73 /// Tests for Aho-Corasick's leftmost-longest match semantics.
74 const AC_LEFTMOST_LONGEST: TestCollection =
75     &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION];
76 
77 /// Tests for Aho-Corasick's anchored leftmost-longest match semantics.
78 const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[
79     ANCHORED_BASICS,
80     ANCHORED_NON_OVERLAPPING,
81     ANCHORED_LEFTMOST,
82     ANCHORED_LEFTMOST_LONGEST,
83 ];
84 
85 // Now define the individual tests that make up the collections above.
86 
87 /// A collection of tests for the Aho-Corasick algorithm that should always be
88 /// true regardless of match semantics. That is, all combinations of
89 /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping}
90 /// should produce the same answer.
91 const BASICS: &'static [SearchTest] = &[
92     t!(basic000, &[], "", &[]),
93     t!(basic001, &["a"], "", &[]),
94     t!(basic010, &["a"], "a", &[(0, 0, 1)]),
95     t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
96     t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
97     t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
98     t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
99     t!(basic060, &["a"], "bbb", &[]),
100     t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
101     t!(basic100, &["aa"], "", &[]),
102     t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
103     t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
104     t!(basic130, &["aa"], "abbab", &[]),
105     t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
106     t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
107     t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
108     t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
109     t!(basic300, &["a", "b"], "", &[]),
110     t!(basic310, &["a", "b"], "z", &[]),
111     t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
112     t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
113     t!(
114         basic340,
115         &["a", "b"],
116         "abba",
117         &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
118     ),
119     t!(
120         basic350,
121         &["b", "a"],
122         "abba",
123         &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
124     ),
125     t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
126     t!(basic400, &["foo", "bar"], "", &[]),
127     t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
128     t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
129     t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
130     t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
131     t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
132     t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
133     t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
134     t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
135     t!(basic600, &[""], "", &[(0, 0, 0)]),
136     t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
137     t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
138     t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
139     t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
140     t!(
141         basic720,
142         &["yabcdef", "bcdeyabc", "abcdezghi"],
143         "yabcdezghi",
144         &[(2, 1, 10),]
145     ),
146 ];
147 
148 /// A collection of *anchored* tests for the Aho-Corasick algorithm that should
149 /// always be true regardless of match semantics. That is, all combinations of
150 /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should
151 /// produce the same answer.
152 const ANCHORED_BASICS: &'static [SearchTest] = &[
153     t!(abasic000, &[], "", &[]),
154     t!(abasic010, &[""], "", &[(0, 0, 0)]),
155     t!(abasic020, &[""], "a", &[(0, 0, 0)]),
156     t!(abasic030, &[""], "abc", &[(0, 0, 0)]),
157     t!(abasic100, &["a"], "a", &[(0, 0, 1)]),
158     t!(abasic110, &["a"], "aa", &[(0, 0, 1)]),
159     t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1)]),
160     t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1)]),
161     t!(abasic140, &["foo", "foofoo"], "foo", &[(0, 0, 3)]),
162     t!(abasic150, &["foofoo", "foo"], "foo", &[(1, 0, 3)]),
163 ];
164 
165 /// Tests for non-overlapping standard match semantics.
166 ///
167 /// These tests generally shouldn't pass for leftmost-{first,longest}, although
168 /// some do in order to write clearer tests. For example, standard000 will
169 /// pass with leftmost-first semantics, but standard010 will not. We write
170 /// both to emphasize how the match semantics work.
171 const STANDARD: &'static [SearchTest] = &[
172     t!(standard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
173     t!(standard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
174     t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
175     t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
176     t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
177     t!(
178         standard400,
179         &["abcd", "bcd", "cd", "b"],
180         "abcd",
181         &[(3, 1, 2), (2, 2, 4),]
182     ),
183     t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
184     t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]),
185     t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
186     t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]),
187     t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
188 ];
189 
190 /// Like STANDARD, but for anchored searches.
191 const STANDARD_ANCHORED: &'static [SearchTest] = &[
192     t!(astandard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
193     t!(astandard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
194     t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
195     t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
196     t!(astandard040, &["a", ""], "a", &[(1, 0, 0)]),
197     t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
198     t!(astandard410, &["", "a"], "a", &[(0, 0, 0)]),
199     t!(astandard420, &["", "a"], "aa", &[(0, 0, 0)]),
200     t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0)]),
201     t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0)]),
202     t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0)]),
203 ];
204 
205 /// Tests for non-overlapping leftmost match semantics. These should pass for
206 /// both leftmost-first and leftmost-longest match kinds. Stated differently,
207 /// among ambiguous matches, the longest match and the match that appeared
208 /// first when constructing the automaton should always be the same.
209 const LEFTMOST: &'static [SearchTest] = &[
210     t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
211     t!(leftmost010, &["a", ""], "a", &[(0, 0, 1), (1, 1, 1)]),
212     t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
213     t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
214     t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
215     t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
216     t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
217     t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
218     t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
219     t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
220     t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
221     t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
222     t!(
223         leftmost360,
224         &["abcdefghi", "hz", "abcdefgh"],
225         "abcdefghz",
226         &[(2, 0, 8),]
227     ),
228     t!(
229         leftmost370,
230         &["abcdefghi", "cde", "hz", "abcdefgh"],
231         "abcdefghz",
232         &[(3, 0, 8),]
233     ),
234     t!(
235         leftmost380,
236         &["abcdefghi", "hz", "abcdefgh", "a"],
237         "abcdefghz",
238         &[(2, 0, 8),]
239     ),
240     t!(
241         leftmost390,
242         &["b", "abcdefghi", "hz", "abcdefgh"],
243         "abcdefghz",
244         &[(3, 0, 8),]
245     ),
246     t!(
247         leftmost400,
248         &["h", "abcdefghi", "hz", "abcdefgh"],
249         "abcdefghz",
250         &[(3, 0, 8),]
251     ),
252     t!(
253         leftmost410,
254         &["z", "abcdefghi", "hz", "abcdefgh"],
255         "abcdefghz",
256         &[(3, 0, 8), (0, 8, 9),]
257     ),
258 ];
259 
260 /// Like LEFTMOST, but for anchored searches.
261 const ANCHORED_LEFTMOST: &'static [SearchTest] = &[
262     t!(aleftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
263     t!(aleftmost010, &["a", ""], "a", &[(0, 0, 1)]),
264     t!(aleftmost020, &["", ""], "a", &[(0, 0, 0)]),
265     t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1)]),
266     t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1)]),
267     t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]),
268     t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]),
269     t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]),
270     t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]),
271     t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]),
272     t!(aleftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
273     t!(aleftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
274     t!(
275         aleftmost360,
276         &["abcdefghi", "hz", "abcdefgh"],
277         "abcdefghz",
278         &[(2, 0, 8),]
279     ),
280     t!(
281         aleftmost370,
282         &["abcdefghi", "cde", "hz", "abcdefgh"],
283         "abcdefghz",
284         &[(3, 0, 8),]
285     ),
286     t!(
287         aleftmost380,
288         &["abcdefghi", "hz", "abcdefgh", "a"],
289         "abcdefghz",
290         &[(2, 0, 8),]
291     ),
292     t!(
293         aleftmost390,
294         &["b", "abcdefghi", "hz", "abcdefgh"],
295         "abcdefghz",
296         &[(3, 0, 8),]
297     ),
298     t!(
299         aleftmost400,
300         &["h", "abcdefghi", "hz", "abcdefgh"],
301         "abcdefghz",
302         &[(3, 0, 8),]
303     ),
304     t!(
305         aleftmost410,
306         &["z", "abcdefghi", "hz", "abcdefgh"],
307         "abcdefghz",
308         &[(3, 0, 8)]
309     ),
310 ];
311 
312 /// Tests for non-overlapping leftmost-first match semantics. These tests
313 /// should generally be specific to leftmost-first, which means they should
314 /// generally fail under leftmost-longest semantics.
315 const LEFTMOST_FIRST: &'static [SearchTest] = &[
316     t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
317     t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
318     t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
319     t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
320     t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
321     t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
322     t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
323     t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
324     t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
325     t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
326     t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
327     t!(
328         leftfirst310,
329         &["abcd", "b", "bce", "ce"],
330         "abce",
331         &[(1, 1, 2), (3, 2, 4),]
332     ),
333     t!(
334         leftfirst320,
335         &["a", "abcdefghi", "hz", "abcdefgh"],
336         "abcdefghz",
337         &[(0, 0, 1), (2, 7, 9),]
338     ),
339     t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
340     t!(leftfirst400, &["amwix", "samwise", "sam"], "Zsamwix", &[(2, 1, 4)]),
341 ];
342 
343 /// Like LEFTMOST_FIRST, but for anchored searches.
344 const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[
345     t!(aleftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
346     t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0)]),
347     t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0)]),
348     t!(aleftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]),
349     t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0)]),
350     t!(aleftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
351     t!(aleftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
352     t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]),
353     t!(aleftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
354     t!(aleftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
355     t!(aleftfirst300, &["abcd", "b", "bce"], "abce", &[]),
356     t!(aleftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[]),
357     t!(
358         aleftfirst320,
359         &["a", "abcdefghi", "hz", "abcdefgh"],
360         "abcdefghz",
361         &[(0, 0, 1)]
362     ),
363     t!(aleftfirst330, &["a", "abab"], "abab", &[(0, 0, 1)]),
364     t!(aleftfirst400, &["wise", "samwise", "sam"], "samwix", &[(2, 0, 3)]),
365 ];
366 
367 /// Tests for non-overlapping leftmost-longest match semantics. These tests
368 /// should generally be specific to leftmost-longest, which means they should
369 /// generally fail under leftmost-first semantics.
370 const LEFTMOST_LONGEST: &'static [SearchTest] = &[
371     t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
372     t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
373     t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]),
374     t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]),
375     t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
376     t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]),
377     t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]),
378     t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
379     t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
380     t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
381     t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
382     t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
383     t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
384     t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
385     t!(
386         leftlong310,
387         &["a", "abcdefghi", "hz", "abcdefgh"],
388         "abcdefghz",
389         &[(3, 0, 8),]
390     ),
391     t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
392     t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
393     t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
394 ];
395 
396 /// Like LEFTMOST_LONGEST, but for anchored searches.
397 const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[
398     t!(aleftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
399     t!(aleftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
400     t!(aleftlong020, &["", "a"], "a", &[(1, 0, 1)]),
401     t!(aleftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
402     t!(aleftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
403     t!(aleftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
404     t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1)]),
405     t!(aleftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
406     t!(aleftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
407     t!(aleftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
408     t!(aleftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
409     t!(aleftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
410     t!(aleftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
411     t!(aleftlong300, &["abcd", "b", "bce"], "abce", &[]),
412     t!(
413         aleftlong310,
414         &["a", "abcdefghi", "hz", "abcdefgh"],
415         "abcdefghz",
416         &[(3, 0, 8),]
417     ),
418     t!(aleftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
419     t!(aleftlong330, &["abcd", "b", "ce"], "abce", &[]),
420     t!(aleftlong340, &["a", "ab"], "xayabbbz", &[]),
421 ];
422 
423 /// Tests for non-overlapping match semantics.
424 ///
425 /// Generally these tests shouldn't pass when using overlapping semantics.
426 /// These should pass for both standard and leftmost match semantics.
427 const NON_OVERLAPPING: &'static [SearchTest] = &[
428     t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
429     t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
430     t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
431     t!(
432         nover100,
433         &["ab", "ba"],
434         "abababa",
435         &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
436     ),
437     t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
438     t!(nover300, &["", ""], "", &[(0, 0, 0),]),
439     t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
440 ];
441 
442 /// Like NON_OVERLAPPING, but for anchored searches.
443 const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[
444     t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
445     t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
446     t!(anover030, &["abc", "bc"], "zazabcz", &[]),
447     t!(anover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
448     t!(anover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]),
449     t!(anover300, &["", ""], "", &[(0, 0, 0),]),
450     t!(anover310, &["", ""], "a", &[(0, 0, 0)]),
451 ];
452 
453 /// Tests for overlapping match semantics.
454 ///
455 /// This only supports standard match semantics, since leftmost-{first,longest}
456 /// do not support overlapping matches.
457 const OVERLAPPING: &'static [SearchTest] = &[
458     t!(
459         over000,
460         &["abcd", "bcd", "cd", "b"],
461         "abcd",
462         &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),]
463     ),
464     t!(
465         over010,
466         &["bcd", "cd", "b", "abcd"],
467         "abcd",
468         &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),]
469     ),
470     t!(
471         over020,
472         &["abcd", "bcd", "cd"],
473         "abcd",
474         &[(0, 0, 4), (1, 1, 4), (2, 2, 4),]
475     ),
476     t!(
477         over030,
478         &["bcd", "abcd", "cd"],
479         "abcd",
480         &[(1, 0, 4), (0, 1, 4), (2, 2, 4),]
481     ),
482     t!(
483         over040,
484         &["bcd", "cd", "abcd"],
485         "abcd",
486         &[(2, 0, 4), (0, 1, 4), (1, 2, 4),]
487     ),
488     t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]),
489     t!(
490         over100,
491         &["ab", "ba"],
492         "abababa",
493         &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),]
494     ),
495     t!(
496         over200,
497         &["foo", "foo"],
498         "foobarfoo",
499         &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),]
500     ),
501     t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
502     t!(
503         over310,
504         &["", ""],
505         "a",
506         &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),]
507     ),
508     t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]),
509     t!(
510         over330,
511         &["", "a", ""],
512         "a",
513         &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),]
514     ),
515     t!(
516         over340,
517         &["a", "", ""],
518         "a",
519         &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),]
520     ),
521     t!(
522         over350,
523         &["", "", "a"],
524         "a",
525         &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),]
526     ),
527     t!(
528         over360,
529         &["foo", "foofoo"],
530         "foofoo",
531         &[(0, 0, 3), (1, 0, 6), (0, 3, 6)]
532     ),
533 ];
534 
535 /// Like OVERLAPPING, but for anchored searches.
536 const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[
537     t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
538     t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]),
539     t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]),
540     t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]),
541     t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]),
542     t!(aover050, &["abc", "bc"], "zazabcz", &[]),
543     t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
544     t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]),
545     t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
546     t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]),
547     t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]),
548     t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]),
549     t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]),
550     t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]),
551     t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]),
552 ];
553 
554 /// Tests for ASCII case insensitivity.
555 ///
556 /// These tests should all have the same behavior regardless of match semantics
557 /// or whether the search is overlapping.
558 const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[
559     t!(acasei000, &["a"], "A", &[(0, 0, 1)]),
560     t!(acasei010, &["Samwise"], "SAMWISE", &[(0, 0, 7)]),
561     t!(acasei011, &["Samwise"], "SAMWISE.abcd", &[(0, 0, 7)]),
562     t!(acasei020, &["fOoBaR"], "quux foobar baz", &[(0, 5, 11)]),
563 ];
564 
565 /// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests.
566 const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[
567     t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3)]),
568     t!(acasei000, &["FOO", "foo"], "fOo", &[(0, 0, 3)]),
569     t!(acasei010, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]),
570 ];
571 
572 /// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests.
573 const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[
574     t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
575     t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
576     // This is a regression test from:
577     // https://github.com/BurntSushi/aho-corasick/issues/68
578     // Previously, it was reporting a duplicate (1, 3, 6) match.
579     t!(
580         acasei010,
581         &["abc", "def", "abcdef"],
582         "abcdef",
583         &[(0, 0, 3), (2, 0, 6), (1, 3, 6)]
584     ),
585 ];
586 
587 /// Regression tests that are applied to all Aho-Corasick combinations.
588 ///
589 /// If regression tests are needed for specific match semantics, then add them
590 /// to the appropriate group above.
591 const REGRESSION: &'static [SearchTest] = &[
592     t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
593     t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
594     t!(
595         regression030,
596         &["libcore/", "libstd/"],
597         "libcore/char/methods.rs",
598         &[(0, 0, 8),]
599     ),
600     t!(
601         regression040,
602         &["libstd/", "libcore/"],
603         "libcore/char/methods.rs",
604         &[(1, 0, 8),]
605     ),
606     t!(
607         regression050,
608         &["\x00\x00\x01", "\x00\x00\x00"],
609         "\x00\x00\x00",
610         &[(1, 0, 3),]
611     ),
612     t!(
613         regression060,
614         &["\x00\x00\x00", "\x00\x00\x01"],
615         "\x00\x00\x00",
616         &[(0, 0, 3),]
617     ),
618 ];
619 
620 // Now define a test for each combination of things above that we want to run.
621 // Since there are a few different combinations for each collection of tests,
622 // we define a couple of macros to avoid repetition drudgery. The testconfig
623 // macro constructs the automaton from a given match kind, and runs the search
624 // tests one-by-one over the given collection. The `with` parameter allows one
625 // to configure the builder with additional parameters. The testcombo macro
626 // invokes testconfig in precisely this way: it sets up several tests where
627 // each one turns a different knob on AhoCorasickBuilder.
628 
629 macro_rules! testconfig {
630     (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
631         #[test]
632         fn $name() {
633             run_search_tests($collection, |test| {
634                 let mut builder = AhoCorasickBuilder::new();
635                 $with(&mut builder);
636                 builder
637                     .match_kind(MatchKind::$kind)
638                     .build(test.patterns)
639                     .find_overlapping_iter(test.haystack)
640                     .collect()
641             });
642         }
643     };
644     (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
645         #[test]
646         fn $name() {
647             run_search_tests($collection, |test| {
648                 let buf =
649                     io::BufReader::with_capacity(1, test.haystack.as_bytes());
650                 let mut builder = AhoCorasickBuilder::new();
651                 $with(&mut builder);
652                 builder
653                     .match_kind(MatchKind::$kind)
654                     .build(test.patterns)
655                     .stream_find_iter(buf)
656                     .map(|result| result.unwrap())
657                     .collect()
658             });
659         }
660     };
661     ($name:ident, $collection:expr, $kind:ident, $with:expr) => {
662         #[test]
663         fn $name() {
664             run_search_tests($collection, |test| {
665                 let mut builder = AhoCorasickBuilder::new();
666                 $with(&mut builder);
667                 builder
668                     .match_kind(MatchKind::$kind)
669                     .build(test.patterns)
670                     .find_iter(test.haystack)
671                     .collect()
672             });
673         }
674     };
675 }
676 
677 macro_rules! testcombo {
678     ($name:ident, $collection:expr, $kind:ident) => {
679         mod $name {
680             use super::*;
681 
682             testconfig!(nfa_default, $collection, $kind, |_| ());
683             testconfig!(
684                 nfa_no_prefilter,
685                 $collection,
686                 $kind,
687                 |b: &mut AhoCorasickBuilder| {
688                     b.prefilter(false);
689                 }
690             );
691             testconfig!(
692                 nfa_all_sparse,
693                 $collection,
694                 $kind,
695                 |b: &mut AhoCorasickBuilder| {
696                     b.dense_depth(0);
697                 }
698             );
699             testconfig!(
700                 nfa_all_dense,
701                 $collection,
702                 $kind,
703                 |b: &mut AhoCorasickBuilder| {
704                     b.dense_depth(usize::MAX);
705                 }
706             );
707             testconfig!(
708                 dfa_default,
709                 $collection,
710                 $kind,
711                 |b: &mut AhoCorasickBuilder| {
712                     b.dfa(true);
713                 }
714             );
715             testconfig!(
716                 dfa_no_prefilter,
717                 $collection,
718                 $kind,
719                 |b: &mut AhoCorasickBuilder| {
720                     b.dfa(true).prefilter(false);
721                 }
722             );
723             testconfig!(
724                 dfa_all_sparse,
725                 $collection,
726                 $kind,
727                 |b: &mut AhoCorasickBuilder| {
728                     b.dfa(true).dense_depth(0);
729                 }
730             );
731             testconfig!(
732                 dfa_all_dense,
733                 $collection,
734                 $kind,
735                 |b: &mut AhoCorasickBuilder| {
736                     b.dfa(true).dense_depth(usize::MAX);
737                 }
738             );
739             testconfig!(
740                 dfa_no_byte_class,
741                 $collection,
742                 $kind,
743                 |b: &mut AhoCorasickBuilder| {
744                     // TODO: remove tests when option is removed.
745                     #[allow(deprecated)]
746                     b.dfa(true).byte_classes(false);
747                 }
748             );
749             testconfig!(
750                 dfa_no_premultiply,
751                 $collection,
752                 $kind,
753                 |b: &mut AhoCorasickBuilder| {
754                     // TODO: remove tests when option is removed.
755                     #[allow(deprecated)]
756                     b.dfa(true).premultiply(false);
757                 }
758             );
759             testconfig!(
760                 dfa_no_byte_class_no_premultiply,
761                 $collection,
762                 $kind,
763                 |b: &mut AhoCorasickBuilder| {
764                     // TODO: remove tests when options are removed.
765                     #[allow(deprecated)]
766                     b.dfa(true).byte_classes(false).premultiply(false);
767                 }
768             );
769         }
770     };
771 }
772 
773 // Write out the combinations.
774 testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest);
775 testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst);
776 testcombo!(
777     search_standard_nonoverlapping,
778     AC_STANDARD_NON_OVERLAPPING,
779     Standard
780 );
781 
782 // Write out the overlapping combo by hand since there is only one of them.
783 testconfig!(
784     overlapping,
785     search_standard_overlapping_nfa_default,
786     AC_STANDARD_OVERLAPPING,
787     Standard,
788     |_| ()
789 );
790 testconfig!(
791     overlapping,
792     search_standard_overlapping_nfa_all_sparse,
793     AC_STANDARD_OVERLAPPING,
794     Standard,
795     |b: &mut AhoCorasickBuilder| {
796         b.dense_depth(0);
797     }
798 );
799 testconfig!(
800     overlapping,
801     search_standard_overlapping_nfa_all_dense,
802     AC_STANDARD_OVERLAPPING,
803     Standard,
804     |b: &mut AhoCorasickBuilder| {
805         b.dense_depth(usize::MAX);
806     }
807 );
808 testconfig!(
809     overlapping,
810     search_standard_overlapping_dfa_default,
811     AC_STANDARD_OVERLAPPING,
812     Standard,
813     |b: &mut AhoCorasickBuilder| {
814         b.dfa(true);
815     }
816 );
817 testconfig!(
818     overlapping,
819     search_standard_overlapping_dfa_all_sparse,
820     AC_STANDARD_OVERLAPPING,
821     Standard,
822     |b: &mut AhoCorasickBuilder| {
823         b.dfa(true).dense_depth(0);
824     }
825 );
826 testconfig!(
827     overlapping,
828     search_standard_overlapping_dfa_all_dense,
829     AC_STANDARD_OVERLAPPING,
830     Standard,
831     |b: &mut AhoCorasickBuilder| {
832         b.dfa(true).dense_depth(usize::MAX);
833     }
834 );
835 testconfig!(
836     overlapping,
837     search_standard_overlapping_dfa_no_byte_class,
838     AC_STANDARD_OVERLAPPING,
839     Standard,
840     |b: &mut AhoCorasickBuilder| {
841         // TODO: remove tests when option is removed.
842         #[allow(deprecated)]
843         b.dfa(true).byte_classes(false);
844     }
845 );
846 testconfig!(
847     overlapping,
848     search_standard_overlapping_dfa_no_premultiply,
849     AC_STANDARD_OVERLAPPING,
850     Standard,
851     |b: &mut AhoCorasickBuilder| {
852         // TODO: remove tests when option is removed.
853         #[allow(deprecated)]
854         b.dfa(true).premultiply(false);
855     }
856 );
857 testconfig!(
858     overlapping,
859     search_standard_overlapping_dfa_no_byte_class_no_premultiply,
860     AC_STANDARD_OVERLAPPING,
861     Standard,
862     |b: &mut AhoCorasickBuilder| {
863         // TODO: remove tests when options are removed.
864         #[allow(deprecated)]
865         b.dfa(true).byte_classes(false).premultiply(false);
866     }
867 );
868 
869 // Also write out tests manually for streams, since we only test the standard
870 // match semantics. We also don't bother testing different automaton
871 // configurations, since those are well covered by tests above.
872 testconfig!(
873     stream,
874     search_standard_stream_nfa_default,
875     AC_STANDARD_NON_OVERLAPPING,
876     Standard,
877     |_| ()
878 );
879 testconfig!(
880     stream,
881     search_standard_stream_dfa_default,
882     AC_STANDARD_NON_OVERLAPPING,
883     Standard,
884     |b: &mut AhoCorasickBuilder| {
885         b.dfa(true);
886     }
887 );
888 
889 // Same thing for anchored searches. Write them out manually.
890 testconfig!(
891     search_standard_anchored_nfa_default,
892     AC_STANDARD_ANCHORED_NON_OVERLAPPING,
893     Standard,
894     |b: &mut AhoCorasickBuilder| {
895         b.anchored(true);
896     }
897 );
898 testconfig!(
899     search_standard_anchored_dfa_default,
900     AC_STANDARD_ANCHORED_NON_OVERLAPPING,
901     Standard,
902     |b: &mut AhoCorasickBuilder| {
903         b.anchored(true).dfa(true);
904     }
905 );
906 testconfig!(
907     overlapping,
908     search_standard_anchored_overlapping_nfa_default,
909     AC_STANDARD_ANCHORED_OVERLAPPING,
910     Standard,
911     |b: &mut AhoCorasickBuilder| {
912         b.anchored(true);
913     }
914 );
915 testconfig!(
916     overlapping,
917     search_standard_anchored_overlapping_dfa_default,
918     AC_STANDARD_ANCHORED_OVERLAPPING,
919     Standard,
920     |b: &mut AhoCorasickBuilder| {
921         b.anchored(true).dfa(true);
922     }
923 );
924 testconfig!(
925     search_leftmost_first_anchored_nfa_default,
926     AC_LEFTMOST_FIRST_ANCHORED,
927     LeftmostFirst,
928     |b: &mut AhoCorasickBuilder| {
929         b.anchored(true);
930     }
931 );
932 testconfig!(
933     search_leftmost_first_anchored_dfa_default,
934     AC_LEFTMOST_FIRST_ANCHORED,
935     LeftmostFirst,
936     |b: &mut AhoCorasickBuilder| {
937         b.anchored(true).dfa(true);
938     }
939 );
940 testconfig!(
941     search_leftmost_longest_anchored_nfa_default,
942     AC_LEFTMOST_LONGEST_ANCHORED,
943     LeftmostLongest,
944     |b: &mut AhoCorasickBuilder| {
945         b.anchored(true);
946     }
947 );
948 testconfig!(
949     search_leftmost_longest_anchored_dfa_default,
950     AC_LEFTMOST_LONGEST_ANCHORED,
951     LeftmostLongest,
952     |b: &mut AhoCorasickBuilder| {
953         b.anchored(true).dfa(true);
954     }
955 );
956 
957 // And also write out the test combinations for ASCII case insensitivity.
958 testconfig!(
959     acasei_standard_nfa_default,
960     &[ASCII_CASE_INSENSITIVE],
961     Standard,
962     |b: &mut AhoCorasickBuilder| {
963         b.prefilter(false).ascii_case_insensitive(true);
964     }
965 );
966 testconfig!(
967     acasei_standard_dfa_default,
968     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
969     Standard,
970     |b: &mut AhoCorasickBuilder| {
971         b.ascii_case_insensitive(true).dfa(true);
972     }
973 );
974 testconfig!(
975     overlapping,
976     acasei_standard_overlapping_nfa_default,
977     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
978     Standard,
979     |b: &mut AhoCorasickBuilder| {
980         b.ascii_case_insensitive(true);
981     }
982 );
983 testconfig!(
984     overlapping,
985     acasei_standard_overlapping_dfa_default,
986     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
987     Standard,
988     |b: &mut AhoCorasickBuilder| {
989         b.ascii_case_insensitive(true).dfa(true);
990     }
991 );
992 testconfig!(
993     acasei_leftmost_first_nfa_default,
994     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
995     LeftmostFirst,
996     |b: &mut AhoCorasickBuilder| {
997         b.ascii_case_insensitive(true);
998     }
999 );
1000 testconfig!(
1001     acasei_leftmost_first_dfa_default,
1002     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1003     LeftmostFirst,
1004     |b: &mut AhoCorasickBuilder| {
1005         b.ascii_case_insensitive(true).dfa(true);
1006     }
1007 );
1008 testconfig!(
1009     acasei_leftmost_longest_nfa_default,
1010     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1011     LeftmostLongest,
1012     |b: &mut AhoCorasickBuilder| {
1013         b.ascii_case_insensitive(true);
1014     }
1015 );
1016 testconfig!(
1017     acasei_leftmost_longest_dfa_default,
1018     &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1019     LeftmostLongest,
1020     |b: &mut AhoCorasickBuilder| {
1021         b.ascii_case_insensitive(true).dfa(true);
1022     }
1023 );
1024 
run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>( which: TestCollection, mut f: F, )1025 fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
1026     which: TestCollection,
1027     mut f: F,
1028 ) {
1029     let get_match_triples =
1030         |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
1031             matches
1032                 .into_iter()
1033                 .map(|m| (m.pattern(), m.start(), m.end()))
1034                 .collect()
1035         };
1036     for &tests in which {
1037         for test in tests {
1038             assert_eq!(
1039                 test.matches,
1040                 get_match_triples(f(&test)).as_slice(),
1041                 "test: {}, patterns: {:?}, haystack: {:?}",
1042                 test.name,
1043                 test.patterns,
1044                 test.haystack
1045             );
1046         }
1047     }
1048 }
1049 
1050 #[test]
search_tests_have_unique_names()1051 fn search_tests_have_unique_names() {
1052     let assert = |constname, tests: &[SearchTest]| {
1053         let mut seen = HashMap::new(); // map from test name to position
1054         for (i, test) in tests.iter().enumerate() {
1055             if !seen.contains_key(test.name) {
1056                 seen.insert(test.name, i);
1057             } else {
1058                 let last = seen[test.name];
1059                 panic!(
1060                     "{} tests have duplicate names at positions {} and {}",
1061                     constname, last, i
1062                 );
1063             }
1064         }
1065     };
1066     assert("BASICS", BASICS);
1067     assert("STANDARD", STANDARD);
1068     assert("LEFTMOST", LEFTMOST);
1069     assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
1070     assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
1071     assert("NON_OVERLAPPING", NON_OVERLAPPING);
1072     assert("OVERLAPPING", OVERLAPPING);
1073     assert("REGRESSION", REGRESSION);
1074 }
1075 
1076 #[test]
1077 #[should_panic]
stream_not_allowed_leftmost_first()1078 fn stream_not_allowed_leftmost_first() {
1079     let fsm = AhoCorasickBuilder::new()
1080         .match_kind(MatchKind::LeftmostFirst)
1081         .build(None::<String>);
1082     assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
1083 }
1084 
1085 #[test]
1086 #[should_panic]
stream_not_allowed_leftmost_longest()1087 fn stream_not_allowed_leftmost_longest() {
1088     let fsm = AhoCorasickBuilder::new()
1089         .match_kind(MatchKind::LeftmostLongest)
1090         .build(None::<String>);
1091     assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
1092 }
1093 
1094 #[test]
1095 #[should_panic]
overlapping_not_allowed_leftmost_first()1096 fn overlapping_not_allowed_leftmost_first() {
1097     let fsm = AhoCorasickBuilder::new()
1098         .match_kind(MatchKind::LeftmostFirst)
1099         .build(None::<String>);
1100     assert_eq!(fsm.find_overlapping_iter("").count(), 0);
1101 }
1102 
1103 #[test]
1104 #[should_panic]
overlapping_not_allowed_leftmost_longest()1105 fn overlapping_not_allowed_leftmost_longest() {
1106     let fsm = AhoCorasickBuilder::new()
1107         .match_kind(MatchKind::LeftmostLongest)
1108         .build(None::<String>);
1109     assert_eq!(fsm.find_overlapping_iter("").count(), 0);
1110 }
1111 
1112 #[test]
state_id_too_small()1113 fn state_id_too_small() {
1114     let mut patterns = vec![];
1115     for c1 in (b'a'..b'z').map(|b| b as char) {
1116         for c2 in (b'a'..b'z').map(|b| b as char) {
1117             for c3 in (b'a'..b'z').map(|b| b as char) {
1118                 patterns.push(format!("{}{}{}", c1, c2, c3));
1119             }
1120         }
1121     }
1122     let result =
1123         AhoCorasickBuilder::new().build_with_size::<u8, _, _>(&patterns);
1124     assert!(result.is_err());
1125 }
1126 
1127 // See: https://github.com/BurntSushi/aho-corasick/issues/44
1128 //
1129 // In short, this test ensures that enabling ASCII case insensitivity does not
1130 // visit an exponential number of states when filling in failure transitions.
1131 #[test]
regression_ascii_case_insensitive_no_exponential()1132 fn regression_ascii_case_insensitive_no_exponential() {
1133     let ac = AhoCorasickBuilder::new()
1134         .ascii_case_insensitive(true)
1135         .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"]);
1136     assert!(ac.find("").is_none());
1137 }
1138 
1139 // See: https://github.com/BurntSushi/aho-corasick/issues/53
1140 //
1141 // This test ensures that the rare byte prefilter works in a particular corner
1142 // case. In particular, the shift offset detected for '/' in the patterns below
1143 // was incorrect, leading to a false negative.
1144 #[test]
regression_rare_byte_prefilter()1145 fn regression_rare_byte_prefilter() {
1146     use crate::AhoCorasick;
1147 
1148     let ac = AhoCorasick::new_auto_configured(&["ab/j/", "x/"]);
1149     assert!(ac.is_match("ab/j/"));
1150 }
1151 
1152 #[test]
regression_case_insensitive_prefilter()1153 fn regression_case_insensitive_prefilter() {
1154     use crate::AhoCorasickBuilder;
1155 
1156     for c in b'a'..b'z' {
1157         for c2 in b'a'..b'z' {
1158             let c = c as char;
1159             let c2 = c2 as char;
1160             let needle = format!("{}{}", c, c2).to_lowercase();
1161             let haystack = needle.to_uppercase();
1162             let ac = AhoCorasickBuilder::new()
1163                 .ascii_case_insensitive(true)
1164                 .prefilter(true)
1165                 .build(&[&needle]);
1166             assert_eq!(
1167                 1,
1168                 ac.find_iter(&haystack).count(),
1169                 "failed to find {:?} in {:?}\n\nautomaton:\n{:?}",
1170                 needle,
1171                 haystack,
1172                 ac,
1173             );
1174         }
1175     }
1176 }
1177 
1178 // See: https://github.com/BurntSushi/aho-corasick/issues/64
1179 //
1180 // This occurs when the rare byte prefilter is active.
1181 #[test]
regression_stream_rare_byte_prefilter()1182 fn regression_stream_rare_byte_prefilter() {
1183     use std::io::Read;
1184 
1185     // NOTE: The test only fails if this ends with j.
1186     const MAGIC: [u8; 5] = *b"1234j";
1187 
1188     // NOTE: The test fails for value in 8188..=8191 These value put the string
1189     // to search accross two call to read because the buffer size is 8192 by
1190     // default.
1191     const BEGIN: usize = 8191;
1192 
1193     /// This is just a structure that implements Reader. The reader
1194     /// implementation will simulate a file filled with 0, except for the MAGIC
1195     /// string at offset BEGIN.
1196     #[derive(Default)]
1197     struct R {
1198         read: usize,
1199     }
1200 
1201     impl Read for R {
1202         fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result<usize> {
1203             //dbg!(buf.len());
1204             if self.read > 100000 {
1205                 return Ok(0);
1206             }
1207             let mut from = 0;
1208             if self.read < BEGIN {
1209                 from = buf.len().min(BEGIN - self.read);
1210                 for x in 0..from {
1211                     buf[x] = 0;
1212                 }
1213                 self.read += from;
1214             }
1215             if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() {
1216                 let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from);
1217                 if to > from {
1218                     buf[from..to].copy_from_slice(
1219                         &MAGIC
1220                             [self.read - BEGIN..self.read - BEGIN + to - from],
1221                     );
1222                     self.read += to - from;
1223                     from = to;
1224                 }
1225             }
1226             for x in from..buf.len() {
1227                 buf[x] = 0;
1228                 self.read += 1;
1229             }
1230             Ok(buf.len())
1231         }
1232     }
1233 
1234     fn run() -> ::std::io::Result<()> {
1235         let aut = AhoCorasickBuilder::new().build(&[&MAGIC]);
1236 
1237         // While reading from a vector, it works:
1238         let mut buf = vec![];
1239         R::default().read_to_end(&mut buf)?;
1240         let from_whole = aut.find_iter(&buf).next().unwrap().start();
1241 
1242         //But using stream_find_iter fails!
1243         let mut file = R::default();
1244         let begin = aut
1245             .stream_find_iter(&mut file)
1246             .next()
1247             .expect("NOT FOUND!!!!")? // Panic here
1248             .start();
1249         assert_eq!(from_whole, begin);
1250         Ok(())
1251     }
1252 
1253     run().unwrap()
1254 }
1255