1 use std::collections::HashMap;
2 use std::io;
3 use std::usize;
4
5 use crate::{AhoCorasickBuilder, Match, MatchKind};
6
7 /// A description of a single test against an Aho-Corasick automaton.
8 ///
9 /// A single test may not necessarily pass on every configuration of an
10 /// Aho-Corasick automaton. The tests are categorized and grouped appropriately
11 /// below.
12 #[derive(Clone, Debug, Eq, PartialEq)]
13 struct SearchTest {
14 /// The name of this test, for debugging.
15 name: &'static str,
16 /// The patterns to search for.
17 patterns: &'static [&'static str],
18 /// The text to search.
19 haystack: &'static str,
20 /// Each match is a triple of (pattern_index, start, end), where
21 /// pattern_index is an index into `patterns` and `start`/`end` are indices
22 /// into `haystack`.
23 matches: &'static [(usize, usize, usize)],
24 }
25
26 /// Short-hand constructor for SearchTest. We use it a lot below.
27 macro_rules! t {
28 ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
29 SearchTest {
30 name: stringify!($name),
31 patterns: $patterns,
32 haystack: $haystack,
33 matches: $matches,
34 }
35 };
36 }
37
38 /// A collection of test groups.
39 type TestCollection = &'static [&'static [SearchTest]];
40
41 // Define several collections corresponding to the different type of match
42 // semantics supported by Aho-Corasick. These collections have some overlap,
43 // but each collection should have some tests that no other collection has.
44
45 /// Tests for Aho-Corasick's standard non-overlapping match semantics.
46 const AC_STANDARD_NON_OVERLAPPING: TestCollection =
47 &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION];
48
49 /// Tests for Aho-Corasick's anchored standard non-overlapping match semantics.
50 const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection =
51 &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED];
52
53 /// Tests for Aho-Corasick's standard overlapping match semantics.
54 const AC_STANDARD_OVERLAPPING: TestCollection =
55 &[BASICS, OVERLAPPING, REGRESSION];
56
57 /// Tests for Aho-Corasick's anchored standard overlapping match semantics.
58 const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection =
59 &[ANCHORED_BASICS, ANCHORED_OVERLAPPING];
60
61 /// Tests for Aho-Corasick's leftmost-first match semantics.
62 const AC_LEFTMOST_FIRST: TestCollection =
63 &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION];
64
65 /// Tests for Aho-Corasick's anchored leftmost-first match semantics.
66 const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[
67 ANCHORED_BASICS,
68 ANCHORED_NON_OVERLAPPING,
69 ANCHORED_LEFTMOST,
70 ANCHORED_LEFTMOST_FIRST,
71 ];
72
73 /// Tests for Aho-Corasick's leftmost-longest match semantics.
74 const AC_LEFTMOST_LONGEST: TestCollection =
75 &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION];
76
77 /// Tests for Aho-Corasick's anchored leftmost-longest match semantics.
78 const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[
79 ANCHORED_BASICS,
80 ANCHORED_NON_OVERLAPPING,
81 ANCHORED_LEFTMOST,
82 ANCHORED_LEFTMOST_LONGEST,
83 ];
84
85 // Now define the individual tests that make up the collections above.
86
87 /// A collection of tests for the Aho-Corasick algorithm that should always be
88 /// true regardless of match semantics. That is, all combinations of
89 /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping}
90 /// should produce the same answer.
91 const BASICS: &'static [SearchTest] = &[
92 t!(basic000, &[], "", &[]),
93 t!(basic001, &["a"], "", &[]),
94 t!(basic010, &["a"], "a", &[(0, 0, 1)]),
95 t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
96 t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
97 t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
98 t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
99 t!(basic060, &["a"], "bbb", &[]),
100 t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
101 t!(basic100, &["aa"], "", &[]),
102 t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
103 t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
104 t!(basic130, &["aa"], "abbab", &[]),
105 t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
106 t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
107 t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
108 t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
109 t!(basic300, &["a", "b"], "", &[]),
110 t!(basic310, &["a", "b"], "z", &[]),
111 t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
112 t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
113 t!(
114 basic340,
115 &["a", "b"],
116 "abba",
117 &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
118 ),
119 t!(
120 basic350,
121 &["b", "a"],
122 "abba",
123 &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
124 ),
125 t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
126 t!(basic400, &["foo", "bar"], "", &[]),
127 t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
128 t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
129 t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
130 t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
131 t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
132 t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
133 t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
134 t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
135 t!(basic600, &[""], "", &[(0, 0, 0)]),
136 t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
137 t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
138 t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
139 t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
140 t!(
141 basic720,
142 &["yabcdef", "bcdeyabc", "abcdezghi"],
143 "yabcdezghi",
144 &[(2, 1, 10),]
145 ),
146 ];
147
148 /// A collection of *anchored* tests for the Aho-Corasick algorithm that should
149 /// always be true regardless of match semantics. That is, all combinations of
150 /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should
151 /// produce the same answer.
152 const ANCHORED_BASICS: &'static [SearchTest] = &[
153 t!(abasic000, &[], "", &[]),
154 t!(abasic010, &[""], "", &[(0, 0, 0)]),
155 t!(abasic020, &[""], "a", &[(0, 0, 0)]),
156 t!(abasic030, &[""], "abc", &[(0, 0, 0)]),
157 t!(abasic100, &["a"], "a", &[(0, 0, 1)]),
158 t!(abasic110, &["a"], "aa", &[(0, 0, 1)]),
159 t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1)]),
160 t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1)]),
161 t!(abasic140, &["foo", "foofoo"], "foo", &[(0, 0, 3)]),
162 t!(abasic150, &["foofoo", "foo"], "foo", &[(1, 0, 3)]),
163 ];
164
165 /// Tests for non-overlapping standard match semantics.
166 ///
167 /// These tests generally shouldn't pass for leftmost-{first,longest}, although
168 /// some do in order to write clearer tests. For example, standard000 will
169 /// pass with leftmost-first semantics, but standard010 will not. We write
170 /// both to emphasize how the match semantics work.
171 const STANDARD: &'static [SearchTest] = &[
172 t!(standard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
173 t!(standard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
174 t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
175 t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
176 t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
177 t!(
178 standard400,
179 &["abcd", "bcd", "cd", "b"],
180 "abcd",
181 &[(3, 1, 2), (2, 2, 4),]
182 ),
183 t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
184 t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]),
185 t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
186 t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]),
187 t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
188 ];
189
190 /// Like STANDARD, but for anchored searches.
191 const STANDARD_ANCHORED: &'static [SearchTest] = &[
192 t!(astandard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
193 t!(astandard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
194 t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
195 t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
196 t!(astandard040, &["a", ""], "a", &[(1, 0, 0)]),
197 t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
198 t!(astandard410, &["", "a"], "a", &[(0, 0, 0)]),
199 t!(astandard420, &["", "a"], "aa", &[(0, 0, 0)]),
200 t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0)]),
201 t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0)]),
202 t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0)]),
203 ];
204
205 /// Tests for non-overlapping leftmost match semantics. These should pass for
206 /// both leftmost-first and leftmost-longest match kinds. Stated differently,
207 /// among ambiguous matches, the longest match and the match that appeared
208 /// first when constructing the automaton should always be the same.
209 const LEFTMOST: &'static [SearchTest] = &[
210 t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
211 t!(leftmost010, &["a", ""], "a", &[(0, 0, 1), (1, 1, 1)]),
212 t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
213 t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
214 t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
215 t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
216 t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
217 t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
218 t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
219 t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
220 t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
221 t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
222 t!(
223 leftmost360,
224 &["abcdefghi", "hz", "abcdefgh"],
225 "abcdefghz",
226 &[(2, 0, 8),]
227 ),
228 t!(
229 leftmost370,
230 &["abcdefghi", "cde", "hz", "abcdefgh"],
231 "abcdefghz",
232 &[(3, 0, 8),]
233 ),
234 t!(
235 leftmost380,
236 &["abcdefghi", "hz", "abcdefgh", "a"],
237 "abcdefghz",
238 &[(2, 0, 8),]
239 ),
240 t!(
241 leftmost390,
242 &["b", "abcdefghi", "hz", "abcdefgh"],
243 "abcdefghz",
244 &[(3, 0, 8),]
245 ),
246 t!(
247 leftmost400,
248 &["h", "abcdefghi", "hz", "abcdefgh"],
249 "abcdefghz",
250 &[(3, 0, 8),]
251 ),
252 t!(
253 leftmost410,
254 &["z", "abcdefghi", "hz", "abcdefgh"],
255 "abcdefghz",
256 &[(3, 0, 8), (0, 8, 9),]
257 ),
258 ];
259
260 /// Like LEFTMOST, but for anchored searches.
261 const ANCHORED_LEFTMOST: &'static [SearchTest] = &[
262 t!(aleftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
263 t!(aleftmost010, &["a", ""], "a", &[(0, 0, 1)]),
264 t!(aleftmost020, &["", ""], "a", &[(0, 0, 0)]),
265 t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1)]),
266 t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1)]),
267 t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]),
268 t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]),
269 t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]),
270 t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]),
271 t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]),
272 t!(aleftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
273 t!(aleftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
274 t!(
275 aleftmost360,
276 &["abcdefghi", "hz", "abcdefgh"],
277 "abcdefghz",
278 &[(2, 0, 8),]
279 ),
280 t!(
281 aleftmost370,
282 &["abcdefghi", "cde", "hz", "abcdefgh"],
283 "abcdefghz",
284 &[(3, 0, 8),]
285 ),
286 t!(
287 aleftmost380,
288 &["abcdefghi", "hz", "abcdefgh", "a"],
289 "abcdefghz",
290 &[(2, 0, 8),]
291 ),
292 t!(
293 aleftmost390,
294 &["b", "abcdefghi", "hz", "abcdefgh"],
295 "abcdefghz",
296 &[(3, 0, 8),]
297 ),
298 t!(
299 aleftmost400,
300 &["h", "abcdefghi", "hz", "abcdefgh"],
301 "abcdefghz",
302 &[(3, 0, 8),]
303 ),
304 t!(
305 aleftmost410,
306 &["z", "abcdefghi", "hz", "abcdefgh"],
307 "abcdefghz",
308 &[(3, 0, 8)]
309 ),
310 ];
311
312 /// Tests for non-overlapping leftmost-first match semantics. These tests
313 /// should generally be specific to leftmost-first, which means they should
314 /// generally fail under leftmost-longest semantics.
315 const LEFTMOST_FIRST: &'static [SearchTest] = &[
316 t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
317 t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
318 t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
319 t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
320 t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
321 t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
322 t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
323 t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
324 t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
325 t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
326 t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
327 t!(
328 leftfirst310,
329 &["abcd", "b", "bce", "ce"],
330 "abce",
331 &[(1, 1, 2), (3, 2, 4),]
332 ),
333 t!(
334 leftfirst320,
335 &["a", "abcdefghi", "hz", "abcdefgh"],
336 "abcdefghz",
337 &[(0, 0, 1), (2, 7, 9),]
338 ),
339 t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
340 t!(leftfirst400, &["amwix", "samwise", "sam"], "Zsamwix", &[(2, 1, 4)]),
341 ];
342
343 /// Like LEFTMOST_FIRST, but for anchored searches.
344 const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[
345 t!(aleftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
346 t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0)]),
347 t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0)]),
348 t!(aleftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]),
349 t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0)]),
350 t!(aleftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
351 t!(aleftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
352 t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]),
353 t!(aleftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
354 t!(aleftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
355 t!(aleftfirst300, &["abcd", "b", "bce"], "abce", &[]),
356 t!(aleftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[]),
357 t!(
358 aleftfirst320,
359 &["a", "abcdefghi", "hz", "abcdefgh"],
360 "abcdefghz",
361 &[(0, 0, 1)]
362 ),
363 t!(aleftfirst330, &["a", "abab"], "abab", &[(0, 0, 1)]),
364 t!(aleftfirst400, &["wise", "samwise", "sam"], "samwix", &[(2, 0, 3)]),
365 ];
366
367 /// Tests for non-overlapping leftmost-longest match semantics. These tests
368 /// should generally be specific to leftmost-longest, which means they should
369 /// generally fail under leftmost-first semantics.
370 const LEFTMOST_LONGEST: &'static [SearchTest] = &[
371 t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
372 t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
373 t!(leftlong020, &["", "a"], "a", &[(1, 0, 1), (0, 1, 1),]),
374 t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1), (0, 1, 1),]),
375 t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1), (1, 1, 1),]),
376 t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1), (0, 1, 1),]),
377 t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2), (0, 2, 2),]),
378 t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
379 t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
380 t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
381 t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
382 t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
383 t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
384 t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
385 t!(
386 leftlong310,
387 &["a", "abcdefghi", "hz", "abcdefgh"],
388 "abcdefghz",
389 &[(3, 0, 8),]
390 ),
391 t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
392 t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
393 t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
394 ];
395
396 /// Like LEFTMOST_LONGEST, but for anchored searches.
397 const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[
398 t!(aleftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
399 t!(aleftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
400 t!(aleftlong020, &["", "a"], "a", &[(1, 0, 1)]),
401 t!(aleftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
402 t!(aleftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
403 t!(aleftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
404 t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1)]),
405 t!(aleftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
406 t!(aleftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
407 t!(aleftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
408 t!(aleftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
409 t!(aleftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
410 t!(aleftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
411 t!(aleftlong300, &["abcd", "b", "bce"], "abce", &[]),
412 t!(
413 aleftlong310,
414 &["a", "abcdefghi", "hz", "abcdefgh"],
415 "abcdefghz",
416 &[(3, 0, 8),]
417 ),
418 t!(aleftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
419 t!(aleftlong330, &["abcd", "b", "ce"], "abce", &[]),
420 t!(aleftlong340, &["a", "ab"], "xayabbbz", &[]),
421 ];
422
423 /// Tests for non-overlapping match semantics.
424 ///
425 /// Generally these tests shouldn't pass when using overlapping semantics.
426 /// These should pass for both standard and leftmost match semantics.
427 const NON_OVERLAPPING: &'static [SearchTest] = &[
428 t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
429 t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
430 t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
431 t!(
432 nover100,
433 &["ab", "ba"],
434 "abababa",
435 &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
436 ),
437 t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
438 t!(nover300, &["", ""], "", &[(0, 0, 0),]),
439 t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
440 ];
441
442 /// Like NON_OVERLAPPING, but for anchored searches.
443 const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[
444 t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
445 t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
446 t!(anover030, &["abc", "bc"], "zazabcz", &[]),
447 t!(anover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
448 t!(anover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]),
449 t!(anover300, &["", ""], "", &[(0, 0, 0),]),
450 t!(anover310, &["", ""], "a", &[(0, 0, 0)]),
451 ];
452
453 /// Tests for overlapping match semantics.
454 ///
455 /// This only supports standard match semantics, since leftmost-{first,longest}
456 /// do not support overlapping matches.
457 const OVERLAPPING: &'static [SearchTest] = &[
458 t!(
459 over000,
460 &["abcd", "bcd", "cd", "b"],
461 "abcd",
462 &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),]
463 ),
464 t!(
465 over010,
466 &["bcd", "cd", "b", "abcd"],
467 "abcd",
468 &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),]
469 ),
470 t!(
471 over020,
472 &["abcd", "bcd", "cd"],
473 "abcd",
474 &[(0, 0, 4), (1, 1, 4), (2, 2, 4),]
475 ),
476 t!(
477 over030,
478 &["bcd", "abcd", "cd"],
479 "abcd",
480 &[(1, 0, 4), (0, 1, 4), (2, 2, 4),]
481 ),
482 t!(
483 over040,
484 &["bcd", "cd", "abcd"],
485 "abcd",
486 &[(2, 0, 4), (0, 1, 4), (1, 2, 4),]
487 ),
488 t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]),
489 t!(
490 over100,
491 &["ab", "ba"],
492 "abababa",
493 &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),]
494 ),
495 t!(
496 over200,
497 &["foo", "foo"],
498 "foobarfoo",
499 &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),]
500 ),
501 t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
502 t!(
503 over310,
504 &["", ""],
505 "a",
506 &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),]
507 ),
508 t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]),
509 t!(
510 over330,
511 &["", "a", ""],
512 "a",
513 &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),]
514 ),
515 t!(
516 over340,
517 &["a", "", ""],
518 "a",
519 &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),]
520 ),
521 t!(
522 over350,
523 &["", "", "a"],
524 "a",
525 &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),]
526 ),
527 t!(
528 over360,
529 &["foo", "foofoo"],
530 "foofoo",
531 &[(0, 0, 3), (1, 0, 6), (0, 3, 6)]
532 ),
533 ];
534
535 /// Like OVERLAPPING, but for anchored searches.
536 const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[
537 t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
538 t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]),
539 t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]),
540 t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]),
541 t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]),
542 t!(aover050, &["abc", "bc"], "zazabcz", &[]),
543 t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
544 t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]),
545 t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
546 t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]),
547 t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]),
548 t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]),
549 t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]),
550 t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]),
551 t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]),
552 ];
553
554 /// Tests for ASCII case insensitivity.
555 ///
556 /// These tests should all have the same behavior regardless of match semantics
557 /// or whether the search is overlapping.
558 const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[
559 t!(acasei000, &["a"], "A", &[(0, 0, 1)]),
560 t!(acasei010, &["Samwise"], "SAMWISE", &[(0, 0, 7)]),
561 t!(acasei011, &["Samwise"], "SAMWISE.abcd", &[(0, 0, 7)]),
562 t!(acasei020, &["fOoBaR"], "quux foobar baz", &[(0, 5, 11)]),
563 ];
564
565 /// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests.
566 const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[
567 t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3)]),
568 t!(acasei000, &["FOO", "foo"], "fOo", &[(0, 0, 3)]),
569 t!(acasei010, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]),
570 ];
571
572 /// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests.
573 const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[
574 t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
575 t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
576 // This is a regression test from:
577 // https://github.com/BurntSushi/aho-corasick/issues/68
578 // Previously, it was reporting a duplicate (1, 3, 6) match.
579 t!(
580 acasei010,
581 &["abc", "def", "abcdef"],
582 "abcdef",
583 &[(0, 0, 3), (2, 0, 6), (1, 3, 6)]
584 ),
585 ];
586
587 /// Regression tests that are applied to all Aho-Corasick combinations.
588 ///
589 /// If regression tests are needed for specific match semantics, then add them
590 /// to the appropriate group above.
591 const REGRESSION: &'static [SearchTest] = &[
592 t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
593 t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
594 t!(
595 regression030,
596 &["libcore/", "libstd/"],
597 "libcore/char/methods.rs",
598 &[(0, 0, 8),]
599 ),
600 t!(
601 regression040,
602 &["libstd/", "libcore/"],
603 "libcore/char/methods.rs",
604 &[(1, 0, 8),]
605 ),
606 t!(
607 regression050,
608 &["\x00\x00\x01", "\x00\x00\x00"],
609 "\x00\x00\x00",
610 &[(1, 0, 3),]
611 ),
612 t!(
613 regression060,
614 &["\x00\x00\x00", "\x00\x00\x01"],
615 "\x00\x00\x00",
616 &[(0, 0, 3),]
617 ),
618 ];
619
620 // Now define a test for each combination of things above that we want to run.
621 // Since there are a few different combinations for each collection of tests,
622 // we define a couple of macros to avoid repetition drudgery. The testconfig
623 // macro constructs the automaton from a given match kind, and runs the search
624 // tests one-by-one over the given collection. The `with` parameter allows one
625 // to configure the builder with additional parameters. The testcombo macro
626 // invokes testconfig in precisely this way: it sets up several tests where
627 // each one turns a different knob on AhoCorasickBuilder.
628
629 macro_rules! testconfig {
630 (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
631 #[test]
632 fn $name() {
633 run_search_tests($collection, |test| {
634 let mut builder = AhoCorasickBuilder::new();
635 $with(&mut builder);
636 builder
637 .match_kind(MatchKind::$kind)
638 .build(test.patterns)
639 .find_overlapping_iter(test.haystack)
640 .collect()
641 });
642 }
643 };
644 (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
645 #[test]
646 fn $name() {
647 run_search_tests($collection, |test| {
648 let buf =
649 io::BufReader::with_capacity(1, test.haystack.as_bytes());
650 let mut builder = AhoCorasickBuilder::new();
651 $with(&mut builder);
652 builder
653 .match_kind(MatchKind::$kind)
654 .build(test.patterns)
655 .stream_find_iter(buf)
656 .map(|result| result.unwrap())
657 .collect()
658 });
659 }
660 };
661 ($name:ident, $collection:expr, $kind:ident, $with:expr) => {
662 #[test]
663 fn $name() {
664 run_search_tests($collection, |test| {
665 let mut builder = AhoCorasickBuilder::new();
666 $with(&mut builder);
667 builder
668 .match_kind(MatchKind::$kind)
669 .build(test.patterns)
670 .find_iter(test.haystack)
671 .collect()
672 });
673 }
674 };
675 }
676
677 macro_rules! testcombo {
678 ($name:ident, $collection:expr, $kind:ident) => {
679 mod $name {
680 use super::*;
681
682 testconfig!(nfa_default, $collection, $kind, |_| ());
683 testconfig!(
684 nfa_no_prefilter,
685 $collection,
686 $kind,
687 |b: &mut AhoCorasickBuilder| {
688 b.prefilter(false);
689 }
690 );
691 testconfig!(
692 nfa_all_sparse,
693 $collection,
694 $kind,
695 |b: &mut AhoCorasickBuilder| {
696 b.dense_depth(0);
697 }
698 );
699 testconfig!(
700 nfa_all_dense,
701 $collection,
702 $kind,
703 |b: &mut AhoCorasickBuilder| {
704 b.dense_depth(usize::MAX);
705 }
706 );
707 testconfig!(
708 dfa_default,
709 $collection,
710 $kind,
711 |b: &mut AhoCorasickBuilder| {
712 b.dfa(true);
713 }
714 );
715 testconfig!(
716 dfa_no_prefilter,
717 $collection,
718 $kind,
719 |b: &mut AhoCorasickBuilder| {
720 b.dfa(true).prefilter(false);
721 }
722 );
723 testconfig!(
724 dfa_all_sparse,
725 $collection,
726 $kind,
727 |b: &mut AhoCorasickBuilder| {
728 b.dfa(true).dense_depth(0);
729 }
730 );
731 testconfig!(
732 dfa_all_dense,
733 $collection,
734 $kind,
735 |b: &mut AhoCorasickBuilder| {
736 b.dfa(true).dense_depth(usize::MAX);
737 }
738 );
739 testconfig!(
740 dfa_no_byte_class,
741 $collection,
742 $kind,
743 |b: &mut AhoCorasickBuilder| {
744 // TODO: remove tests when option is removed.
745 #[allow(deprecated)]
746 b.dfa(true).byte_classes(false);
747 }
748 );
749 testconfig!(
750 dfa_no_premultiply,
751 $collection,
752 $kind,
753 |b: &mut AhoCorasickBuilder| {
754 // TODO: remove tests when option is removed.
755 #[allow(deprecated)]
756 b.dfa(true).premultiply(false);
757 }
758 );
759 testconfig!(
760 dfa_no_byte_class_no_premultiply,
761 $collection,
762 $kind,
763 |b: &mut AhoCorasickBuilder| {
764 // TODO: remove tests when options are removed.
765 #[allow(deprecated)]
766 b.dfa(true).byte_classes(false).premultiply(false);
767 }
768 );
769 }
770 };
771 }
772
773 // Write out the combinations.
774 testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest);
775 testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst);
776 testcombo!(
777 search_standard_nonoverlapping,
778 AC_STANDARD_NON_OVERLAPPING,
779 Standard
780 );
781
782 // Write out the overlapping combo by hand since there is only one of them.
783 testconfig!(
784 overlapping,
785 search_standard_overlapping_nfa_default,
786 AC_STANDARD_OVERLAPPING,
787 Standard,
788 |_| ()
789 );
790 testconfig!(
791 overlapping,
792 search_standard_overlapping_nfa_all_sparse,
793 AC_STANDARD_OVERLAPPING,
794 Standard,
795 |b: &mut AhoCorasickBuilder| {
796 b.dense_depth(0);
797 }
798 );
799 testconfig!(
800 overlapping,
801 search_standard_overlapping_nfa_all_dense,
802 AC_STANDARD_OVERLAPPING,
803 Standard,
804 |b: &mut AhoCorasickBuilder| {
805 b.dense_depth(usize::MAX);
806 }
807 );
808 testconfig!(
809 overlapping,
810 search_standard_overlapping_dfa_default,
811 AC_STANDARD_OVERLAPPING,
812 Standard,
813 |b: &mut AhoCorasickBuilder| {
814 b.dfa(true);
815 }
816 );
817 testconfig!(
818 overlapping,
819 search_standard_overlapping_dfa_all_sparse,
820 AC_STANDARD_OVERLAPPING,
821 Standard,
822 |b: &mut AhoCorasickBuilder| {
823 b.dfa(true).dense_depth(0);
824 }
825 );
826 testconfig!(
827 overlapping,
828 search_standard_overlapping_dfa_all_dense,
829 AC_STANDARD_OVERLAPPING,
830 Standard,
831 |b: &mut AhoCorasickBuilder| {
832 b.dfa(true).dense_depth(usize::MAX);
833 }
834 );
835 testconfig!(
836 overlapping,
837 search_standard_overlapping_dfa_no_byte_class,
838 AC_STANDARD_OVERLAPPING,
839 Standard,
840 |b: &mut AhoCorasickBuilder| {
841 // TODO: remove tests when option is removed.
842 #[allow(deprecated)]
843 b.dfa(true).byte_classes(false);
844 }
845 );
846 testconfig!(
847 overlapping,
848 search_standard_overlapping_dfa_no_premultiply,
849 AC_STANDARD_OVERLAPPING,
850 Standard,
851 |b: &mut AhoCorasickBuilder| {
852 // TODO: remove tests when option is removed.
853 #[allow(deprecated)]
854 b.dfa(true).premultiply(false);
855 }
856 );
857 testconfig!(
858 overlapping,
859 search_standard_overlapping_dfa_no_byte_class_no_premultiply,
860 AC_STANDARD_OVERLAPPING,
861 Standard,
862 |b: &mut AhoCorasickBuilder| {
863 // TODO: remove tests when options are removed.
864 #[allow(deprecated)]
865 b.dfa(true).byte_classes(false).premultiply(false);
866 }
867 );
868
869 // Also write out tests manually for streams, since we only test the standard
870 // match semantics. We also don't bother testing different automaton
871 // configurations, since those are well covered by tests above.
872 testconfig!(
873 stream,
874 search_standard_stream_nfa_default,
875 AC_STANDARD_NON_OVERLAPPING,
876 Standard,
877 |_| ()
878 );
879 testconfig!(
880 stream,
881 search_standard_stream_dfa_default,
882 AC_STANDARD_NON_OVERLAPPING,
883 Standard,
884 |b: &mut AhoCorasickBuilder| {
885 b.dfa(true);
886 }
887 );
888
889 // Same thing for anchored searches. Write them out manually.
890 testconfig!(
891 search_standard_anchored_nfa_default,
892 AC_STANDARD_ANCHORED_NON_OVERLAPPING,
893 Standard,
894 |b: &mut AhoCorasickBuilder| {
895 b.anchored(true);
896 }
897 );
898 testconfig!(
899 search_standard_anchored_dfa_default,
900 AC_STANDARD_ANCHORED_NON_OVERLAPPING,
901 Standard,
902 |b: &mut AhoCorasickBuilder| {
903 b.anchored(true).dfa(true);
904 }
905 );
906 testconfig!(
907 overlapping,
908 search_standard_anchored_overlapping_nfa_default,
909 AC_STANDARD_ANCHORED_OVERLAPPING,
910 Standard,
911 |b: &mut AhoCorasickBuilder| {
912 b.anchored(true);
913 }
914 );
915 testconfig!(
916 overlapping,
917 search_standard_anchored_overlapping_dfa_default,
918 AC_STANDARD_ANCHORED_OVERLAPPING,
919 Standard,
920 |b: &mut AhoCorasickBuilder| {
921 b.anchored(true).dfa(true);
922 }
923 );
924 testconfig!(
925 search_leftmost_first_anchored_nfa_default,
926 AC_LEFTMOST_FIRST_ANCHORED,
927 LeftmostFirst,
928 |b: &mut AhoCorasickBuilder| {
929 b.anchored(true);
930 }
931 );
932 testconfig!(
933 search_leftmost_first_anchored_dfa_default,
934 AC_LEFTMOST_FIRST_ANCHORED,
935 LeftmostFirst,
936 |b: &mut AhoCorasickBuilder| {
937 b.anchored(true).dfa(true);
938 }
939 );
940 testconfig!(
941 search_leftmost_longest_anchored_nfa_default,
942 AC_LEFTMOST_LONGEST_ANCHORED,
943 LeftmostLongest,
944 |b: &mut AhoCorasickBuilder| {
945 b.anchored(true);
946 }
947 );
948 testconfig!(
949 search_leftmost_longest_anchored_dfa_default,
950 AC_LEFTMOST_LONGEST_ANCHORED,
951 LeftmostLongest,
952 |b: &mut AhoCorasickBuilder| {
953 b.anchored(true).dfa(true);
954 }
955 );
956
957 // And also write out the test combinations for ASCII case insensitivity.
958 testconfig!(
959 acasei_standard_nfa_default,
960 &[ASCII_CASE_INSENSITIVE],
961 Standard,
962 |b: &mut AhoCorasickBuilder| {
963 b.prefilter(false).ascii_case_insensitive(true);
964 }
965 );
966 testconfig!(
967 acasei_standard_dfa_default,
968 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
969 Standard,
970 |b: &mut AhoCorasickBuilder| {
971 b.ascii_case_insensitive(true).dfa(true);
972 }
973 );
974 testconfig!(
975 overlapping,
976 acasei_standard_overlapping_nfa_default,
977 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
978 Standard,
979 |b: &mut AhoCorasickBuilder| {
980 b.ascii_case_insensitive(true);
981 }
982 );
983 testconfig!(
984 overlapping,
985 acasei_standard_overlapping_dfa_default,
986 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
987 Standard,
988 |b: &mut AhoCorasickBuilder| {
989 b.ascii_case_insensitive(true).dfa(true);
990 }
991 );
992 testconfig!(
993 acasei_leftmost_first_nfa_default,
994 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
995 LeftmostFirst,
996 |b: &mut AhoCorasickBuilder| {
997 b.ascii_case_insensitive(true);
998 }
999 );
1000 testconfig!(
1001 acasei_leftmost_first_dfa_default,
1002 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1003 LeftmostFirst,
1004 |b: &mut AhoCorasickBuilder| {
1005 b.ascii_case_insensitive(true).dfa(true);
1006 }
1007 );
1008 testconfig!(
1009 acasei_leftmost_longest_nfa_default,
1010 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1011 LeftmostLongest,
1012 |b: &mut AhoCorasickBuilder| {
1013 b.ascii_case_insensitive(true);
1014 }
1015 );
1016 testconfig!(
1017 acasei_leftmost_longest_dfa_default,
1018 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1019 LeftmostLongest,
1020 |b: &mut AhoCorasickBuilder| {
1021 b.ascii_case_insensitive(true).dfa(true);
1022 }
1023 );
1024
run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>( which: TestCollection, mut f: F, )1025 fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
1026 which: TestCollection,
1027 mut f: F,
1028 ) {
1029 let get_match_triples =
1030 |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
1031 matches
1032 .into_iter()
1033 .map(|m| (m.pattern(), m.start(), m.end()))
1034 .collect()
1035 };
1036 for &tests in which {
1037 for test in tests {
1038 assert_eq!(
1039 test.matches,
1040 get_match_triples(f(&test)).as_slice(),
1041 "test: {}, patterns: {:?}, haystack: {:?}",
1042 test.name,
1043 test.patterns,
1044 test.haystack
1045 );
1046 }
1047 }
1048 }
1049
1050 #[test]
search_tests_have_unique_names()1051 fn search_tests_have_unique_names() {
1052 let assert = |constname, tests: &[SearchTest]| {
1053 let mut seen = HashMap::new(); // map from test name to position
1054 for (i, test) in tests.iter().enumerate() {
1055 if !seen.contains_key(test.name) {
1056 seen.insert(test.name, i);
1057 } else {
1058 let last = seen[test.name];
1059 panic!(
1060 "{} tests have duplicate names at positions {} and {}",
1061 constname, last, i
1062 );
1063 }
1064 }
1065 };
1066 assert("BASICS", BASICS);
1067 assert("STANDARD", STANDARD);
1068 assert("LEFTMOST", LEFTMOST);
1069 assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
1070 assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
1071 assert("NON_OVERLAPPING", NON_OVERLAPPING);
1072 assert("OVERLAPPING", OVERLAPPING);
1073 assert("REGRESSION", REGRESSION);
1074 }
1075
1076 #[test]
1077 #[should_panic]
stream_not_allowed_leftmost_first()1078 fn stream_not_allowed_leftmost_first() {
1079 let fsm = AhoCorasickBuilder::new()
1080 .match_kind(MatchKind::LeftmostFirst)
1081 .build(None::<String>);
1082 assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
1083 }
1084
1085 #[test]
1086 #[should_panic]
stream_not_allowed_leftmost_longest()1087 fn stream_not_allowed_leftmost_longest() {
1088 let fsm = AhoCorasickBuilder::new()
1089 .match_kind(MatchKind::LeftmostLongest)
1090 .build(None::<String>);
1091 assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
1092 }
1093
1094 #[test]
1095 #[should_panic]
overlapping_not_allowed_leftmost_first()1096 fn overlapping_not_allowed_leftmost_first() {
1097 let fsm = AhoCorasickBuilder::new()
1098 .match_kind(MatchKind::LeftmostFirst)
1099 .build(None::<String>);
1100 assert_eq!(fsm.find_overlapping_iter("").count(), 0);
1101 }
1102
1103 #[test]
1104 #[should_panic]
overlapping_not_allowed_leftmost_longest()1105 fn overlapping_not_allowed_leftmost_longest() {
1106 let fsm = AhoCorasickBuilder::new()
1107 .match_kind(MatchKind::LeftmostLongest)
1108 .build(None::<String>);
1109 assert_eq!(fsm.find_overlapping_iter("").count(), 0);
1110 }
1111
1112 #[test]
state_id_too_small()1113 fn state_id_too_small() {
1114 let mut patterns = vec![];
1115 for c1 in (b'a'..b'z').map(|b| b as char) {
1116 for c2 in (b'a'..b'z').map(|b| b as char) {
1117 for c3 in (b'a'..b'z').map(|b| b as char) {
1118 patterns.push(format!("{}{}{}", c1, c2, c3));
1119 }
1120 }
1121 }
1122 let result =
1123 AhoCorasickBuilder::new().build_with_size::<u8, _, _>(&patterns);
1124 assert!(result.is_err());
1125 }
1126
1127 // See: https://github.com/BurntSushi/aho-corasick/issues/44
1128 //
1129 // In short, this test ensures that enabling ASCII case insensitivity does not
1130 // visit an exponential number of states when filling in failure transitions.
1131 #[test]
regression_ascii_case_insensitive_no_exponential()1132 fn regression_ascii_case_insensitive_no_exponential() {
1133 let ac = AhoCorasickBuilder::new()
1134 .ascii_case_insensitive(true)
1135 .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"]);
1136 assert!(ac.find("").is_none());
1137 }
1138
1139 // See: https://github.com/BurntSushi/aho-corasick/issues/53
1140 //
1141 // This test ensures that the rare byte prefilter works in a particular corner
1142 // case. In particular, the shift offset detected for '/' in the patterns below
1143 // was incorrect, leading to a false negative.
1144 #[test]
regression_rare_byte_prefilter()1145 fn regression_rare_byte_prefilter() {
1146 use crate::AhoCorasick;
1147
1148 let ac = AhoCorasick::new_auto_configured(&["ab/j/", "x/"]);
1149 assert!(ac.is_match("ab/j/"));
1150 }
1151
1152 #[test]
regression_case_insensitive_prefilter()1153 fn regression_case_insensitive_prefilter() {
1154 use crate::AhoCorasickBuilder;
1155
1156 for c in b'a'..b'z' {
1157 for c2 in b'a'..b'z' {
1158 let c = c as char;
1159 let c2 = c2 as char;
1160 let needle = format!("{}{}", c, c2).to_lowercase();
1161 let haystack = needle.to_uppercase();
1162 let ac = AhoCorasickBuilder::new()
1163 .ascii_case_insensitive(true)
1164 .prefilter(true)
1165 .build(&[&needle]);
1166 assert_eq!(
1167 1,
1168 ac.find_iter(&haystack).count(),
1169 "failed to find {:?} in {:?}\n\nautomaton:\n{:?}",
1170 needle,
1171 haystack,
1172 ac,
1173 );
1174 }
1175 }
1176 }
1177
1178 // See: https://github.com/BurntSushi/aho-corasick/issues/64
1179 //
1180 // This occurs when the rare byte prefilter is active.
1181 #[test]
regression_stream_rare_byte_prefilter()1182 fn regression_stream_rare_byte_prefilter() {
1183 use std::io::Read;
1184
1185 // NOTE: The test only fails if this ends with j.
1186 const MAGIC: [u8; 5] = *b"1234j";
1187
1188 // NOTE: The test fails for value in 8188..=8191 These value put the string
1189 // to search accross two call to read because the buffer size is 8192 by
1190 // default.
1191 const BEGIN: usize = 8191;
1192
1193 /// This is just a structure that implements Reader. The reader
1194 /// implementation will simulate a file filled with 0, except for the MAGIC
1195 /// string at offset BEGIN.
1196 #[derive(Default)]
1197 struct R {
1198 read: usize,
1199 }
1200
1201 impl Read for R {
1202 fn read(&mut self, buf: &mut [u8]) -> ::std::io::Result<usize> {
1203 //dbg!(buf.len());
1204 if self.read > 100000 {
1205 return Ok(0);
1206 }
1207 let mut from = 0;
1208 if self.read < BEGIN {
1209 from = buf.len().min(BEGIN - self.read);
1210 for x in 0..from {
1211 buf[x] = 0;
1212 }
1213 self.read += from;
1214 }
1215 if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() {
1216 let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from);
1217 if to > from {
1218 buf[from..to].copy_from_slice(
1219 &MAGIC
1220 [self.read - BEGIN..self.read - BEGIN + to - from],
1221 );
1222 self.read += to - from;
1223 from = to;
1224 }
1225 }
1226 for x in from..buf.len() {
1227 buf[x] = 0;
1228 self.read += 1;
1229 }
1230 Ok(buf.len())
1231 }
1232 }
1233
1234 fn run() -> ::std::io::Result<()> {
1235 let aut = AhoCorasickBuilder::new().build(&[&MAGIC]);
1236
1237 // While reading from a vector, it works:
1238 let mut buf = vec![];
1239 R::default().read_to_end(&mut buf)?;
1240 let from_whole = aut.find_iter(&buf).next().unwrap().start();
1241
1242 //But using stream_find_iter fails!
1243 let mut file = R::default();
1244 let begin = aut
1245 .stream_find_iter(&mut file)
1246 .next()
1247 .expect("NOT FOUND!!!!")? // Panic here
1248 .start();
1249 assert_eq!(from_whole, begin);
1250 Ok(())
1251 }
1252
1253 run().unwrap()
1254 }
1255