• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 use std::collections::HashMap;
2 use std::usize;
3 
4 use crate::packed::{Config, MatchKind};
5 use crate::Match;
6 
7 /// A description of a single test against a multi-pattern searcher.
8 ///
9 /// A single test may not necessarily pass on every configuration of a
10 /// searcher. The tests are categorized and grouped appropriately below.
11 #[derive(Clone, Debug, Eq, PartialEq)]
12 struct SearchTest {
13     /// The name of this test, for debugging.
14     name: &'static str,
15     /// The patterns to search for.
16     patterns: &'static [&'static str],
17     /// The text to search.
18     haystack: &'static str,
19     /// Each match is a triple of (pattern_index, start, end), where
20     /// pattern_index is an index into `patterns` and `start`/`end` are indices
21     /// into `haystack`.
22     matches: &'static [(usize, usize, usize)],
23 }
24 
25 struct SearchTestOwned {
26     offset: usize,
27     name: String,
28     patterns: Vec<String>,
29     haystack: String,
30     matches: Vec<(usize, usize, usize)>,
31 }
32 
33 impl SearchTest {
variations(&self) -> Vec<SearchTestOwned>34     fn variations(&self) -> Vec<SearchTestOwned> {
35         let mut tests = vec![];
36         for i in 0..=260 {
37             tests.push(self.offset_prefix(i));
38             tests.push(self.offset_suffix(i));
39             tests.push(self.offset_both(i));
40         }
41         tests
42     }
43 
offset_both(&self, off: usize) -> SearchTestOwned44     fn offset_both(&self, off: usize) -> SearchTestOwned {
45         SearchTestOwned {
46             offset: off,
47             name: self.name.to_string(),
48             patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
49             haystack: format!(
50                 "{}{}{}",
51                 "Z".repeat(off),
52                 self.haystack,
53                 "Z".repeat(off)
54             ),
55             matches: self
56                 .matches
57                 .iter()
58                 .map(|&(id, s, e)| (id, s + off, e + off))
59                 .collect(),
60         }
61     }
62 
offset_prefix(&self, off: usize) -> SearchTestOwned63     fn offset_prefix(&self, off: usize) -> SearchTestOwned {
64         SearchTestOwned {
65             offset: off,
66             name: self.name.to_string(),
67             patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
68             haystack: format!("{}{}", "Z".repeat(off), self.haystack),
69             matches: self
70                 .matches
71                 .iter()
72                 .map(|&(id, s, e)| (id, s + off, e + off))
73                 .collect(),
74         }
75     }
76 
offset_suffix(&self, off: usize) -> SearchTestOwned77     fn offset_suffix(&self, off: usize) -> SearchTestOwned {
78         SearchTestOwned {
79             offset: off,
80             name: self.name.to_string(),
81             patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
82             haystack: format!("{}{}", self.haystack, "Z".repeat(off)),
83             matches: self.matches.to_vec(),
84         }
85     }
86 
87     // fn to_owned(&self) -> SearchTestOwned {
88     // SearchTestOwned {
89     // name: self.name.to_string(),
90     // patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
91     // haystack: self.haystack.to_string(),
92     // matches: self.matches.iter().cloned().collect(),
93     // }
94     // }
95 }
96 
97 /// Short-hand constructor for SearchTest. We use it a lot below.
98 macro_rules! t {
99     ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
100         SearchTest {
101             name: stringify!($name),
102             patterns: $patterns,
103             haystack: $haystack,
104             matches: $matches,
105         }
106     };
107 }
108 
109 /// A collection of test groups.
110 type TestCollection = &'static [&'static [SearchTest]];
111 
112 // Define several collections corresponding to the different type of match
113 // semantics supported. These collections have some overlap, but each
114 // collection should have some tests that no other collection has.
115 
116 /// Tests for leftmost-first match semantics.
117 const PACKED_LEFTMOST_FIRST: TestCollection =
118     &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
119 
120 /// Tests for leftmost-longest match semantics.
121 const PACKED_LEFTMOST_LONGEST: TestCollection =
122     &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
123 
124 // Now define the individual tests that make up the collections above.
125 
126 /// A collection of tests for the that should always be true regardless of
127 /// match semantics. That is, all combinations of leftmost-{first, longest}
128 /// should produce the same answer.
129 const BASICS: &'static [SearchTest] = &[
130     t!(basic001, &["a"], "", &[]),
131     t!(basic010, &["a"], "a", &[(0, 0, 1)]),
132     t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
133     t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
134     t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
135     t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
136     t!(basic060, &["a"], "bbb", &[]),
137     t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
138     t!(basic100, &["aa"], "", &[]),
139     t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
140     t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
141     t!(basic130, &["aa"], "abbab", &[]),
142     t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
143     t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
144     t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
145     t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
146     t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
147     t!(basic300, &["a", "b"], "", &[]),
148     t!(basic310, &["a", "b"], "z", &[]),
149     t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
150     t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
151     t!(
152         basic340,
153         &["a", "b"],
154         "abba",
155         &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
156     ),
157     t!(
158         basic350,
159         &["b", "a"],
160         "abba",
161         &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
162     ),
163     t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
164     t!(basic400, &["foo", "bar"], "", &[]),
165     t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
166     t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
167     t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
168     t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
169     t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
170     t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
171     t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
172     t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
173     t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
174     t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
175     t!(
176         basic720,
177         &["yabcdef", "bcdeyabc", "abcdezghi"],
178         "yabcdezghi",
179         &[(2, 1, 10),]
180     ),
181     t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
182     t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
183     t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
184     t!(
185         basic840,
186         &["ab", "ba"],
187         "abababa",
188         &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
189     ),
190     t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
191 ];
192 
193 /// Tests for leftmost match semantics. These should pass for both
194 /// leftmost-first and leftmost-longest match kinds. Stated differently, among
195 /// ambiguous matches, the longest match and the match that appeared first when
196 /// constructing the automaton should always be the same.
197 const LEFTMOST: &'static [SearchTest] = &[
198     t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
199     t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
200     t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
201     t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
202     t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
203     t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
204     t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
205     t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
206     t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
207     t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
208     t!(
209         leftmost360,
210         &["abcdefghi", "hz", "abcdefgh"],
211         "abcdefghz",
212         &[(2, 0, 8),]
213     ),
214     t!(
215         leftmost370,
216         &["abcdefghi", "cde", "hz", "abcdefgh"],
217         "abcdefghz",
218         &[(3, 0, 8),]
219     ),
220     t!(
221         leftmost380,
222         &["abcdefghi", "hz", "abcdefgh", "a"],
223         "abcdefghz",
224         &[(2, 0, 8),]
225     ),
226     t!(
227         leftmost390,
228         &["b", "abcdefghi", "hz", "abcdefgh"],
229         "abcdefghz",
230         &[(3, 0, 8),]
231     ),
232     t!(
233         leftmost400,
234         &["h", "abcdefghi", "hz", "abcdefgh"],
235         "abcdefghz",
236         &[(3, 0, 8),]
237     ),
238     t!(
239         leftmost410,
240         &["z", "abcdefghi", "hz", "abcdefgh"],
241         "abcdefghz",
242         &[(3, 0, 8), (0, 8, 9),]
243     ),
244 ];
245 
246 /// Tests for non-overlapping leftmost-first match semantics. These tests
247 /// should generally be specific to leftmost-first, which means they should
248 /// generally fail under leftmost-longest semantics.
249 const LEFTMOST_FIRST: &'static [SearchTest] = &[
250     t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
251     t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
252     t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
253     t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
254     t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
255     t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
256     t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
257     t!(
258         leftfirst310,
259         &["abcd", "b", "bce", "ce"],
260         "abce",
261         &[(1, 1, 2), (3, 2, 4),]
262     ),
263     t!(
264         leftfirst320,
265         &["a", "abcdefghi", "hz", "abcdefgh"],
266         "abcdefghz",
267         &[(0, 0, 1), (2, 7, 9),]
268     ),
269     t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
270     t!(
271         leftfirst340,
272         &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
273         "abcdef",
274         &[(0, 0, 6)]
275     ),
276 ];
277 
278 /// Tests for non-overlapping leftmost-longest match semantics. These tests
279 /// should generally be specific to leftmost-longest, which means they should
280 /// generally fail under leftmost-first semantics.
281 const LEFTMOST_LONGEST: &'static [SearchTest] = &[
282     t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
283     t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
284     t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
285     t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
286     t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
287     t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
288     t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
289     t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
290     t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
291     t!(
292         leftlong310,
293         &["a", "abcdefghi", "hz", "abcdefgh"],
294         "abcdefghz",
295         &[(3, 0, 8),]
296     ),
297     t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
298     t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
299     t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
300 ];
301 
302 /// Regression tests that are applied to all combinations.
303 ///
304 /// If regression tests are needed for specific match semantics, then add them
305 /// to the appropriate group above.
306 const REGRESSION: &'static [SearchTest] = &[
307     t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
308     t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
309     t!(
310         regression030,
311         &["libcore/", "libstd/"],
312         "libcore/char/methods.rs",
313         &[(0, 0, 8),]
314     ),
315     t!(
316         regression040,
317         &["libstd/", "libcore/"],
318         "libcore/char/methods.rs",
319         &[(1, 0, 8),]
320     ),
321     t!(
322         regression050,
323         &["\x00\x00\x01", "\x00\x00\x00"],
324         "\x00\x00\x00",
325         &[(1, 0, 3),]
326     ),
327     t!(
328         regression060,
329         &["\x00\x00\x00", "\x00\x00\x01"],
330         "\x00\x00\x00",
331         &[(0, 0, 3),]
332     ),
333 ];
334 
335 const TEDDY: &'static [SearchTest] = &[
336     t!(
337         teddy010,
338         &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
339         "abcdefghijk",
340         &[
341             (0, 0, 1),
342             (1, 1, 2),
343             (2, 2, 3),
344             (3, 3, 4),
345             (4, 4, 5),
346             (5, 5, 6),
347             (6, 6, 7),
348             (7, 7, 8),
349             (8, 8, 9),
350             (9, 9, 10),
351             (10, 10, 11)
352         ]
353     ),
354     t!(
355         teddy020,
356         &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
357         "abcdefghijk",
358         &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
359     ),
360     t!(
361         teddy030,
362         &["abc"],
363         "abcdefghijklmnopqrstuvwxyzabcdefghijk",
364         &[(0, 0, 3), (0, 26, 29)]
365     ),
366 ];
367 
368 // Now define a test for each combination of things above that we want to run.
369 // Since there are a few different combinations for each collection of tests,
370 // we define a couple of macros to avoid repetition drudgery. The testconfig
371 // macro constructs the automaton from a given match kind, and runs the search
372 // tests one-by-one over the given collection. The `with` parameter allows one
373 // to configure the config with additional parameters. The testcombo macro
374 // invokes testconfig in precisely this way: it sets up several tests where
375 // each one turns a different knob on Config.
376 
377 macro_rules! testconfig {
378     ($name:ident, $collection:expr, $with:expr) => {
379         #[test]
380         fn $name() {
381             run_search_tests($collection, |test| {
382                 let mut config = Config::new();
383                 $with(&mut config);
384                 config
385                     .builder()
386                     .extend(test.patterns.iter().map(|p| p.as_bytes()))
387                     .build()
388                     .unwrap()
389                     .find_iter(&test.haystack)
390                     .collect()
391             });
392         }
393     };
394 }
395 
396 #[cfg(target_arch = "x86_64")]
397 testconfig!(
398     search_default_leftmost_first,
399     PACKED_LEFTMOST_FIRST,
400     |_: &mut Config| {}
401 );
402 
403 #[cfg(target_arch = "x86_64")]
404 testconfig!(
405     search_default_leftmost_longest,
406     PACKED_LEFTMOST_LONGEST,
407     |c: &mut Config| {
408         c.match_kind(MatchKind::LeftmostLongest);
409     }
410 );
411 
412 #[cfg(target_arch = "x86_64")]
413 testconfig!(
414     search_teddy_leftmost_first,
415     PACKED_LEFTMOST_FIRST,
416     |c: &mut Config| {
417         c.force_teddy(true);
418     }
419 );
420 
421 #[cfg(target_arch = "x86_64")]
422 testconfig!(
423     search_teddy_leftmost_longest,
424     PACKED_LEFTMOST_LONGEST,
425     |c: &mut Config| {
426         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
427     }
428 );
429 
430 #[cfg(target_arch = "x86_64")]
431 testconfig!(
432     search_teddy_ssse3_leftmost_first,
433     PACKED_LEFTMOST_FIRST,
434     |c: &mut Config| {
435         c.force_teddy(true);
436         if is_x86_feature_detected!("ssse3") {
437             c.force_avx(Some(false));
438         }
439     }
440 );
441 
442 #[cfg(target_arch = "x86_64")]
443 testconfig!(
444     search_teddy_ssse3_leftmost_longest,
445     PACKED_LEFTMOST_LONGEST,
446     |c: &mut Config| {
447         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
448         if is_x86_feature_detected!("ssse3") {
449             c.force_avx(Some(false));
450         }
451     }
452 );
453 
454 #[cfg(target_arch = "x86_64")]
455 testconfig!(
456     search_teddy_avx2_leftmost_first,
457     PACKED_LEFTMOST_FIRST,
458     |c: &mut Config| {
459         c.force_teddy(true);
460         if is_x86_feature_detected!("avx2") {
461             c.force_avx(Some(true));
462         }
463     }
464 );
465 
466 #[cfg(target_arch = "x86_64")]
467 testconfig!(
468     search_teddy_avx2_leftmost_longest,
469     PACKED_LEFTMOST_LONGEST,
470     |c: &mut Config| {
471         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
472         if is_x86_feature_detected!("avx2") {
473             c.force_avx(Some(true));
474         }
475     }
476 );
477 
478 #[cfg(target_arch = "x86_64")]
479 testconfig!(
480     search_teddy_fat_leftmost_first,
481     PACKED_LEFTMOST_FIRST,
482     |c: &mut Config| {
483         c.force_teddy(true);
484         if is_x86_feature_detected!("avx2") {
485             c.force_teddy_fat(Some(true));
486         }
487     }
488 );
489 
490 #[cfg(target_arch = "x86_64")]
491 testconfig!(
492     search_teddy_fat_leftmost_longest,
493     PACKED_LEFTMOST_LONGEST,
494     |c: &mut Config| {
495         c.force_teddy(true).match_kind(MatchKind::LeftmostLongest);
496         if is_x86_feature_detected!("avx2") {
497             c.force_teddy_fat(Some(true));
498         }
499     }
500 );
501 
502 testconfig!(
503     search_rabinkarp_leftmost_first,
504     PACKED_LEFTMOST_FIRST,
505     |c: &mut Config| {
506         c.force_rabin_karp(true);
507     }
508 );
509 
510 testconfig!(
511     search_rabinkarp_leftmost_longest,
512     PACKED_LEFTMOST_LONGEST,
513     |c: &mut Config| {
514         c.force_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
515     }
516 );
517 
518 #[test]
search_tests_have_unique_names()519 fn search_tests_have_unique_names() {
520     let assert = |constname, tests: &[SearchTest]| {
521         let mut seen = HashMap::new(); // map from test name to position
522         for (i, test) in tests.iter().enumerate() {
523             if !seen.contains_key(test.name) {
524                 seen.insert(test.name, i);
525             } else {
526                 let last = seen[test.name];
527                 panic!(
528                     "{} tests have duplicate names at positions {} and {}",
529                     constname, last, i
530                 );
531             }
532         }
533     };
534     assert("BASICS", BASICS);
535     assert("LEFTMOST", LEFTMOST);
536     assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
537     assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
538     assert("REGRESSION", REGRESSION);
539     assert("TEDDY", TEDDY);
540 }
541 
run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>( which: TestCollection, mut f: F, )542 fn run_search_tests<F: FnMut(&SearchTestOwned) -> Vec<Match>>(
543     which: TestCollection,
544     mut f: F,
545 ) {
546     let get_match_triples =
547         |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
548             matches
549                 .into_iter()
550                 .map(|m| (m.pattern(), m.start(), m.end()))
551                 .collect()
552         };
553     for &tests in which {
554         for spec in tests {
555             for test in spec.variations() {
556                 assert_eq!(
557                     test.matches,
558                     get_match_triples(f(&test)).as_slice(),
559                     "test: {}, patterns: {:?}, haystack: {:?}, offset: {:?}",
560                     test.name,
561                     test.patterns,
562                     test.haystack,
563                     test.offset,
564                 );
565             }
566         }
567     }
568 }
569