• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2 This module provides forward and reverse substring search routines.
3 
4 Unlike the standard library's substring search routines, these work on
5 arbitrary bytes. For all non-empty needles, these routines will report exactly
6 the same values as the corresponding routines in the standard library. For
7 the empty needle, the standard library reports matches only at valid UTF-8
8 boundaries, where as these routines will report matches at every position.
9 
10 Other than being able to work on arbitrary bytes, the primary reason to prefer
11 these routines over the standard library routines is that these will generally
12 be faster. In some cases, significantly so.
13 
14 # Example: iterating over substring matches
15 
16 This example shows how to use [`find_iter`] to find occurrences of a substring
17 in a haystack.
18 
19 ```
20 use memchr::memmem;
21 
22 let haystack = b"foo bar foo baz foo";
23 
24 let mut it = memmem::find_iter(haystack, "foo");
25 assert_eq!(Some(0), it.next());
26 assert_eq!(Some(8), it.next());
27 assert_eq!(Some(16), it.next());
28 assert_eq!(None, it.next());
29 ```
30 
31 # Example: iterating over substring matches in reverse
32 
33 This example shows how to use [`rfind_iter`] to find occurrences of a substring
34 in a haystack starting from the end of the haystack.
35 
36 **NOTE:** This module does not implement double ended iterators, so reverse
37 searches aren't done by calling `rev` on a forward iterator.
38 
39 ```
40 use memchr::memmem;
41 
42 let haystack = b"foo bar foo baz foo";
43 
44 let mut it = memmem::rfind_iter(haystack, "foo");
45 assert_eq!(Some(16), it.next());
46 assert_eq!(Some(8), it.next());
47 assert_eq!(Some(0), it.next());
48 assert_eq!(None, it.next());
49 ```
50 
51 # Example: repeating a search for the same needle
52 
53 It may be possible for the overhead of constructing a substring searcher to be
54 measurable in some workloads. In cases where the same needle is used to search
55 many haystacks, it is possible to do construction once and thus to avoid it for
56 subsequent searches. This can be done with a [`Finder`] (or a [`FinderRev`] for
57 reverse searches).
58 
59 ```
60 use memchr::memmem;
61 
62 let finder = memmem::Finder::new("foo");
63 
64 assert_eq!(Some(4), finder.find(b"baz foo quux"));
65 assert_eq!(None, finder.find(b"quux baz bar"));
66 ```
67 */
68 
69 pub use self::prefilter::Prefilter;
70 
71 use crate::{
72     cow::CowBytes,
73     memmem::{
74         prefilter::{Pre, PrefilterFn, PrefilterState},
75         rabinkarp::NeedleHash,
76         rarebytes::RareNeedleBytes,
77     },
78 };
79 
80 /// Defines a suite of quickcheck properties for forward and reverse
81 /// substring searching.
82 ///
83 /// This is defined in this specific spot so that it can be used freely among
84 /// the different substring search implementations. I couldn't be bothered to
85 /// fight with the macro-visibility rules enough to figure out how to stuff it
86 /// somewhere more convenient.
87 #[cfg(all(test, feature = "std"))]
88 macro_rules! define_memmem_quickcheck_tests {
89     ($fwd:expr, $rev:expr) => {
90         use crate::memmem::proptests;
91 
92         quickcheck::quickcheck! {
93             fn qc_fwd_prefix_is_substring(bs: Vec<u8>) -> bool {
94                 proptests::prefix_is_substring(false, &bs, $fwd)
95             }
96 
97             fn qc_fwd_suffix_is_substring(bs: Vec<u8>) -> bool {
98                 proptests::suffix_is_substring(false, &bs, $fwd)
99             }
100 
101             fn qc_fwd_matches_naive(
102                 haystack: Vec<u8>,
103                 needle: Vec<u8>
104             ) -> bool {
105                 proptests::matches_naive(false, &haystack, &needle, $fwd)
106             }
107 
108             fn qc_rev_prefix_is_substring(bs: Vec<u8>) -> bool {
109                 proptests::prefix_is_substring(true, &bs, $rev)
110             }
111 
112             fn qc_rev_suffix_is_substring(bs: Vec<u8>) -> bool {
113                 proptests::suffix_is_substring(true, &bs, $rev)
114             }
115 
116             fn qc_rev_matches_naive(
117                 haystack: Vec<u8>,
118                 needle: Vec<u8>
119             ) -> bool {
120                 proptests::matches_naive(true, &haystack, &needle, $rev)
121             }
122         }
123     };
124 }
125 
126 /// Defines a suite of "simple" hand-written tests for a substring
127 /// implementation.
128 ///
129 /// This is defined here for the same reason that
130 /// define_memmem_quickcheck_tests is defined here.
131 #[cfg(test)]
132 macro_rules! define_memmem_simple_tests {
133     ($fwd:expr, $rev:expr) => {
134         use crate::memmem::testsimples;
135 
136         #[test]
137         fn simple_forward() {
138             testsimples::run_search_tests_fwd($fwd);
139         }
140 
141         #[test]
142         fn simple_reverse() {
143             testsimples::run_search_tests_rev($rev);
144         }
145     };
146 }
147 
148 mod byte_frequencies;
149 #[cfg(memchr_runtime_simd)]
150 mod genericsimd;
151 mod prefilter;
152 mod rabinkarp;
153 mod rarebytes;
154 mod twoway;
155 mod util;
156 #[cfg(memchr_runtime_simd)]
157 mod vector;
158 #[cfg(all(memchr_runtime_wasm128))]
159 mod wasm;
160 #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
161 mod x86;
162 
163 /// Returns an iterator over all non-overlapping occurrences of a substring in
164 /// a haystack.
165 ///
166 /// # Complexity
167 ///
168 /// This routine is guaranteed to have worst case linear time complexity
169 /// with respect to both the needle and the haystack. That is, this runs
170 /// in `O(needle.len() + haystack.len())` time.
171 ///
172 /// This routine is also guaranteed to have worst case constant space
173 /// complexity.
174 ///
175 /// # Examples
176 ///
177 /// Basic usage:
178 ///
179 /// ```
180 /// use memchr::memmem;
181 ///
182 /// let haystack = b"foo bar foo baz foo";
183 /// let mut it = memmem::find_iter(haystack, b"foo");
184 /// assert_eq!(Some(0), it.next());
185 /// assert_eq!(Some(8), it.next());
186 /// assert_eq!(Some(16), it.next());
187 /// assert_eq!(None, it.next());
188 /// ```
189 #[inline]
find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( haystack: &'h [u8], needle: &'n N, ) -> FindIter<'h, 'n>190 pub fn find_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
191     haystack: &'h [u8],
192     needle: &'n N,
193 ) -> FindIter<'h, 'n> {
194     FindIter::new(haystack, Finder::new(needle))
195 }
196 
197 /// Returns a reverse iterator over all non-overlapping occurrences of a
198 /// substring in a haystack.
199 ///
200 /// # Complexity
201 ///
202 /// This routine is guaranteed to have worst case linear time complexity
203 /// with respect to both the needle and the haystack. That is, this runs
204 /// in `O(needle.len() + haystack.len())` time.
205 ///
206 /// This routine is also guaranteed to have worst case constant space
207 /// complexity.
208 ///
209 /// # Examples
210 ///
211 /// Basic usage:
212 ///
213 /// ```
214 /// use memchr::memmem;
215 ///
216 /// let haystack = b"foo bar foo baz foo";
217 /// let mut it = memmem::rfind_iter(haystack, b"foo");
218 /// assert_eq!(Some(16), it.next());
219 /// assert_eq!(Some(8), it.next());
220 /// assert_eq!(Some(0), it.next());
221 /// assert_eq!(None, it.next());
222 /// ```
223 #[inline]
rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>( haystack: &'h [u8], needle: &'n N, ) -> FindRevIter<'h, 'n>224 pub fn rfind_iter<'h, 'n, N: 'n + ?Sized + AsRef<[u8]>>(
225     haystack: &'h [u8],
226     needle: &'n N,
227 ) -> FindRevIter<'h, 'n> {
228     FindRevIter::new(haystack, FinderRev::new(needle))
229 }
230 
231 /// Returns the index of the first occurrence of the given needle.
232 ///
233 /// Note that if you're are searching for the same needle in many different
234 /// small haystacks, it may be faster to initialize a [`Finder`] once,
235 /// and reuse it for each search.
236 ///
237 /// # Complexity
238 ///
239 /// This routine is guaranteed to have worst case linear time complexity
240 /// with respect to both the needle and the haystack. That is, this runs
241 /// in `O(needle.len() + haystack.len())` time.
242 ///
243 /// This routine is also guaranteed to have worst case constant space
244 /// complexity.
245 ///
246 /// # Examples
247 ///
248 /// Basic usage:
249 ///
250 /// ```
251 /// use memchr::memmem;
252 ///
253 /// let haystack = b"foo bar baz";
254 /// assert_eq!(Some(0), memmem::find(haystack, b"foo"));
255 /// assert_eq!(Some(4), memmem::find(haystack, b"bar"));
256 /// assert_eq!(None, memmem::find(haystack, b"quux"));
257 /// ```
258 #[inline]
find(haystack: &[u8], needle: &[u8]) -> Option<usize>259 pub fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
260     if haystack.len() < 64 {
261         rabinkarp::find(haystack, needle)
262     } else {
263         Finder::new(needle).find(haystack)
264     }
265 }
266 
267 /// Returns the index of the last occurrence of the given needle.
268 ///
269 /// Note that if you're are searching for the same needle in many different
270 /// small haystacks, it may be faster to initialize a [`FinderRev`] once,
271 /// and reuse it for each search.
272 ///
273 /// # Complexity
274 ///
275 /// This routine is guaranteed to have worst case linear time complexity
276 /// with respect to both the needle and the haystack. That is, this runs
277 /// in `O(needle.len() + haystack.len())` time.
278 ///
279 /// This routine is also guaranteed to have worst case constant space
280 /// complexity.
281 ///
282 /// # Examples
283 ///
284 /// Basic usage:
285 ///
286 /// ```
287 /// use memchr::memmem;
288 ///
289 /// let haystack = b"foo bar baz";
290 /// assert_eq!(Some(0), memmem::rfind(haystack, b"foo"));
291 /// assert_eq!(Some(4), memmem::rfind(haystack, b"bar"));
292 /// assert_eq!(Some(8), memmem::rfind(haystack, b"ba"));
293 /// assert_eq!(None, memmem::rfind(haystack, b"quux"));
294 /// ```
295 #[inline]
rfind(haystack: &[u8], needle: &[u8]) -> Option<usize>296 pub fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
297     if haystack.len() < 64 {
298         rabinkarp::rfind(haystack, needle)
299     } else {
300         FinderRev::new(needle).rfind(haystack)
301     }
302 }
303 
304 /// An iterator over non-overlapping substring matches.
305 ///
306 /// Matches are reported by the byte offset at which they begin.
307 ///
308 /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
309 /// needle.
310 #[derive(Debug)]
311 pub struct FindIter<'h, 'n> {
312     haystack: &'h [u8],
313     prestate: PrefilterState,
314     finder: Finder<'n>,
315     pos: usize,
316 }
317 
318 impl<'h, 'n> FindIter<'h, 'n> {
319     #[inline(always)]
new( haystack: &'h [u8], finder: Finder<'n>, ) -> FindIter<'h, 'n>320     pub(crate) fn new(
321         haystack: &'h [u8],
322         finder: Finder<'n>,
323     ) -> FindIter<'h, 'n> {
324         let prestate = finder.searcher.prefilter_state();
325         FindIter { haystack, prestate, finder, pos: 0 }
326     }
327 
328     /// Convert this iterator into its owned variant, such that it no longer
329     /// borrows the finder and needle.
330     ///
331     /// If this is already an owned iterator, then this is a no-op. Otherwise,
332     /// this copies the needle.
333     ///
334     /// This is only available when the `std` feature is enabled.
335     #[cfg(feature = "std")]
336     #[inline]
into_owned(self) -> FindIter<'h, 'static>337     pub fn into_owned(self) -> FindIter<'h, 'static> {
338         FindIter {
339             haystack: self.haystack,
340             prestate: self.prestate,
341             finder: self.finder.into_owned(),
342             pos: self.pos,
343         }
344     }
345 }
346 
347 impl<'h, 'n> Iterator for FindIter<'h, 'n> {
348     type Item = usize;
349 
next(&mut self) -> Option<usize>350     fn next(&mut self) -> Option<usize> {
351         if self.pos > self.haystack.len() {
352             return None;
353         }
354         let result = self
355             .finder
356             .searcher
357             .find(&mut self.prestate, &self.haystack[self.pos..]);
358         match result {
359             None => None,
360             Some(i) => {
361                 let pos = self.pos + i;
362                 self.pos = pos + core::cmp::max(1, self.finder.needle().len());
363                 Some(pos)
364             }
365         }
366     }
367 }
368 
369 /// An iterator over non-overlapping substring matches in reverse.
370 ///
371 /// Matches are reported by the byte offset at which they begin.
372 ///
373 /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
374 /// needle.
375 #[derive(Debug)]
376 pub struct FindRevIter<'h, 'n> {
377     haystack: &'h [u8],
378     finder: FinderRev<'n>,
379     /// When searching with an empty needle, this gets set to `None` after
380     /// we've yielded the last element at `0`.
381     pos: Option<usize>,
382 }
383 
384 impl<'h, 'n> FindRevIter<'h, 'n> {
385     #[inline(always)]
new( haystack: &'h [u8], finder: FinderRev<'n>, ) -> FindRevIter<'h, 'n>386     pub(crate) fn new(
387         haystack: &'h [u8],
388         finder: FinderRev<'n>,
389     ) -> FindRevIter<'h, 'n> {
390         let pos = Some(haystack.len());
391         FindRevIter { haystack, finder, pos }
392     }
393 
394     /// Convert this iterator into its owned variant, such that it no longer
395     /// borrows the finder and needle.
396     ///
397     /// If this is already an owned iterator, then this is a no-op. Otherwise,
398     /// this copies the needle.
399     ///
400     /// This is only available when the `std` feature is enabled.
401     #[cfg(feature = "std")]
402     #[inline]
into_owned(self) -> FindRevIter<'h, 'static>403     pub fn into_owned(self) -> FindRevIter<'h, 'static> {
404         FindRevIter {
405             haystack: self.haystack,
406             finder: self.finder.into_owned(),
407             pos: self.pos,
408         }
409     }
410 }
411 
412 impl<'h, 'n> Iterator for FindRevIter<'h, 'n> {
413     type Item = usize;
414 
next(&mut self) -> Option<usize>415     fn next(&mut self) -> Option<usize> {
416         let pos = match self.pos {
417             None => return None,
418             Some(pos) => pos,
419         };
420         let result = self.finder.rfind(&self.haystack[..pos]);
421         match result {
422             None => None,
423             Some(i) => {
424                 if pos == i {
425                     self.pos = pos.checked_sub(1);
426                 } else {
427                     self.pos = Some(i);
428                 }
429                 Some(i)
430             }
431         }
432     }
433 }
434 
435 /// A single substring searcher fixed to a particular needle.
436 ///
437 /// The purpose of this type is to permit callers to construct a substring
438 /// searcher that can be used to search haystacks without the overhead of
439 /// constructing the searcher in the first place. This is a somewhat niche
440 /// concern when it's necessary to re-use the same needle to search multiple
441 /// different haystacks with as little overhead as possible. In general, using
442 /// [`find`] is good enough, but `Finder` is useful when you can meaningfully
443 /// observe searcher construction time in a profile.
444 ///
445 /// When the `std` feature is enabled, then this type has an `into_owned`
446 /// version which permits building a `Finder` that is not connected to
447 /// the lifetime of its needle.
448 #[derive(Clone, Debug)]
449 pub struct Finder<'n> {
450     searcher: Searcher<'n>,
451 }
452 
453 impl<'n> Finder<'n> {
454     /// Create a new finder for the given needle.
455     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n>456     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> {
457         FinderBuilder::new().build_forward(needle)
458     }
459 
460     /// Returns the index of the first occurrence of this needle in the given
461     /// haystack.
462     ///
463     /// # Complexity
464     ///
465     /// This routine is guaranteed to have worst case linear time complexity
466     /// with respect to both the needle and the haystack. That is, this runs
467     /// in `O(needle.len() + haystack.len())` time.
468     ///
469     /// This routine is also guaranteed to have worst case constant space
470     /// complexity.
471     ///
472     /// # Examples
473     ///
474     /// Basic usage:
475     ///
476     /// ```
477     /// use memchr::memmem::Finder;
478     ///
479     /// let haystack = b"foo bar baz";
480     /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
481     /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
482     /// assert_eq!(None, Finder::new("quux").find(haystack));
483     /// ```
find(&self, haystack: &[u8]) -> Option<usize>484     pub fn find(&self, haystack: &[u8]) -> Option<usize> {
485         self.searcher.find(&mut self.searcher.prefilter_state(), haystack)
486     }
487 
488     /// Returns an iterator over all occurrences of a substring in a haystack.
489     ///
490     /// # Complexity
491     ///
492     /// This routine is guaranteed to have worst case linear time complexity
493     /// with respect to both the needle and the haystack. That is, this runs
494     /// in `O(needle.len() + haystack.len())` time.
495     ///
496     /// This routine is also guaranteed to have worst case constant space
497     /// complexity.
498     ///
499     /// # Examples
500     ///
501     /// Basic usage:
502     ///
503     /// ```
504     /// use memchr::memmem::Finder;
505     ///
506     /// let haystack = b"foo bar foo baz foo";
507     /// let finder = Finder::new(b"foo");
508     /// let mut it = finder.find_iter(haystack);
509     /// assert_eq!(Some(0), it.next());
510     /// assert_eq!(Some(8), it.next());
511     /// assert_eq!(Some(16), it.next());
512     /// assert_eq!(None, it.next());
513     /// ```
514     #[inline]
find_iter<'a, 'h>( &'a self, haystack: &'h [u8], ) -> FindIter<'h, 'a>515     pub fn find_iter<'a, 'h>(
516         &'a self,
517         haystack: &'h [u8],
518     ) -> FindIter<'h, 'a> {
519         FindIter::new(haystack, self.as_ref())
520     }
521 
522     /// Convert this finder into its owned variant, such that it no longer
523     /// borrows the needle.
524     ///
525     /// If this is already an owned finder, then this is a no-op. Otherwise,
526     /// this copies the needle.
527     ///
528     /// This is only available when the `std` feature is enabled.
529     #[cfg(feature = "std")]
530     #[inline]
into_owned(self) -> Finder<'static>531     pub fn into_owned(self) -> Finder<'static> {
532         Finder { searcher: self.searcher.into_owned() }
533     }
534 
535     /// Convert this finder into its borrowed variant.
536     ///
537     /// This is primarily useful if your finder is owned and you'd like to
538     /// store its borrowed variant in some intermediate data structure.
539     ///
540     /// Note that the lifetime parameter of the returned finder is tied to the
541     /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
542     /// needle itself. Namely, a finder's needle can be either borrowed or
543     /// owned, so the lifetime of the needle returned must necessarily be the
544     /// shorter of the two.
545     #[inline]
as_ref(&self) -> Finder<'_>546     pub fn as_ref(&self) -> Finder<'_> {
547         Finder { searcher: self.searcher.as_ref() }
548     }
549 
550     /// Returns the needle that this finder searches for.
551     ///
552     /// Note that the lifetime of the needle returned is tied to the lifetime
553     /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
554     /// finder's needle can be either borrowed or owned, so the lifetime of the
555     /// needle returned must necessarily be the shorter of the two.
556     #[inline]
needle(&self) -> &[u8]557     pub fn needle(&self) -> &[u8] {
558         self.searcher.needle()
559     }
560 }
561 
562 /// A single substring reverse searcher fixed to a particular needle.
563 ///
564 /// The purpose of this type is to permit callers to construct a substring
565 /// searcher that can be used to search haystacks without the overhead of
566 /// constructing the searcher in the first place. This is a somewhat niche
567 /// concern when it's necessary to re-use the same needle to search multiple
568 /// different haystacks with as little overhead as possible. In general,
569 /// using [`rfind`] is good enough, but `FinderRev` is useful when you can
570 /// meaningfully observe searcher construction time in a profile.
571 ///
572 /// When the `std` feature is enabled, then this type has an `into_owned`
573 /// version which permits building a `FinderRev` that is not connected to
574 /// the lifetime of its needle.
575 #[derive(Clone, Debug)]
576 pub struct FinderRev<'n> {
577     searcher: SearcherRev<'n>,
578 }
579 
580 impl<'n> FinderRev<'n> {
581     /// Create a new reverse finder for the given needle.
582     #[inline]
new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n>583     pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> {
584         FinderBuilder::new().build_reverse(needle)
585     }
586 
587     /// Returns the index of the last occurrence of this needle in the given
588     /// haystack.
589     ///
590     /// The haystack may be any type that can be cheaply converted into a
591     /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
592     ///
593     /// # Complexity
594     ///
595     /// This routine is guaranteed to have worst case linear time complexity
596     /// with respect to both the needle and the haystack. That is, this runs
597     /// in `O(needle.len() + haystack.len())` time.
598     ///
599     /// This routine is also guaranteed to have worst case constant space
600     /// complexity.
601     ///
602     /// # Examples
603     ///
604     /// Basic usage:
605     ///
606     /// ```
607     /// use memchr::memmem::FinderRev;
608     ///
609     /// let haystack = b"foo bar baz";
610     /// assert_eq!(Some(0), FinderRev::new("foo").rfind(haystack));
611     /// assert_eq!(Some(4), FinderRev::new("bar").rfind(haystack));
612     /// assert_eq!(None, FinderRev::new("quux").rfind(haystack));
613     /// ```
rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize>614     pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
615         self.searcher.rfind(haystack.as_ref())
616     }
617 
618     /// Returns a reverse iterator over all occurrences of a substring in a
619     /// haystack.
620     ///
621     /// # Complexity
622     ///
623     /// This routine is guaranteed to have worst case linear time complexity
624     /// with respect to both the needle and the haystack. That is, this runs
625     /// in `O(needle.len() + haystack.len())` time.
626     ///
627     /// This routine is also guaranteed to have worst case constant space
628     /// complexity.
629     ///
630     /// # Examples
631     ///
632     /// Basic usage:
633     ///
634     /// ```
635     /// use memchr::memmem::FinderRev;
636     ///
637     /// let haystack = b"foo bar foo baz foo";
638     /// let finder = FinderRev::new(b"foo");
639     /// let mut it = finder.rfind_iter(haystack);
640     /// assert_eq!(Some(16), it.next());
641     /// assert_eq!(Some(8), it.next());
642     /// assert_eq!(Some(0), it.next());
643     /// assert_eq!(None, it.next());
644     /// ```
645     #[inline]
rfind_iter<'a, 'h>( &'a self, haystack: &'h [u8], ) -> FindRevIter<'h, 'a>646     pub fn rfind_iter<'a, 'h>(
647         &'a self,
648         haystack: &'h [u8],
649     ) -> FindRevIter<'h, 'a> {
650         FindRevIter::new(haystack, self.as_ref())
651     }
652 
653     /// Convert this finder into its owned variant, such that it no longer
654     /// borrows the needle.
655     ///
656     /// If this is already an owned finder, then this is a no-op. Otherwise,
657     /// this copies the needle.
658     ///
659     /// This is only available when the `std` feature is enabled.
660     #[cfg(feature = "std")]
661     #[inline]
into_owned(self) -> FinderRev<'static>662     pub fn into_owned(self) -> FinderRev<'static> {
663         FinderRev { searcher: self.searcher.into_owned() }
664     }
665 
666     /// Convert this finder into its borrowed variant.
667     ///
668     /// This is primarily useful if your finder is owned and you'd like to
669     /// store its borrowed variant in some intermediate data structure.
670     ///
671     /// Note that the lifetime parameter of the returned finder is tied to the
672     /// lifetime of `self`, and may be shorter than the `'n` lifetime of the
673     /// needle itself. Namely, a finder's needle can be either borrowed or
674     /// owned, so the lifetime of the needle returned must necessarily be the
675     /// shorter of the two.
676     #[inline]
as_ref(&self) -> FinderRev<'_>677     pub fn as_ref(&self) -> FinderRev<'_> {
678         FinderRev { searcher: self.searcher.as_ref() }
679     }
680 
681     /// Returns the needle that this finder searches for.
682     ///
683     /// Note that the lifetime of the needle returned is tied to the lifetime
684     /// of the finder, and may be shorter than the `'n` lifetime. Namely, a
685     /// finder's needle can be either borrowed or owned, so the lifetime of the
686     /// needle returned must necessarily be the shorter of the two.
687     #[inline]
needle(&self) -> &[u8]688     pub fn needle(&self) -> &[u8] {
689         self.searcher.needle()
690     }
691 }
692 
693 /// A builder for constructing non-default forward or reverse memmem finders.
694 ///
695 /// A builder is primarily useful for configuring a substring searcher.
696 /// Currently, the only configuration exposed is the ability to disable
697 /// heuristic prefilters used to speed up certain searches.
698 #[derive(Clone, Debug, Default)]
699 pub struct FinderBuilder {
700     config: SearcherConfig,
701 }
702 
703 impl FinderBuilder {
704     /// Create a new finder builder with default settings.
new() -> FinderBuilder705     pub fn new() -> FinderBuilder {
706         FinderBuilder::default()
707     }
708 
709     /// Build a forward finder using the given needle from the current
710     /// settings.
build_forward<'n, B: ?Sized + AsRef<[u8]>>( &self, needle: &'n B, ) -> Finder<'n>711     pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>(
712         &self,
713         needle: &'n B,
714     ) -> Finder<'n> {
715         Finder { searcher: Searcher::new(self.config, needle.as_ref()) }
716     }
717 
718     /// Build a reverse finder using the given needle from the current
719     /// settings.
build_reverse<'n, B: ?Sized + AsRef<[u8]>>( &self, needle: &'n B, ) -> FinderRev<'n>720     pub fn build_reverse<'n, B: ?Sized + AsRef<[u8]>>(
721         &self,
722         needle: &'n B,
723     ) -> FinderRev<'n> {
724         FinderRev { searcher: SearcherRev::new(needle.as_ref()) }
725     }
726 
727     /// Configure the prefilter setting for the finder.
728     ///
729     /// See the documentation for [`Prefilter`] for more discussion on why
730     /// you might want to configure this.
prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder731     pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder {
732         self.config.prefilter = prefilter;
733         self
734     }
735 }
736 
737 /// The internal implementation of a forward substring searcher.
738 ///
739 /// The reality is that this is a "meta" searcher. Namely, depending on a
740 /// variety of parameters (CPU support, target, needle size, haystack size and
741 /// even dynamic properties such as prefilter effectiveness), the actual
742 /// algorithm employed to do substring search may change.
743 #[derive(Clone, Debug)]
744 struct Searcher<'n> {
745     /// The actual needle we're searching for.
746     ///
747     /// A CowBytes is like a Cow<[u8]>, except in no_std environments, it is
748     /// specialized to a single variant (the borrowed form).
749     needle: CowBytes<'n>,
750     /// A collection of facts computed on the needle that are useful for more
751     /// than one substring search algorithm.
752     ninfo: NeedleInfo,
753     /// A prefilter function, if it was deemed appropriate.
754     ///
755     /// Some substring search implementations (like Two-Way) benefit greatly
756     /// if we can quickly find candidate starting positions for a match.
757     prefn: Option<PrefilterFn>,
758     /// The actual substring implementation in use.
759     kind: SearcherKind,
760 }
761 
762 /// A collection of facts computed about a search needle.
763 ///
764 /// We group these things together because it's useful to be able to hand them
765 /// to prefilters or substring algorithms that want them.
766 #[derive(Clone, Copy, Debug)]
767 pub(crate) struct NeedleInfo {
768     /// The offsets of "rare" bytes detected in the needle.
769     ///
770     /// This is meant to be a heuristic in order to maximize the effectiveness
771     /// of vectorized code. Namely, vectorized code tends to focus on only
772     /// one or two bytes. If we pick bytes from the needle that occur
773     /// infrequently, then more time will be spent in the vectorized code and
774     /// will likely make the overall search (much) faster.
775     ///
776     /// Of course, this is only a heuristic based on a background frequency
777     /// distribution of bytes. But it tends to work very well in practice.
778     pub(crate) rarebytes: RareNeedleBytes,
779     /// A Rabin-Karp hash of the needle.
780     ///
781     /// This is store here instead of in a more specific Rabin-Karp search
782     /// since Rabin-Karp may be used even if another SearchKind corresponds
783     /// to some other search implementation. e.g., If measurements suggest RK
784     /// is faster in some cases or if a search implementation can't handle
785     /// particularly small haystack. (Moreover, we cannot use RK *generally*,
786     /// since its worst case time is multiplicative. Instead, we only use it
787     /// some small haystacks, where "small" is a constant.)
788     pub(crate) nhash: NeedleHash,
789 }
790 
791 /// Configuration for substring search.
792 #[derive(Clone, Copy, Debug, Default)]
793 struct SearcherConfig {
794     /// This permits changing the behavior of the prefilter, since it can have
795     /// a variable impact on performance.
796     prefilter: Prefilter,
797 }
798 
799 #[derive(Clone, Debug)]
800 enum SearcherKind {
801     /// A special case for empty needles. An empty needle always matches, even
802     /// in an empty haystack.
803     Empty,
804     /// This is used whenever the needle is a single byte. In this case, we
805     /// always use memchr.
806     OneByte(u8),
807     /// Two-Way is the generic work horse and is what provides our additive
808     /// linear time guarantee. In general, it's used when the needle is bigger
809     /// than 8 bytes or so.
810     TwoWay(twoway::Forward),
811     #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
812     GenericSIMD128(x86::sse::Forward),
813     #[cfg(memchr_runtime_wasm128)]
814     GenericSIMD128(wasm::Forward),
815     #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
816     GenericSIMD256(x86::avx::Forward),
817 }
818 
819 impl<'n> Searcher<'n> {
new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n>820     fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
821         use self::SearcherKind::*;
822 
823         let ninfo = NeedleInfo::new(needle);
824         let mk = |kind: SearcherKind| {
825             let prefn = prefilter::forward(
826                 &config.prefilter,
827                 &ninfo.rarebytes,
828                 needle,
829             );
830             Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
831         };
832         if needle.len() == 0 {
833             return mk(Empty);
834         }
835         if needle.len() == 1 {
836             return mk(OneByte(needle[0]));
837         }
838         #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
839         {
840             if let Some(fwd) = x86::avx::Forward::new(&ninfo, needle) {
841                 return mk(GenericSIMD256(fwd));
842             } else if let Some(fwd) = x86::sse::Forward::new(&ninfo, needle) {
843                 return mk(GenericSIMD128(fwd));
844             }
845         }
846         #[cfg(all(target_arch = "wasm32", memchr_runtime_simd))]
847         {
848             if let Some(fwd) = wasm::Forward::new(&ninfo, needle) {
849                 return mk(GenericSIMD128(fwd));
850             }
851         }
852 
853         mk(TwoWay(twoway::Forward::new(needle)))
854     }
855 
856     /// Return a fresh prefilter state that can be used with this searcher.
857     /// A prefilter state is used to track the effectiveness of a searcher's
858     /// prefilter for speeding up searches. Therefore, the prefilter state
859     /// should generally be reused on subsequent searches (such as in an
860     /// iterator). For searches on a different haystack, then a new prefilter
861     /// state should be used.
862     ///
863     /// This always initializes a valid (but possibly inert) prefilter state
864     /// even if this searcher does not have a prefilter enabled.
prefilter_state(&self) -> PrefilterState865     fn prefilter_state(&self) -> PrefilterState {
866         if self.prefn.is_none() {
867             PrefilterState::inert()
868         } else {
869             PrefilterState::new()
870         }
871     }
872 
needle(&self) -> &[u8]873     fn needle(&self) -> &[u8] {
874         self.needle.as_slice()
875     }
876 
as_ref(&self) -> Searcher<'_>877     fn as_ref(&self) -> Searcher<'_> {
878         use self::SearcherKind::*;
879 
880         let kind = match self.kind {
881             Empty => Empty,
882             OneByte(b) => OneByte(b),
883             TwoWay(tw) => TwoWay(tw),
884             #[cfg(all(not(miri), memchr_runtime_simd))]
885             GenericSIMD128(gs) => GenericSIMD128(gs),
886             #[cfg(all(
887                 not(miri),
888                 target_arch = "x86_64",
889                 memchr_runtime_simd
890             ))]
891             GenericSIMD256(gs) => GenericSIMD256(gs),
892         };
893         Searcher {
894             needle: CowBytes::new(self.needle()),
895             ninfo: self.ninfo,
896             prefn: self.prefn,
897             kind,
898         }
899     }
900 
901     #[cfg(feature = "std")]
into_owned(self) -> Searcher<'static>902     fn into_owned(self) -> Searcher<'static> {
903         use self::SearcherKind::*;
904 
905         let kind = match self.kind {
906             Empty => Empty,
907             OneByte(b) => OneByte(b),
908             TwoWay(tw) => TwoWay(tw),
909             #[cfg(all(not(miri), memchr_runtime_simd))]
910             GenericSIMD128(gs) => GenericSIMD128(gs),
911             #[cfg(all(
912                 not(miri),
913                 target_arch = "x86_64",
914                 memchr_runtime_simd
915             ))]
916             GenericSIMD256(gs) => GenericSIMD256(gs),
917         };
918         Searcher {
919             needle: self.needle.into_owned(),
920             ninfo: self.ninfo,
921             prefn: self.prefn,
922             kind,
923         }
924     }
925 
926     /// Implements forward substring search by selecting the implementation
927     /// chosen at construction and executing it on the given haystack with the
928     /// prefilter's current state of effectiveness.
929     #[inline(always)]
find( &self, state: &mut PrefilterState, haystack: &[u8], ) -> Option<usize>930     fn find(
931         &self,
932         state: &mut PrefilterState,
933         haystack: &[u8],
934     ) -> Option<usize> {
935         use self::SearcherKind::*;
936 
937         let needle = self.needle();
938         if haystack.len() < needle.len() {
939             return None;
940         }
941         match self.kind {
942             Empty => Some(0),
943             OneByte(b) => crate::memchr(b, haystack),
944             TwoWay(ref tw) => {
945                 // For very short haystacks (e.g., where the prefilter probably
946                 // can't run), it's faster to just run RK.
947                 if rabinkarp::is_fast(haystack, needle) {
948                     rabinkarp::find_with(&self.ninfo.nhash, haystack, needle)
949                 } else {
950                     self.find_tw(tw, state, haystack, needle)
951                 }
952             }
953             #[cfg(all(not(miri), memchr_runtime_simd))]
954             GenericSIMD128(ref gs) => {
955                 // The SIMD matcher can't handle particularly short haystacks,
956                 // so we fall back to RK in these cases.
957                 if haystack.len() < gs.min_haystack_len() {
958                     rabinkarp::find_with(&self.ninfo.nhash, haystack, needle)
959                 } else {
960                     gs.find(haystack, needle)
961                 }
962             }
963             #[cfg(all(
964                 not(miri),
965                 target_arch = "x86_64",
966                 memchr_runtime_simd
967             ))]
968             GenericSIMD256(ref gs) => {
969                 // The SIMD matcher can't handle particularly short haystacks,
970                 // so we fall back to RK in these cases.
971                 if haystack.len() < gs.min_haystack_len() {
972                     rabinkarp::find_with(&self.ninfo.nhash, haystack, needle)
973                 } else {
974                     gs.find(haystack, needle)
975                 }
976             }
977         }
978     }
979 
980     /// Calls Two-Way on the given haystack/needle.
981     ///
982     /// This is marked as unlineable since it seems to have a better overall
983     /// effect on benchmarks. However, this is one of those cases where
984     /// inlining it results an improvement in other benchmarks too, so I
985     /// suspect we just don't have enough data yet to make the right call here.
986     ///
987     /// I suspect the main problem is that this function contains two different
988     /// inlined copies of Two-Way: one with and one without prefilters enabled.
989     #[inline(never)]
find_tw( &self, tw: &twoway::Forward, state: &mut PrefilterState, haystack: &[u8], needle: &[u8], ) -> Option<usize>990     fn find_tw(
991         &self,
992         tw: &twoway::Forward,
993         state: &mut PrefilterState,
994         haystack: &[u8],
995         needle: &[u8],
996     ) -> Option<usize> {
997         if let Some(prefn) = self.prefn {
998             // We used to look at the length of a haystack here. That is, if
999             // it was too small, then don't bother with the prefilter. But two
1000             // things changed: the prefilter falls back to memchr for small
1001             // haystacks, and, above, Rabin-Karp is employed for tiny haystacks
1002             // anyway.
1003             if state.is_effective() {
1004                 let mut pre = Pre { state, prefn, ninfo: &self.ninfo };
1005                 return tw.find(Some(&mut pre), haystack, needle);
1006             }
1007         }
1008         tw.find(None, haystack, needle)
1009     }
1010 }
1011 
1012 impl NeedleInfo {
new(needle: &[u8]) -> NeedleInfo1013     pub(crate) fn new(needle: &[u8]) -> NeedleInfo {
1014         NeedleInfo {
1015             rarebytes: RareNeedleBytes::forward(needle),
1016             nhash: NeedleHash::forward(needle),
1017         }
1018     }
1019 }
1020 
1021 /// The internal implementation of a reverse substring searcher.
1022 ///
1023 /// See the forward searcher docs for more details. Currently, the reverse
1024 /// searcher is considerably simpler since it lacks prefilter support. This
1025 /// was done because it adds a lot of code, and more surface area to test. And
1026 /// in particular, it's not clear whether a prefilter on reverse searching is
1027 /// worth it. (If you have a compelling use case, please file an issue!)
1028 #[derive(Clone, Debug)]
1029 struct SearcherRev<'n> {
1030     /// The actual needle we're searching for.
1031     needle: CowBytes<'n>,
1032     /// A Rabin-Karp hash of the needle.
1033     nhash: NeedleHash,
1034     /// The actual substring implementation in use.
1035     kind: SearcherRevKind,
1036 }
1037 
1038 #[derive(Clone, Debug)]
1039 enum SearcherRevKind {
1040     /// A special case for empty needles. An empty needle always matches, even
1041     /// in an empty haystack.
1042     Empty,
1043     /// This is used whenever the needle is a single byte. In this case, we
1044     /// always use memchr.
1045     OneByte(u8),
1046     /// Two-Way is the generic work horse and is what provides our additive
1047     /// linear time guarantee. In general, it's used when the needle is bigger
1048     /// than 8 bytes or so.
1049     TwoWay(twoway::Reverse),
1050 }
1051 
1052 impl<'n> SearcherRev<'n> {
new(needle: &'n [u8]) -> SearcherRev<'n>1053     fn new(needle: &'n [u8]) -> SearcherRev<'n> {
1054         use self::SearcherRevKind::*;
1055 
1056         let kind = if needle.len() == 0 {
1057             Empty
1058         } else if needle.len() == 1 {
1059             OneByte(needle[0])
1060         } else {
1061             TwoWay(twoway::Reverse::new(needle))
1062         };
1063         SearcherRev {
1064             needle: CowBytes::new(needle),
1065             nhash: NeedleHash::reverse(needle),
1066             kind,
1067         }
1068     }
1069 
needle(&self) -> &[u8]1070     fn needle(&self) -> &[u8] {
1071         self.needle.as_slice()
1072     }
1073 
as_ref(&self) -> SearcherRev<'_>1074     fn as_ref(&self) -> SearcherRev<'_> {
1075         use self::SearcherRevKind::*;
1076 
1077         let kind = match self.kind {
1078             Empty => Empty,
1079             OneByte(b) => OneByte(b),
1080             TwoWay(tw) => TwoWay(tw),
1081         };
1082         SearcherRev {
1083             needle: CowBytes::new(self.needle()),
1084             nhash: self.nhash,
1085             kind,
1086         }
1087     }
1088 
1089     #[cfg(feature = "std")]
into_owned(self) -> SearcherRev<'static>1090     fn into_owned(self) -> SearcherRev<'static> {
1091         use self::SearcherRevKind::*;
1092 
1093         let kind = match self.kind {
1094             Empty => Empty,
1095             OneByte(b) => OneByte(b),
1096             TwoWay(tw) => TwoWay(tw),
1097         };
1098         SearcherRev {
1099             needle: self.needle.into_owned(),
1100             nhash: self.nhash,
1101             kind,
1102         }
1103     }
1104 
1105     /// Implements reverse substring search by selecting the implementation
1106     /// chosen at construction and executing it on the given haystack with the
1107     /// prefilter's current state of effectiveness.
1108     #[inline(always)]
rfind(&self, haystack: &[u8]) -> Option<usize>1109     fn rfind(&self, haystack: &[u8]) -> Option<usize> {
1110         use self::SearcherRevKind::*;
1111 
1112         let needle = self.needle();
1113         if haystack.len() < needle.len() {
1114             return None;
1115         }
1116         match self.kind {
1117             Empty => Some(haystack.len()),
1118             OneByte(b) => crate::memrchr(b, haystack),
1119             TwoWay(ref tw) => {
1120                 // For very short haystacks (e.g., where the prefilter probably
1121                 // can't run), it's faster to just run RK.
1122                 if rabinkarp::is_fast(haystack, needle) {
1123                     rabinkarp::rfind_with(&self.nhash, haystack, needle)
1124                 } else {
1125                     tw.rfind(haystack, needle)
1126                 }
1127             }
1128         }
1129     }
1130 }
1131 
1132 /// This module defines some generic quickcheck properties useful for testing
1133 /// any substring search algorithm. It also runs those properties for the
1134 /// top-level public API memmem routines. (The properties are also used to
1135 /// test various substring search implementations more granularly elsewhere as
1136 /// well.)
1137 #[cfg(all(test, feature = "std", not(miri)))]
1138 mod proptests {
1139     // N.B. This defines the quickcheck tests using the properties defined
1140     // below. Because of macro-visibility weirdness, the actual macro is
1141     // defined at the top of this file.
1142     define_memmem_quickcheck_tests!(super::find, super::rfind);
1143 
1144     /// Check that every prefix of the given byte string is a substring.
prefix_is_substring( reverse: bool, bs: &[u8], mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, ) -> bool1145     pub(crate) fn prefix_is_substring(
1146         reverse: bool,
1147         bs: &[u8],
1148         mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1149     ) -> bool {
1150         if bs.is_empty() {
1151             return true;
1152         }
1153         for i in 0..(bs.len() - 1) {
1154             let prefix = &bs[..i];
1155             if reverse {
1156                 assert_eq!(naive_rfind(bs, prefix), search(bs, prefix));
1157             } else {
1158                 assert_eq!(naive_find(bs, prefix), search(bs, prefix));
1159             }
1160         }
1161         true
1162     }
1163 
1164     /// Check that every suffix of the given byte string is a substring.
suffix_is_substring( reverse: bool, bs: &[u8], mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, ) -> bool1165     pub(crate) fn suffix_is_substring(
1166         reverse: bool,
1167         bs: &[u8],
1168         mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1169     ) -> bool {
1170         if bs.is_empty() {
1171             return true;
1172         }
1173         for i in 0..(bs.len() - 1) {
1174             let suffix = &bs[i..];
1175             if reverse {
1176                 assert_eq!(naive_rfind(bs, suffix), search(bs, suffix));
1177             } else {
1178                 assert_eq!(naive_find(bs, suffix), search(bs, suffix));
1179             }
1180         }
1181         true
1182     }
1183 
1184     /// Check that naive substring search matches the result of the given search
1185     /// algorithm.
matches_naive( reverse: bool, haystack: &[u8], needle: &[u8], mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, ) -> bool1186     pub(crate) fn matches_naive(
1187         reverse: bool,
1188         haystack: &[u8],
1189         needle: &[u8],
1190         mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1191     ) -> bool {
1192         if reverse {
1193             naive_rfind(haystack, needle) == search(haystack, needle)
1194         } else {
1195             naive_find(haystack, needle) == search(haystack, needle)
1196         }
1197     }
1198 
1199     /// Naively search forwards for the given needle in the given haystack.
naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize>1200     fn naive_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1201         if needle.is_empty() {
1202             return Some(0);
1203         } else if haystack.len() < needle.len() {
1204             return None;
1205         }
1206         for i in 0..(haystack.len() - needle.len() + 1) {
1207             if needle == &haystack[i..i + needle.len()] {
1208                 return Some(i);
1209             }
1210         }
1211         None
1212     }
1213 
1214     /// Naively search in reverse for the given needle in the given haystack.
naive_rfind(haystack: &[u8], needle: &[u8]) -> Option<usize>1215     fn naive_rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
1216         if needle.is_empty() {
1217             return Some(haystack.len());
1218         } else if haystack.len() < needle.len() {
1219             return None;
1220         }
1221         for i in (0..(haystack.len() - needle.len() + 1)).rev() {
1222             if needle == &haystack[i..i + needle.len()] {
1223                 return Some(i);
1224             }
1225         }
1226         None
1227     }
1228 }
1229 
1230 /// This module defines some hand-written "simple" substring tests. It
1231 /// also provides routines for easily running them on any substring search
1232 /// implementation.
1233 #[cfg(test)]
1234 mod testsimples {
1235     define_memmem_simple_tests!(super::find, super::rfind);
1236 
1237     /// Each test is a (needle, haystack, expected_fwd, expected_rev) tuple.
1238     type SearchTest =
1239         (&'static str, &'static str, Option<usize>, Option<usize>);
1240 
1241     const SEARCH_TESTS: &'static [SearchTest] = &[
1242         ("", "", Some(0), Some(0)),
1243         ("", "a", Some(0), Some(1)),
1244         ("", "ab", Some(0), Some(2)),
1245         ("", "abc", Some(0), Some(3)),
1246         ("a", "", None, None),
1247         ("a", "a", Some(0), Some(0)),
1248         ("a", "aa", Some(0), Some(1)),
1249         ("a", "ba", Some(1), Some(1)),
1250         ("a", "bba", Some(2), Some(2)),
1251         ("a", "bbba", Some(3), Some(3)),
1252         ("a", "bbbab", Some(3), Some(3)),
1253         ("a", "bbbabb", Some(3), Some(3)),
1254         ("a", "bbbabbb", Some(3), Some(3)),
1255         ("a", "bbbbbb", None, None),
1256         ("ab", "", None, None),
1257         ("ab", "a", None, None),
1258         ("ab", "b", None, None),
1259         ("ab", "ab", Some(0), Some(0)),
1260         ("ab", "aab", Some(1), Some(1)),
1261         ("ab", "aaab", Some(2), Some(2)),
1262         ("ab", "abaab", Some(0), Some(3)),
1263         ("ab", "baaab", Some(3), Some(3)),
1264         ("ab", "acb", None, None),
1265         ("ab", "abba", Some(0), Some(0)),
1266         ("abc", "ab", None, None),
1267         ("abc", "abc", Some(0), Some(0)),
1268         ("abc", "abcz", Some(0), Some(0)),
1269         ("abc", "abczz", Some(0), Some(0)),
1270         ("abc", "zabc", Some(1), Some(1)),
1271         ("abc", "zzabc", Some(2), Some(2)),
1272         ("abc", "azbc", None, None),
1273         ("abc", "abzc", None, None),
1274         ("abczdef", "abczdefzzzzzzzzzzzzzzzzzzzz", Some(0), Some(0)),
1275         ("abczdef", "zzzzzzzzzzzzzzzzzzzzabczdef", Some(20), Some(20)),
1276         ("xyz", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxyz", Some(32), Some(32)),
1277         // Failures caught by quickcheck.
1278         ("\u{0}\u{15}", "\u{0}\u{15}\u{15}\u{0}", Some(0), Some(0)),
1279         ("\u{0}\u{1e}", "\u{1e}\u{0}", None, None),
1280     ];
1281 
1282     /// Run the substring search tests. `search` should be a closure that
1283     /// accepts a haystack and a needle and returns the starting position
1284     /// of the first occurrence of needle in the haystack, or `None` if one
1285     /// doesn't exist.
run_search_tests_fwd( mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, )1286     pub(crate) fn run_search_tests_fwd(
1287         mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1288     ) {
1289         for &(needle, haystack, expected_fwd, _) in SEARCH_TESTS {
1290             let (n, h) = (needle.as_bytes(), haystack.as_bytes());
1291             assert_eq!(
1292                 expected_fwd,
1293                 search(h, n),
1294                 "needle: {:?}, haystack: {:?}, expected: {:?}",
1295                 n,
1296                 h,
1297                 expected_fwd
1298             );
1299         }
1300     }
1301 
1302     /// Run the substring search tests. `search` should be a closure that
1303     /// accepts a haystack and a needle and returns the starting position of
1304     /// the last occurrence of needle in the haystack, or `None` if one doesn't
1305     /// exist.
run_search_tests_rev( mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>, )1306     pub(crate) fn run_search_tests_rev(
1307         mut search: impl FnMut(&[u8], &[u8]) -> Option<usize>,
1308     ) {
1309         for &(needle, haystack, _, expected_rev) in SEARCH_TESTS {
1310             let (n, h) = (needle.as_bytes(), haystack.as_bytes());
1311             assert_eq!(
1312                 expected_rev,
1313                 search(h, n),
1314                 "needle: {:?}, haystack: {:?}, expected: {:?}",
1315                 n,
1316                 h,
1317                 expected_rev
1318             );
1319         }
1320     }
1321 }
1322