• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 This module defines a common API (by convention) for all of the different
3 impls that we benchmark. The intent here is to 1) make it easy to write macros
4 for generating benchmark definitions generic over impls and 2) make it easier
5 to read the benchmarks themselves and grok how exactly each of the impls are
6 being invoked.
7 
8 The naming scheme of each function follows the pertinent parts of our benchmark
9 naming scheme (see parent module docs). Namely, it is
10 
11   {impl}/{fwd|rev}/{config}
12 
13 Where 'impl' is the underlying implementation and 'config' is the manner of
14 search. The slash indicates a module boundary. We use modules for this because
15 it makes writing macros to define benchmarks for all variants much easier.
16 */
17 
18 /// memchr's implementation of memmem. This is the implementation that we hope
19 /// does approximately as well as all other implementations, and a lot better
20 /// in at least some cases.
21 pub(crate) mod krate {
available(_: &str) -> &'static [&'static str]22     pub(crate) fn available(_: &str) -> &'static [&'static str] {
23         &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
24     }
25 
26     pub(crate) mod fwd {
27         use memchr::memmem;
28 
oneshot(haystack: &str, needle: &str) -> bool29         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
30             memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
31         }
32 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static33         pub(crate) fn prebuilt(
34             needle: &str,
35         ) -> impl Fn(&str) -> bool + 'static {
36             let finder = memmem::Finder::new(needle).into_owned();
37             move |h| finder.find(h.as_bytes()).is_some()
38         }
39 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a40         pub(crate) fn oneshotiter<'a>(
41             haystack: &'a str,
42             needle: &'a str,
43         ) -> impl Iterator<Item = usize> + 'a {
44             memmem::find_iter(haystack.as_bytes(), needle.as_bytes())
45         }
46 
prebuiltiter(needle: &str) -> PrebuiltIter47         pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
48             PrebuiltIter(memmem::Finder::new(needle).into_owned())
49         }
50 
51         #[derive(Debug)]
52         pub(crate) struct PrebuiltIter(memmem::Finder<'static>);
53 
54         impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a55             pub(crate) fn iter<'a>(
56                 &'a self,
57                 haystack: &'a str,
58             ) -> impl Iterator<Item = usize> + 'a {
59                 self.0.find_iter(haystack.as_bytes())
60             }
61         }
62     }
63 
64     pub(crate) mod rev {
65         use memchr::memmem;
66 
oneshot(haystack: &str, needle: &str) -> bool67         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
68             memmem::rfind(haystack.as_bytes(), needle.as_bytes()).is_some()
69         }
70 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static71         pub(crate) fn prebuilt(
72             needle: &str,
73         ) -> impl Fn(&str) -> bool + 'static {
74             let finder = memmem::FinderRev::new(needle).into_owned();
75             move |h| finder.rfind(h.as_bytes()).is_some()
76         }
77 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a78         pub(crate) fn oneshotiter<'a>(
79             haystack: &'a str,
80             needle: &'a str,
81         ) -> impl Iterator<Item = usize> + 'a {
82             memmem::rfind_iter(haystack.as_bytes(), needle.as_bytes())
83         }
84 
prebuiltiter(needle: &str) -> PrebuiltIter85         pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
86             PrebuiltIter(memmem::FinderRev::new(needle).into_owned())
87         }
88 
89         #[derive(Debug)]
90         pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>);
91 
92         impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a93             pub(crate) fn iter<'a>(
94                 &'a self,
95                 haystack: &'a str,
96             ) -> impl Iterator<Item = usize> + 'a {
97                 self.0.rfind_iter(haystack.as_bytes())
98             }
99         }
100     }
101 }
102 
103 /// memchr's implementation of memmem, but without prefilters enabled. This
104 /// exists because sometimes prefilters aren't the right choice, and it's good
105 /// to be able to compare it against prefilter-accelerated searches to see
106 /// where this might be faster.
107 pub(crate) mod krate_nopre {
available(_: &str) -> &'static [&'static str]108     pub(crate) fn available(_: &str) -> &'static [&'static str] {
109         &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
110     }
111 
112     pub(crate) mod fwd {
113         use memchr::memmem;
114 
finder(needle: &[u8]) -> memmem::Finder<'_>115         fn finder(needle: &[u8]) -> memmem::Finder<'_> {
116             memmem::FinderBuilder::new()
117                 .prefilter(memmem::Prefilter::None)
118                 .build_forward(needle)
119         }
120 
oneshot(haystack: &str, needle: &str) -> bool121         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
122             finder(needle.as_bytes()).find(haystack.as_bytes()).is_some()
123         }
124 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static125         pub(crate) fn prebuilt(
126             needle: &str,
127         ) -> impl Fn(&str) -> bool + 'static {
128             let finder = finder(needle.as_bytes()).into_owned();
129             move |h| finder.find(h.as_bytes()).is_some()
130         }
131 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a132         pub(crate) fn oneshotiter<'a>(
133             haystack: &'a str,
134             needle: &'a str,
135         ) -> impl Iterator<Item = usize> + 'a {
136             super::super::iter_from_find(
137                 haystack.as_bytes(),
138                 needle.as_bytes(),
139                 |h, n| finder(n).find(h),
140             )
141         }
142 
prebuiltiter(needle: &str) -> PrebuiltIter143         pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
144             PrebuiltIter(finder(needle.as_bytes()).into_owned())
145         }
146 
147         #[derive(Debug)]
148         pub(crate) struct PrebuiltIter(memmem::Finder<'static>);
149 
150         impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a151             pub(crate) fn iter<'a>(
152                 &'a self,
153                 haystack: &'a str,
154             ) -> impl Iterator<Item = usize> + 'a {
155                 self.0.find_iter(haystack.as_bytes())
156             }
157         }
158     }
159 
160     // N.B. memrmem/krate_nopre and memrmem/krate should be equivalent for now
161     // since reverse searching doesn't have any prefilter support.
162     pub(crate) mod rev {
163         use memchr::memmem;
164 
finder(needle: &[u8]) -> memmem::FinderRev<'_>165         fn finder(needle: &[u8]) -> memmem::FinderRev<'_> {
166             memmem::FinderBuilder::new()
167                 .prefilter(memmem::Prefilter::None)
168                 .build_reverse(needle)
169         }
170 
oneshot(haystack: &str, needle: &str) -> bool171         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
172             finder(needle.as_bytes()).rfind(haystack.as_bytes()).is_some()
173         }
174 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static175         pub(crate) fn prebuilt(
176             needle: &str,
177         ) -> impl Fn(&str) -> bool + 'static {
178             let finder = finder(needle.as_bytes()).into_owned();
179             move |h| finder.rfind(h.as_bytes()).is_some()
180         }
181 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a182         pub(crate) fn oneshotiter<'a>(
183             haystack: &'a str,
184             needle: &'a str,
185         ) -> impl Iterator<Item = usize> + 'a {
186             super::super::iter_from_rfind(
187                 haystack.as_bytes(),
188                 needle.as_bytes(),
189                 |h, n| finder(n).rfind(h),
190             )
191         }
192 
prebuiltiter(needle: &str) -> PrebuiltIter193         pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
194             PrebuiltIter(finder(needle.as_bytes()).into_owned())
195         }
196 
197         #[derive(Debug)]
198         pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>);
199 
200         impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a201             pub(crate) fn iter<'a>(
202                 &'a self,
203                 haystack: &'a str,
204             ) -> impl Iterator<Item = usize> + 'a {
205                 self.0.rfind_iter(haystack.as_bytes())
206             }
207         }
208     }
209 }
210 
211 /// bstr's implementation of memmem.
212 ///
213 /// The implementation in this crate was originally copied from bstr.
214 /// Eventually, bstr will just use the implementation in this crate, but at time
215 /// of writing, it was useful to benchmark against the "original" version.
216 pub(crate) mod bstr {
available(_: &str) -> &'static [&'static str]217     pub(crate) fn available(_: &str) -> &'static [&'static str] {
218         &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
219     }
220 
221     pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool222         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
223             bstr::ByteSlice::find(haystack.as_bytes(), needle.as_bytes())
224                 .is_some()
225         }
226 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static227         pub(crate) fn prebuilt(
228             needle: &str,
229         ) -> impl Fn(&str) -> bool + 'static {
230             let finder = bstr::Finder::new(needle).into_owned();
231             move |h| finder.find(h.as_bytes()).is_some()
232         }
233 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a234         pub(crate) fn oneshotiter<'a>(
235             haystack: &'a str,
236             needle: &'a str,
237         ) -> impl Iterator<Item = usize> + 'a {
238             bstr::ByteSlice::find_iter(haystack.as_bytes(), needle.as_bytes())
239         }
240 
prebuiltiter(needle: &str) -> PrebuiltIter241         pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
242             PrebuiltIter(bstr::Finder::new(needle).into_owned())
243         }
244 
245         #[derive(Debug)]
246         pub(crate) struct PrebuiltIter(bstr::Finder<'static>);
247 
248         impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a249             pub(crate) fn iter<'a>(
250                 &'a self,
251                 haystack: &'a str,
252             ) -> impl Iterator<Item = usize> + 'a {
253                 super::super::iter_from_find(
254                     haystack.as_bytes(),
255                     self.0.needle(),
256                     move |h, _| self.0.find(h),
257                 )
258             }
259         }
260     }
261 
262     pub(crate) mod rev {
oneshot(haystack: &str, needle: &str) -> bool263         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
264             bstr::ByteSlice::rfind(haystack.as_bytes(), needle.as_bytes())
265                 .is_some()
266         }
267 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static268         pub(crate) fn prebuilt(
269             needle: &str,
270         ) -> impl Fn(&str) -> bool + 'static {
271             let finder = bstr::FinderReverse::new(needle).into_owned();
272             move |h| finder.rfind(h.as_bytes()).is_some()
273         }
274 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a275         pub(crate) fn oneshotiter<'a>(
276             haystack: &'a str,
277             needle: &'a str,
278         ) -> impl Iterator<Item = usize> + 'a {
279             bstr::ByteSlice::rfind_iter(haystack.as_bytes(), needle.as_bytes())
280         }
281 
prebuiltiter(needle: &str) -> PrebuiltIter282         pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
283             PrebuiltIter(bstr::FinderReverse::new(needle).into_owned())
284         }
285 
286         #[derive(Debug)]
287         pub(crate) struct PrebuiltIter(bstr::FinderReverse<'static>);
288 
289         impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a290             pub(crate) fn iter<'a>(
291                 &'a self,
292                 haystack: &'a str,
293             ) -> impl Iterator<Item = usize> + 'a {
294                 super::super::iter_from_rfind(
295                     haystack.as_bytes(),
296                     self.0.needle(),
297                     move |h, _| self.0.rfind(h),
298                 )
299             }
300         }
301     }
302 }
303 
304 /// regex's implementation of substring search.
305 ///
306 /// regex is where the concept of using heuristics based on an a priori
307 /// assumption of byte frequency originated. Eventually, regex will just use the
308 /// implementation in this crate, but it will still be useful to benchmark since
309 /// regex tends to have higher latency. It would be good to measure that.
310 ///
311 /// For regex, we don't provide oneshots, since that requires compiling the
312 /// regex which we know is going to be ridiculously slow. No real need to
313 /// measure it I think.
314 pub(crate) mod regex {
available(_: &str) -> &'static [&'static str]315     pub(crate) fn available(_: &str) -> &'static [&'static str] {
316         &["prebuilt", "prebuiltiter"]
317     }
318 
319     pub(crate) mod fwd {
oneshot(_haystack: &str, _needle: &str) -> bool320         pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
321             unimplemented!("regex does not support oneshot searches")
322         }
323 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static324         pub(crate) fn prebuilt(
325             needle: &str,
326         ) -> impl Fn(&str) -> bool + 'static {
327             let finder = regex::Regex::new(&regex::escape(needle)).unwrap();
328             move |h| finder.is_match(h)
329         }
330 
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static331         pub(crate) fn oneshotiter(
332             _haystack: &str,
333             _needle: &str,
334         ) -> impl Iterator<Item = usize> + 'static {
335             std::iter::from_fn(move || {
336                 unimplemented!("regex does not support oneshot searches")
337             })
338         }
339 
prebuiltiter(needle: &str) -> PrebuiltIter340         pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
341             PrebuiltIter(regex::Regex::new(&regex::escape(needle)).unwrap())
342         }
343 
344         #[derive(Debug)]
345         pub(crate) struct PrebuiltIter(regex::Regex);
346 
347         impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a348             pub(crate) fn iter<'a>(
349                 &'a self,
350                 haystack: &'a str,
351             ) -> impl Iterator<Item = usize> + 'a {
352                 self.0.find_iter(haystack).map(|m| m.start())
353             }
354         }
355     }
356 
357     pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool358         pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
359             unimplemented!("regex does not support reverse searches")
360         }
361 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static362         pub(crate) fn prebuilt(
363             _needle: &str,
364         ) -> impl Fn(&str) -> bool + 'static {
365             |_| unimplemented!("regex does not support reverse searches")
366         }
367 
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static368         pub(crate) fn oneshotiter(
369             _haystack: &str,
370             _needle: &str,
371         ) -> impl Iterator<Item = usize> + 'static {
372             std::iter::from_fn(move || {
373                 unimplemented!("regex does not support reverse searches")
374             })
375         }
376 
prebuiltiter(_needle: &str) -> super::super::NoIter377         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
378             unimplemented!("regex does not support reverse searches")
379         }
380     }
381 }
382 
383 /// std's substring search implementation.
384 ///
385 /// std uses Two-Way like this crate, but doesn't have any prefilter
386 /// heuristics.
387 ///
388 /// std doesn't have any way to amortize the construction of the searcher, so
389 /// we can't implement any of the prebuilt routines.
390 pub(crate) mod stud {
available(_: &str) -> &'static [&'static str]391     pub(crate) fn available(_: &str) -> &'static [&'static str] {
392         &["reverse", "oneshot", "oneshotiter"]
393     }
394 
395     pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool396         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
397             haystack.contains(needle)
398         }
399 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static400         pub(crate) fn prebuilt(
401             _needle: &str,
402         ) -> impl Fn(&str) -> bool + 'static {
403             |_| unimplemented!("std does not support prebuilt searches")
404         }
405 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a406         pub(crate) fn oneshotiter<'a>(
407             haystack: &'a str,
408             needle: &'a str,
409         ) -> impl Iterator<Item = usize> + 'a {
410             haystack.match_indices(needle).map(|(i, _)| i)
411         }
412 
prebuiltiter(_needle: &str) -> super::super::NoIter413         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
414             super::super::NoIter { imp: "std" }
415         }
416     }
417 
418     pub(crate) mod rev {
oneshot(haystack: &str, needle: &str) -> bool419         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
420             haystack.contains(needle)
421         }
422 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static423         pub(crate) fn prebuilt(
424             _needle: &str,
425         ) -> impl Fn(&str) -> bool + 'static {
426             |_| unimplemented!("std does not support prebuilt searches")
427         }
428 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a429         pub(crate) fn oneshotiter<'a>(
430             haystack: &'a str,
431             needle: &'a str,
432         ) -> impl Iterator<Item = usize> + 'a {
433             haystack.rmatch_indices(needle).map(|(i, _)| i)
434         }
435 
prebuiltiter(_needle: &str) -> super::super::NoIter436         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
437             super::super::NoIter { imp: "std" }
438         }
439     }
440 }
441 
442 /// Substring search from the twoway crate.
443 ///
444 /// twoway uses, obviously, Two-Way as an implementation. AIUI, it was taken
445 /// from std at some point but heavily modified to support a prefilter via
446 /// PCMPESTRI from the SSE 4.2 ISA extension. (And also uses memchr for
447 /// single-byte needles.)
448 ///
449 /// Like std, there is no way to amortize the construction of the searcher, so
450 /// we can't implement any of the prebuilt routines.
451 pub(crate) mod twoway {
available(_: &str) -> &'static [&'static str]452     pub(crate) fn available(_: &str) -> &'static [&'static str] {
453         &["reverse", "oneshot", "oneshotiter"]
454     }
455 
456     pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool457         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
458             twoway::find_bytes(haystack.as_bytes(), needle.as_bytes())
459                 .is_some()
460         }
461 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static462         pub(crate) fn prebuilt(
463             _needle: &str,
464         ) -> impl Fn(&str) -> bool + 'static {
465             |_| unimplemented!("twoway does not support prebuilt searches")
466         }
467 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a468         pub(crate) fn oneshotiter<'a>(
469             haystack: &'a str,
470             needle: &'a str,
471         ) -> impl Iterator<Item = usize> + 'a {
472             super::super::iter_from_find(
473                 haystack.as_bytes(),
474                 needle.as_bytes(),
475                 twoway::find_bytes,
476             )
477         }
478 
prebuiltiter(_needle: &str) -> super::super::NoIter479         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
480             super::super::NoIter { imp: "twoway" }
481         }
482     }
483 
484     pub(crate) mod rev {
oneshot(haystack: &str, needle: &str) -> bool485         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
486             twoway::rfind_bytes(haystack.as_bytes(), needle.as_bytes())
487                 .is_some()
488         }
489 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static490         pub(crate) fn prebuilt(
491             _needle: &str,
492         ) -> impl Fn(&str) -> bool + 'static {
493             |_| unimplemented!("twoway does not support prebuilt searches")
494         }
495 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a496         pub(crate) fn oneshotiter<'a>(
497             haystack: &'a str,
498             needle: &'a str,
499         ) -> impl Iterator<Item = usize> + 'a {
500             super::super::iter_from_rfind(
501                 haystack.as_bytes(),
502                 needle.as_bytes(),
503                 twoway::rfind_bytes,
504             )
505         }
506 
prebuiltiter(_needle: &str) -> super::super::NoIter507         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
508             super::super::NoIter { imp: "twoway" }
509         }
510     }
511 }
512 
513 /// Substring search from the sliceslice crate.
514 ///
515 /// This crate is what inspired me to write a vectorized memmem implementation
516 /// in the memchr crate in the first place. In particular, it exposed some
517 /// serious weaknesses in my implementation in the bstr crate.
518 ///
519 /// sliceslice doesn't actually do anything "new" other
520 /// than bringing a long known SIMD algorithm to Rust:
521 /// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd
522 ///
523 /// The main thrust of the algorithm is that it picks a couple of bytes in the
524 /// needle and uses SIMD to check whether those two bytes occur in the haystack
525 /// in a way that could lead to a match. If so, then you do a simple memcmp
526 /// confirmation step. The main problem with this algorithm is that its worst
527 /// case is multiplicative: that confirmatory step can become quite costly if
528 /// the SIMD prefilter isn't effective. The elegance of this method, however,
529 /// is that the prefilter is routinely effective.
530 ///
531 /// The essence of memchr's implementation of memmem comes from sliceslice,
532 /// but also from regex's original idea to use heuristics based on an a priori
533 /// assumption of relative byte frequency AND from bstr's desire to have a
534 /// constant space and worst case O(m+n) substring search. My claim is that
535 /// it is the best of all words, and that's why this benchmark suite is so
536 /// comprehensive. There are a lot of cases and implementations to test.
537 ///
538 /// NOTE: The API of sliceslice is quite constrained. My guess is that it was
539 /// designed for a very specific use case, and the API is heavily constrained
540 /// to that use case (whatever it is). While its API doesn't provide any
541 /// oneshot routines, we emulate them. (Its main problem is that every such
542 /// search requires copying the needle into a fresh allocation. The memchr
543 /// crate avoids that problem by being generic over the needle: it can be owned
544 /// or borrowed.) Also, since the API only enables testing whether a substring
545 /// exists or not, we can't benchmark iteration.
546 ///
547 /// NOTE: sliceslice only works on x86_64 CPUs with AVX enabled. So not only
548 /// do we conditionally compile the routines below, but we only run these
549 /// benchmarks when AVX2 is available.
550 #[cfg(target_arch = "x86_64")]
551 pub(crate) mod sliceslice {
available(needle: &str) -> &'static [&'static str]552     pub(crate) fn available(needle: &str) -> &'static [&'static str] {
553         // Apparently sliceslice doesn't support searching with an empty
554         // needle. Sheesh.
555         if !needle.is_empty() && is_x86_feature_detected!("avx2") {
556             &["oneshot", "prebuilt"]
557         } else {
558             &[]
559         }
560     }
561 
562     pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool563         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
564             if !is_x86_feature_detected!("avx2") {
565                 unreachable!("sliceslice cannot be called without avx2");
566             }
567             let needle = needle.as_bytes();
568             // SAFETY: This code path is only entered when AVX2 is enabled,
569             // which is the only requirement for using DynamicAvx2Searcher.
570             unsafe {
571                 let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle);
572                 finder.search_in(haystack.as_bytes())
573             }
574         }
575 
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static576         pub(crate) fn prebuilt(
577             needle: &str,
578         ) -> impl Fn(&str) -> bool + 'static {
579             if !is_x86_feature_detected!("avx2") {
580                 unreachable!("sliceslice cannot be called without avx2");
581             }
582             let needle = needle.as_bytes().to_vec();
583             // SAFETY: This code path is only entered when AVX2 is enabled,
584             // which is the only requirement for using DynamicAvx2Searcher.
585             unsafe {
586                 let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle);
587                 move |h| finder.search_in(h.as_bytes())
588             }
589         }
590 
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static591         pub(crate) fn oneshotiter(
592             _haystack: &str,
593             _needle: &str,
594         ) -> impl Iterator<Item = usize> + 'static {
595             std::iter::from_fn(move || {
596                 unimplemented!("sliceslice doesn't not support iteration")
597             })
598         }
599 
prebuiltiter(_needle: &str) -> super::super::NoIter600         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
601             unimplemented!("sliceslice doesn't support prebuilt iteration")
602         }
603     }
604 
605     pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool606         pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
607             unimplemented!("sliceslice does not support reverse searches")
608         }
609 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static610         pub(crate) fn prebuilt(
611             _needle: &str,
612         ) -> impl Fn(&str) -> bool + 'static {
613             |_| unimplemented!("sliceslice does not support reverse searches")
614         }
615 
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static616         pub(crate) fn oneshotiter(
617             _haystack: &str,
618             _needle: &str,
619         ) -> impl Iterator<Item = usize> + 'static {
620             std::iter::from_fn(move || {
621                 unimplemented!("sliceslice does not support reverse searches")
622             })
623         }
624 
prebuiltiter(_needle: &str) -> super::super::NoIter625         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
626             unimplemented!("sliceslice does not support reverse searches")
627         }
628     }
629 }
630 
631 #[cfg(not(target_arch = "x86_64"))]
632 pub(crate) mod sliceslice {
available(_: &str) -> &'static [&'static str]633     pub(crate) fn available(_: &str) -> &'static [&'static str] {
634         &[]
635     }
636 
637     pub(crate) mod fwd {
oneshot(_: &str, _: &str) -> bool638         pub(crate) fn oneshot(_: &str, _: &str) -> bool {
639             unimplemented!("sliceslice only runs on x86")
640         }
641 
prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static642         pub(crate) fn prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static {
643             if true {
644                 unimplemented!("sliceslice only runs on x86")
645             }
646             |_| false
647         }
648 
oneshotiter<'a>( _haystack: &'a str, _needle: &'a str, ) -> impl Iterator<Item = usize> + 'static649         pub(crate) fn oneshotiter<'a>(
650             _haystack: &'a str,
651             _needle: &'a str,
652         ) -> impl Iterator<Item = usize> + 'static {
653             std::iter::from_fn(move || {
654                 unimplemented!("sliceslice only runs on x86")
655             })
656         }
657 
prebuiltiter(_needle: &str) -> super::super::NoIter658         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
659             unimplemented!("sliceslice only runs on x86")
660         }
661     }
662 
663     pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool664         pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
665             unimplemented!("sliceslice does not support reverse searches")
666         }
667 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static668         pub(crate) fn prebuilt(
669             _needle: &str,
670         ) -> impl Fn(&str) -> bool + 'static {
671             |_| unimplemented!("sliceslice does not support reverse searches")
672         }
673 
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static674         pub(crate) fn oneshotiter(
675             _haystack: &str,
676             _needle: &str,
677         ) -> impl Iterator<Item = usize> + 'static {
678             std::iter::from_fn(move || {
679                 unimplemented!("sliceslice does not support reverse searches")
680             })
681         }
682 
prebuiltiter(_needle: &str) -> super::super::NoIter683         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
684             unimplemented!("sliceslice does not support reverse searches")
685         }
686     }
687 }
688 
689 /// libc's substring search implementation.
690 ///
691 /// libc doesn't have any way to amortize the construction of the searcher, so
692 /// we can't implement any of the prebuilt routines.
693 pub(crate) mod libc {
available(_: &str) -> &'static [&'static str]694     pub(crate) fn available(_: &str) -> &'static [&'static str] {
695         &["oneshot", "oneshotiter"]
696     }
697 
698     pub(crate) mod fwd {
699         #[cfg(target_arch = "wasm32")]
700         extern "C" {
memmem( haystack: *const libc::c_void, haystack_len: usize, needle: *const libc::c_void, needle_len: usize, ) -> *const libc::c_void701             fn memmem(
702                 haystack: *const libc::c_void,
703                 haystack_len: usize,
704                 needle: *const libc::c_void,
705                 needle_len: usize,
706             ) -> *const libc::c_void;
707         }
708         #[cfg(not(target_arch = "wasm32"))]
709         use libc::memmem;
710 
find(haystack: &[u8], needle: &[u8]) -> Option<usize>711         fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
712             let p = unsafe {
713                 memmem(
714                     haystack.as_ptr() as *const libc::c_void,
715                     haystack.len(),
716                     needle.as_ptr() as *const libc::c_void,
717                     needle.len(),
718                 )
719             };
720             if p.is_null() {
721                 None
722             } else {
723                 Some(p as usize - (haystack.as_ptr() as usize))
724             }
725         }
726 
oneshot(haystack: &str, needle: &str) -> bool727         pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
728             find(haystack.as_bytes(), needle.as_bytes()).is_some()
729         }
730 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static731         pub(crate) fn prebuilt(
732             _needle: &str,
733         ) -> impl Fn(&str) -> bool + 'static {
734             |_| unimplemented!("std does not support prebuilt searches")
735         }
736 
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a737         pub(crate) fn oneshotiter<'a>(
738             haystack: &'a str,
739             needle: &'a str,
740         ) -> impl Iterator<Item = usize> + 'a {
741             super::super::iter_from_find(
742                 haystack.as_bytes(),
743                 needle.as_bytes(),
744                 find,
745             )
746         }
747 
prebuiltiter(_needle: &str) -> super::super::NoIter748         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
749             super::super::NoIter { imp: "libc" }
750         }
751     }
752 
753     pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool754         pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
755             unimplemented!("libc does not support reverse searches")
756         }
757 
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static758         pub(crate) fn prebuilt(
759             _needle: &str,
760         ) -> impl Fn(&str) -> bool + 'static {
761             |_| unimplemented!("libc does not support reverse searches")
762         }
763 
oneshotiter<'a>( _haystack: &'a str, _needle: &'a str, ) -> impl Iterator<Item = usize> + 'a764         pub(crate) fn oneshotiter<'a>(
765             _haystack: &'a str,
766             _needle: &'a str,
767         ) -> impl Iterator<Item = usize> + 'a {
768             std::iter::from_fn(move || {
769                 unimplemented!("libc does not support reverse searches")
770             })
771         }
772 
prebuiltiter(_needle: &str) -> super::super::NoIter773         pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
774             unimplemented!("libc does not support reverse searches")
775         }
776     }
777 }
778 
779 /// An iterator that looks like a PrebuilIter API-wise, but panics if it's
780 /// called. This should be used for implementations that don't support
781 /// prebuilt iteration.
782 #[derive(Debug)]
783 pub(crate) struct NoIter {
784     /// The name of the impl to use in the panic message in case it is invoked
785     /// by mistake. (But the benchmark harness should not invoke it, assuming
786     /// each impl's 'available' function is correct.
787     imp: &'static str,
788 }
789 
790 impl NoIter {
iter( &self, _: &str, ) -> impl Iterator<Item = usize> + 'static791     pub(crate) fn iter(
792         &self,
793         _: &str,
794     ) -> impl Iterator<Item = usize> + 'static {
795         let imp = self.imp;
796         std::iter::from_fn(move || {
797             unimplemented!("{} does not support prebuilt iteration", imp)
798         })
799     }
800 }
801 
802 /// Accepts a corpus and a needle and a routine that implements substring
803 /// search, and returns an iterator over all matches. This is useful for
804 /// benchmarking "find all matches" for substring search implementations that
805 /// don't expose a native way to do this.
806 ///
807 /// The closure given takes two parameters: the corpus and needle, in that
808 /// order.
iter_from_find<'a>( haystack: &'a [u8], needle: &'a [u8], mut find: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, ) -> impl Iterator<Item = usize> + 'a809 fn iter_from_find<'a>(
810     haystack: &'a [u8],
811     needle: &'a [u8],
812     mut find: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a,
813 ) -> impl Iterator<Item = usize> + 'a {
814     let mut pos = 0;
815     std::iter::from_fn(move || {
816         if pos > haystack.len() {
817             return None;
818         }
819         match find(&haystack[pos..], needle) {
820             None => None,
821             Some(i) => {
822                 let found = pos + i;
823                 // We always need to add at least 1, in case of an empty needle.
824                 pos += i + std::cmp::max(1, needle.len());
825                 Some(found)
826             }
827         }
828     })
829 }
830 
831 /// Like iter_from_find, but for reverse searching.
iter_from_rfind<'a>( haystack: &'a [u8], needle: &'a [u8], mut rfind: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, ) -> impl Iterator<Item = usize> + 'a832 fn iter_from_rfind<'a>(
833     haystack: &'a [u8],
834     needle: &'a [u8],
835     mut rfind: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a,
836 ) -> impl Iterator<Item = usize> + 'a {
837     let mut pos = Some(haystack.len());
838     std::iter::from_fn(move || {
839         let end = match pos {
840             None => return None,
841             Some(end) => end,
842         };
843         match rfind(&haystack[..end], needle) {
844             None => None,
845             Some(i) => {
846                 if end == i {
847                     // We always need to subtract at least 1, in case of an
848                     // empty needle.
849                     pos = end.checked_sub(1);
850                 } else {
851                     pos = Some(i);
852                 }
853                 Some(i)
854             }
855         }
856     })
857 }
858