1 /*
2 This module defines a common API (by convention) for all of the different
3 impls that we benchmark. The intent here is to 1) make it easy to write macros
4 for generating benchmark definitions generic over impls and 2) make it easier
5 to read the benchmarks themselves and grok how exactly each of the impls are
6 being invoked.
7
8 The naming scheme of each function follows the pertinent parts of our benchmark
9 naming scheme (see parent module docs). Namely, it is
10
11 {impl}/{fwd|rev}/{config}
12
13 Where 'impl' is the underlying implementation and 'config' is the manner of
14 search. The slash indicates a module boundary. We use modules for this because
15 it makes writing macros to define benchmarks for all variants much easier.
16 */
17
18 /// memchr's implementation of memmem. This is the implementation that we hope
19 /// does approximately as well as all other implementations, and a lot better
20 /// in at least some cases.
21 pub(crate) mod krate {
available(_: &str) -> &'static [&'static str]22 pub(crate) fn available(_: &str) -> &'static [&'static str] {
23 &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
24 }
25
26 pub(crate) mod fwd {
27 use memchr::memmem;
28
oneshot(haystack: &str, needle: &str) -> bool29 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
30 memmem::find(haystack.as_bytes(), needle.as_bytes()).is_some()
31 }
32
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static33 pub(crate) fn prebuilt(
34 needle: &str,
35 ) -> impl Fn(&str) -> bool + 'static {
36 let finder = memmem::Finder::new(needle).into_owned();
37 move |h| finder.find(h.as_bytes()).is_some()
38 }
39
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a40 pub(crate) fn oneshotiter<'a>(
41 haystack: &'a str,
42 needle: &'a str,
43 ) -> impl Iterator<Item = usize> + 'a {
44 memmem::find_iter(haystack.as_bytes(), needle.as_bytes())
45 }
46
prebuiltiter(needle: &str) -> PrebuiltIter47 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
48 PrebuiltIter(memmem::Finder::new(needle).into_owned())
49 }
50
51 #[derive(Debug)]
52 pub(crate) struct PrebuiltIter(memmem::Finder<'static>);
53
54 impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a55 pub(crate) fn iter<'a>(
56 &'a self,
57 haystack: &'a str,
58 ) -> impl Iterator<Item = usize> + 'a {
59 self.0.find_iter(haystack.as_bytes())
60 }
61 }
62 }
63
64 pub(crate) mod rev {
65 use memchr::memmem;
66
oneshot(haystack: &str, needle: &str) -> bool67 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
68 memmem::rfind(haystack.as_bytes(), needle.as_bytes()).is_some()
69 }
70
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static71 pub(crate) fn prebuilt(
72 needle: &str,
73 ) -> impl Fn(&str) -> bool + 'static {
74 let finder = memmem::FinderRev::new(needle).into_owned();
75 move |h| finder.rfind(h.as_bytes()).is_some()
76 }
77
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a78 pub(crate) fn oneshotiter<'a>(
79 haystack: &'a str,
80 needle: &'a str,
81 ) -> impl Iterator<Item = usize> + 'a {
82 memmem::rfind_iter(haystack.as_bytes(), needle.as_bytes())
83 }
84
prebuiltiter(needle: &str) -> PrebuiltIter85 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
86 PrebuiltIter(memmem::FinderRev::new(needle).into_owned())
87 }
88
89 #[derive(Debug)]
90 pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>);
91
92 impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a93 pub(crate) fn iter<'a>(
94 &'a self,
95 haystack: &'a str,
96 ) -> impl Iterator<Item = usize> + 'a {
97 self.0.rfind_iter(haystack.as_bytes())
98 }
99 }
100 }
101 }
102
103 /// memchr's implementation of memmem, but without prefilters enabled. This
104 /// exists because sometimes prefilters aren't the right choice, and it's good
105 /// to be able to compare it against prefilter-accelerated searches to see
106 /// where this might be faster.
107 pub(crate) mod krate_nopre {
available(_: &str) -> &'static [&'static str]108 pub(crate) fn available(_: &str) -> &'static [&'static str] {
109 &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
110 }
111
112 pub(crate) mod fwd {
113 use memchr::memmem;
114
finder(needle: &[u8]) -> memmem::Finder<'_>115 fn finder(needle: &[u8]) -> memmem::Finder<'_> {
116 memmem::FinderBuilder::new()
117 .prefilter(memmem::Prefilter::None)
118 .build_forward(needle)
119 }
120
oneshot(haystack: &str, needle: &str) -> bool121 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
122 finder(needle.as_bytes()).find(haystack.as_bytes()).is_some()
123 }
124
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static125 pub(crate) fn prebuilt(
126 needle: &str,
127 ) -> impl Fn(&str) -> bool + 'static {
128 let finder = finder(needle.as_bytes()).into_owned();
129 move |h| finder.find(h.as_bytes()).is_some()
130 }
131
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a132 pub(crate) fn oneshotiter<'a>(
133 haystack: &'a str,
134 needle: &'a str,
135 ) -> impl Iterator<Item = usize> + 'a {
136 super::super::iter_from_find(
137 haystack.as_bytes(),
138 needle.as_bytes(),
139 |h, n| finder(n).find(h),
140 )
141 }
142
prebuiltiter(needle: &str) -> PrebuiltIter143 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
144 PrebuiltIter(finder(needle.as_bytes()).into_owned())
145 }
146
147 #[derive(Debug)]
148 pub(crate) struct PrebuiltIter(memmem::Finder<'static>);
149
150 impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a151 pub(crate) fn iter<'a>(
152 &'a self,
153 haystack: &'a str,
154 ) -> impl Iterator<Item = usize> + 'a {
155 self.0.find_iter(haystack.as_bytes())
156 }
157 }
158 }
159
160 // N.B. memrmem/krate_nopre and memrmem/krate should be equivalent for now
161 // since reverse searching doesn't have any prefilter support.
162 pub(crate) mod rev {
163 use memchr::memmem;
164
finder(needle: &[u8]) -> memmem::FinderRev<'_>165 fn finder(needle: &[u8]) -> memmem::FinderRev<'_> {
166 memmem::FinderBuilder::new()
167 .prefilter(memmem::Prefilter::None)
168 .build_reverse(needle)
169 }
170
oneshot(haystack: &str, needle: &str) -> bool171 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
172 finder(needle.as_bytes()).rfind(haystack.as_bytes()).is_some()
173 }
174
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static175 pub(crate) fn prebuilt(
176 needle: &str,
177 ) -> impl Fn(&str) -> bool + 'static {
178 let finder = finder(needle.as_bytes()).into_owned();
179 move |h| finder.rfind(h.as_bytes()).is_some()
180 }
181
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a182 pub(crate) fn oneshotiter<'a>(
183 haystack: &'a str,
184 needle: &'a str,
185 ) -> impl Iterator<Item = usize> + 'a {
186 super::super::iter_from_rfind(
187 haystack.as_bytes(),
188 needle.as_bytes(),
189 |h, n| finder(n).rfind(h),
190 )
191 }
192
prebuiltiter(needle: &str) -> PrebuiltIter193 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
194 PrebuiltIter(finder(needle.as_bytes()).into_owned())
195 }
196
197 #[derive(Debug)]
198 pub(crate) struct PrebuiltIter(memmem::FinderRev<'static>);
199
200 impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a201 pub(crate) fn iter<'a>(
202 &'a self,
203 haystack: &'a str,
204 ) -> impl Iterator<Item = usize> + 'a {
205 self.0.rfind_iter(haystack.as_bytes())
206 }
207 }
208 }
209 }
210
211 /// bstr's implementation of memmem.
212 ///
213 /// The implementation in this crate was originally copied from bstr.
214 /// Eventually, bstr will just use the implementation in this crate, but at time
215 /// of writing, it was useful to benchmark against the "original" version.
216 pub(crate) mod bstr {
available(_: &str) -> &'static [&'static str]217 pub(crate) fn available(_: &str) -> &'static [&'static str] {
218 &["reverse", "oneshot", "prebuilt", "oneshotiter", "prebuiltiter"]
219 }
220
221 pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool222 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
223 bstr::ByteSlice::find(haystack.as_bytes(), needle.as_bytes())
224 .is_some()
225 }
226
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static227 pub(crate) fn prebuilt(
228 needle: &str,
229 ) -> impl Fn(&str) -> bool + 'static {
230 let finder = bstr::Finder::new(needle).into_owned();
231 move |h| finder.find(h.as_bytes()).is_some()
232 }
233
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a234 pub(crate) fn oneshotiter<'a>(
235 haystack: &'a str,
236 needle: &'a str,
237 ) -> impl Iterator<Item = usize> + 'a {
238 bstr::ByteSlice::find_iter(haystack.as_bytes(), needle.as_bytes())
239 }
240
prebuiltiter(needle: &str) -> PrebuiltIter241 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
242 PrebuiltIter(bstr::Finder::new(needle).into_owned())
243 }
244
245 #[derive(Debug)]
246 pub(crate) struct PrebuiltIter(bstr::Finder<'static>);
247
248 impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a249 pub(crate) fn iter<'a>(
250 &'a self,
251 haystack: &'a str,
252 ) -> impl Iterator<Item = usize> + 'a {
253 super::super::iter_from_find(
254 haystack.as_bytes(),
255 self.0.needle(),
256 move |h, _| self.0.find(h),
257 )
258 }
259 }
260 }
261
262 pub(crate) mod rev {
oneshot(haystack: &str, needle: &str) -> bool263 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
264 bstr::ByteSlice::rfind(haystack.as_bytes(), needle.as_bytes())
265 .is_some()
266 }
267
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static268 pub(crate) fn prebuilt(
269 needle: &str,
270 ) -> impl Fn(&str) -> bool + 'static {
271 let finder = bstr::FinderReverse::new(needle).into_owned();
272 move |h| finder.rfind(h.as_bytes()).is_some()
273 }
274
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a275 pub(crate) fn oneshotiter<'a>(
276 haystack: &'a str,
277 needle: &'a str,
278 ) -> impl Iterator<Item = usize> + 'a {
279 bstr::ByteSlice::rfind_iter(haystack.as_bytes(), needle.as_bytes())
280 }
281
prebuiltiter(needle: &str) -> PrebuiltIter282 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
283 PrebuiltIter(bstr::FinderReverse::new(needle).into_owned())
284 }
285
286 #[derive(Debug)]
287 pub(crate) struct PrebuiltIter(bstr::FinderReverse<'static>);
288
289 impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a290 pub(crate) fn iter<'a>(
291 &'a self,
292 haystack: &'a str,
293 ) -> impl Iterator<Item = usize> + 'a {
294 super::super::iter_from_rfind(
295 haystack.as_bytes(),
296 self.0.needle(),
297 move |h, _| self.0.rfind(h),
298 )
299 }
300 }
301 }
302 }
303
304 /// regex's implementation of substring search.
305 ///
306 /// regex is where the concept of using heuristics based on an a priori
307 /// assumption of byte frequency originated. Eventually, regex will just use the
308 /// implementation in this crate, but it will still be useful to benchmark since
309 /// regex tends to have higher latency. It would be good to measure that.
310 ///
311 /// For regex, we don't provide oneshots, since that requires compiling the
312 /// regex which we know is going to be ridiculously slow. No real need to
313 /// measure it I think.
314 pub(crate) mod regex {
available(_: &str) -> &'static [&'static str]315 pub(crate) fn available(_: &str) -> &'static [&'static str] {
316 &["prebuilt", "prebuiltiter"]
317 }
318
319 pub(crate) mod fwd {
oneshot(_haystack: &str, _needle: &str) -> bool320 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
321 unimplemented!("regex does not support oneshot searches")
322 }
323
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static324 pub(crate) fn prebuilt(
325 needle: &str,
326 ) -> impl Fn(&str) -> bool + 'static {
327 let finder = regex::Regex::new(®ex::escape(needle)).unwrap();
328 move |h| finder.is_match(h)
329 }
330
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static331 pub(crate) fn oneshotiter(
332 _haystack: &str,
333 _needle: &str,
334 ) -> impl Iterator<Item = usize> + 'static {
335 std::iter::from_fn(move || {
336 unimplemented!("regex does not support oneshot searches")
337 })
338 }
339
prebuiltiter(needle: &str) -> PrebuiltIter340 pub(crate) fn prebuiltiter(needle: &str) -> PrebuiltIter {
341 PrebuiltIter(regex::Regex::new(®ex::escape(needle)).unwrap())
342 }
343
344 #[derive(Debug)]
345 pub(crate) struct PrebuiltIter(regex::Regex);
346
347 impl PrebuiltIter {
iter<'a>( &'a self, haystack: &'a str, ) -> impl Iterator<Item = usize> + 'a348 pub(crate) fn iter<'a>(
349 &'a self,
350 haystack: &'a str,
351 ) -> impl Iterator<Item = usize> + 'a {
352 self.0.find_iter(haystack).map(|m| m.start())
353 }
354 }
355 }
356
357 pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool358 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
359 unimplemented!("regex does not support reverse searches")
360 }
361
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static362 pub(crate) fn prebuilt(
363 _needle: &str,
364 ) -> impl Fn(&str) -> bool + 'static {
365 |_| unimplemented!("regex does not support reverse searches")
366 }
367
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static368 pub(crate) fn oneshotiter(
369 _haystack: &str,
370 _needle: &str,
371 ) -> impl Iterator<Item = usize> + 'static {
372 std::iter::from_fn(move || {
373 unimplemented!("regex does not support reverse searches")
374 })
375 }
376
prebuiltiter(_needle: &str) -> super::super::NoIter377 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
378 unimplemented!("regex does not support reverse searches")
379 }
380 }
381 }
382
383 /// std's substring search implementation.
384 ///
385 /// std uses Two-Way like this crate, but doesn't have any prefilter
386 /// heuristics.
387 ///
388 /// std doesn't have any way to amortize the construction of the searcher, so
389 /// we can't implement any of the prebuilt routines.
390 pub(crate) mod stud {
available(_: &str) -> &'static [&'static str]391 pub(crate) fn available(_: &str) -> &'static [&'static str] {
392 &["reverse", "oneshot", "oneshotiter"]
393 }
394
395 pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool396 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
397 haystack.contains(needle)
398 }
399
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static400 pub(crate) fn prebuilt(
401 _needle: &str,
402 ) -> impl Fn(&str) -> bool + 'static {
403 |_| unimplemented!("std does not support prebuilt searches")
404 }
405
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a406 pub(crate) fn oneshotiter<'a>(
407 haystack: &'a str,
408 needle: &'a str,
409 ) -> impl Iterator<Item = usize> + 'a {
410 haystack.match_indices(needle).map(|(i, _)| i)
411 }
412
prebuiltiter(_needle: &str) -> super::super::NoIter413 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
414 super::super::NoIter { imp: "std" }
415 }
416 }
417
418 pub(crate) mod rev {
oneshot(haystack: &str, needle: &str) -> bool419 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
420 haystack.contains(needle)
421 }
422
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static423 pub(crate) fn prebuilt(
424 _needle: &str,
425 ) -> impl Fn(&str) -> bool + 'static {
426 |_| unimplemented!("std does not support prebuilt searches")
427 }
428
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a429 pub(crate) fn oneshotiter<'a>(
430 haystack: &'a str,
431 needle: &'a str,
432 ) -> impl Iterator<Item = usize> + 'a {
433 haystack.rmatch_indices(needle).map(|(i, _)| i)
434 }
435
prebuiltiter(_needle: &str) -> super::super::NoIter436 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
437 super::super::NoIter { imp: "std" }
438 }
439 }
440 }
441
442 /// Substring search from the twoway crate.
443 ///
444 /// twoway uses, obviously, Two-Way as an implementation. AIUI, it was taken
445 /// from std at some point but heavily modified to support a prefilter via
446 /// PCMPESTRI from the SSE 4.2 ISA extension. (And also uses memchr for
447 /// single-byte needles.)
448 ///
449 /// Like std, there is no way to amortize the construction of the searcher, so
450 /// we can't implement any of the prebuilt routines.
451 pub(crate) mod twoway {
available(_: &str) -> &'static [&'static str]452 pub(crate) fn available(_: &str) -> &'static [&'static str] {
453 &["reverse", "oneshot", "oneshotiter"]
454 }
455
456 pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool457 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
458 twoway::find_bytes(haystack.as_bytes(), needle.as_bytes())
459 .is_some()
460 }
461
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static462 pub(crate) fn prebuilt(
463 _needle: &str,
464 ) -> impl Fn(&str) -> bool + 'static {
465 |_| unimplemented!("twoway does not support prebuilt searches")
466 }
467
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a468 pub(crate) fn oneshotiter<'a>(
469 haystack: &'a str,
470 needle: &'a str,
471 ) -> impl Iterator<Item = usize> + 'a {
472 super::super::iter_from_find(
473 haystack.as_bytes(),
474 needle.as_bytes(),
475 twoway::find_bytes,
476 )
477 }
478
prebuiltiter(_needle: &str) -> super::super::NoIter479 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
480 super::super::NoIter { imp: "twoway" }
481 }
482 }
483
484 pub(crate) mod rev {
oneshot(haystack: &str, needle: &str) -> bool485 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
486 twoway::rfind_bytes(haystack.as_bytes(), needle.as_bytes())
487 .is_some()
488 }
489
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static490 pub(crate) fn prebuilt(
491 _needle: &str,
492 ) -> impl Fn(&str) -> bool + 'static {
493 |_| unimplemented!("twoway does not support prebuilt searches")
494 }
495
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a496 pub(crate) fn oneshotiter<'a>(
497 haystack: &'a str,
498 needle: &'a str,
499 ) -> impl Iterator<Item = usize> + 'a {
500 super::super::iter_from_rfind(
501 haystack.as_bytes(),
502 needle.as_bytes(),
503 twoway::rfind_bytes,
504 )
505 }
506
prebuiltiter(_needle: &str) -> super::super::NoIter507 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
508 super::super::NoIter { imp: "twoway" }
509 }
510 }
511 }
512
513 /// Substring search from the sliceslice crate.
514 ///
515 /// This crate is what inspired me to write a vectorized memmem implementation
516 /// in the memchr crate in the first place. In particular, it exposed some
517 /// serious weaknesses in my implementation in the bstr crate.
518 ///
519 /// sliceslice doesn't actually do anything "new" other
520 /// than bringing a long known SIMD algorithm to Rust:
521 /// http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd
522 ///
523 /// The main thrust of the algorithm is that it picks a couple of bytes in the
524 /// needle and uses SIMD to check whether those two bytes occur in the haystack
525 /// in a way that could lead to a match. If so, then you do a simple memcmp
526 /// confirmation step. The main problem with this algorithm is that its worst
527 /// case is multiplicative: that confirmatory step can become quite costly if
528 /// the SIMD prefilter isn't effective. The elegance of this method, however,
529 /// is that the prefilter is routinely effective.
530 ///
531 /// The essence of memchr's implementation of memmem comes from sliceslice,
532 /// but also from regex's original idea to use heuristics based on an a priori
533 /// assumption of relative byte frequency AND from bstr's desire to have a
534 /// constant space and worst case O(m+n) substring search. My claim is that
535 /// it is the best of all words, and that's why this benchmark suite is so
536 /// comprehensive. There are a lot of cases and implementations to test.
537 ///
538 /// NOTE: The API of sliceslice is quite constrained. My guess is that it was
539 /// designed for a very specific use case, and the API is heavily constrained
540 /// to that use case (whatever it is). While its API doesn't provide any
541 /// oneshot routines, we emulate them. (Its main problem is that every such
542 /// search requires copying the needle into a fresh allocation. The memchr
543 /// crate avoids that problem by being generic over the needle: it can be owned
544 /// or borrowed.) Also, since the API only enables testing whether a substring
545 /// exists or not, we can't benchmark iteration.
546 ///
547 /// NOTE: sliceslice only works on x86_64 CPUs with AVX enabled. So not only
548 /// do we conditionally compile the routines below, but we only run these
549 /// benchmarks when AVX2 is available.
550 #[cfg(target_arch = "x86_64")]
551 pub(crate) mod sliceslice {
available(needle: &str) -> &'static [&'static str]552 pub(crate) fn available(needle: &str) -> &'static [&'static str] {
553 // Apparently sliceslice doesn't support searching with an empty
554 // needle. Sheesh.
555 if !needle.is_empty() && is_x86_feature_detected!("avx2") {
556 &["oneshot", "prebuilt"]
557 } else {
558 &[]
559 }
560 }
561
562 pub(crate) mod fwd {
oneshot(haystack: &str, needle: &str) -> bool563 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
564 if !is_x86_feature_detected!("avx2") {
565 unreachable!("sliceslice cannot be called without avx2");
566 }
567 let needle = needle.as_bytes();
568 // SAFETY: This code path is only entered when AVX2 is enabled,
569 // which is the only requirement for using DynamicAvx2Searcher.
570 unsafe {
571 let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle);
572 finder.search_in(haystack.as_bytes())
573 }
574 }
575
prebuilt( needle: &str, ) -> impl Fn(&str) -> bool + 'static576 pub(crate) fn prebuilt(
577 needle: &str,
578 ) -> impl Fn(&str) -> bool + 'static {
579 if !is_x86_feature_detected!("avx2") {
580 unreachable!("sliceslice cannot be called without avx2");
581 }
582 let needle = needle.as_bytes().to_vec();
583 // SAFETY: This code path is only entered when AVX2 is enabled,
584 // which is the only requirement for using DynamicAvx2Searcher.
585 unsafe {
586 let finder = sliceslice::x86::DynamicAvx2Searcher::new(needle);
587 move |h| finder.search_in(h.as_bytes())
588 }
589 }
590
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static591 pub(crate) fn oneshotiter(
592 _haystack: &str,
593 _needle: &str,
594 ) -> impl Iterator<Item = usize> + 'static {
595 std::iter::from_fn(move || {
596 unimplemented!("sliceslice doesn't not support iteration")
597 })
598 }
599
prebuiltiter(_needle: &str) -> super::super::NoIter600 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
601 unimplemented!("sliceslice doesn't support prebuilt iteration")
602 }
603 }
604
605 pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool606 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
607 unimplemented!("sliceslice does not support reverse searches")
608 }
609
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static610 pub(crate) fn prebuilt(
611 _needle: &str,
612 ) -> impl Fn(&str) -> bool + 'static {
613 |_| unimplemented!("sliceslice does not support reverse searches")
614 }
615
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static616 pub(crate) fn oneshotiter(
617 _haystack: &str,
618 _needle: &str,
619 ) -> impl Iterator<Item = usize> + 'static {
620 std::iter::from_fn(move || {
621 unimplemented!("sliceslice does not support reverse searches")
622 })
623 }
624
prebuiltiter(_needle: &str) -> super::super::NoIter625 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
626 unimplemented!("sliceslice does not support reverse searches")
627 }
628 }
629 }
630
631 #[cfg(not(target_arch = "x86_64"))]
632 pub(crate) mod sliceslice {
available(_: &str) -> &'static [&'static str]633 pub(crate) fn available(_: &str) -> &'static [&'static str] {
634 &[]
635 }
636
637 pub(crate) mod fwd {
oneshot(_: &str, _: &str) -> bool638 pub(crate) fn oneshot(_: &str, _: &str) -> bool {
639 unimplemented!("sliceslice only runs on x86")
640 }
641
prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static642 pub(crate) fn prebuilt(_: &str) -> impl Fn(&str) -> bool + 'static {
643 if true {
644 unimplemented!("sliceslice only runs on x86")
645 }
646 |_| false
647 }
648
oneshotiter<'a>( _haystack: &'a str, _needle: &'a str, ) -> impl Iterator<Item = usize> + 'static649 pub(crate) fn oneshotiter<'a>(
650 _haystack: &'a str,
651 _needle: &'a str,
652 ) -> impl Iterator<Item = usize> + 'static {
653 std::iter::from_fn(move || {
654 unimplemented!("sliceslice only runs on x86")
655 })
656 }
657
prebuiltiter(_needle: &str) -> super::super::NoIter658 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
659 unimplemented!("sliceslice only runs on x86")
660 }
661 }
662
663 pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool664 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
665 unimplemented!("sliceslice does not support reverse searches")
666 }
667
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static668 pub(crate) fn prebuilt(
669 _needle: &str,
670 ) -> impl Fn(&str) -> bool + 'static {
671 |_| unimplemented!("sliceslice does not support reverse searches")
672 }
673
oneshotiter( _haystack: &str, _needle: &str, ) -> impl Iterator<Item = usize> + 'static674 pub(crate) fn oneshotiter(
675 _haystack: &str,
676 _needle: &str,
677 ) -> impl Iterator<Item = usize> + 'static {
678 std::iter::from_fn(move || {
679 unimplemented!("sliceslice does not support reverse searches")
680 })
681 }
682
prebuiltiter(_needle: &str) -> super::super::NoIter683 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
684 unimplemented!("sliceslice does not support reverse searches")
685 }
686 }
687 }
688
689 /// libc's substring search implementation.
690 ///
691 /// libc doesn't have any way to amortize the construction of the searcher, so
692 /// we can't implement any of the prebuilt routines.
693 pub(crate) mod libc {
available(_: &str) -> &'static [&'static str]694 pub(crate) fn available(_: &str) -> &'static [&'static str] {
695 &["oneshot", "oneshotiter"]
696 }
697
698 pub(crate) mod fwd {
699 #[cfg(target_arch = "wasm32")]
700 extern "C" {
memmem( haystack: *const libc::c_void, haystack_len: usize, needle: *const libc::c_void, needle_len: usize, ) -> *const libc::c_void701 fn memmem(
702 haystack: *const libc::c_void,
703 haystack_len: usize,
704 needle: *const libc::c_void,
705 needle_len: usize,
706 ) -> *const libc::c_void;
707 }
708 #[cfg(not(target_arch = "wasm32"))]
709 use libc::memmem;
710
find(haystack: &[u8], needle: &[u8]) -> Option<usize>711 fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
712 let p = unsafe {
713 memmem(
714 haystack.as_ptr() as *const libc::c_void,
715 haystack.len(),
716 needle.as_ptr() as *const libc::c_void,
717 needle.len(),
718 )
719 };
720 if p.is_null() {
721 None
722 } else {
723 Some(p as usize - (haystack.as_ptr() as usize))
724 }
725 }
726
oneshot(haystack: &str, needle: &str) -> bool727 pub(crate) fn oneshot(haystack: &str, needle: &str) -> bool {
728 find(haystack.as_bytes(), needle.as_bytes()).is_some()
729 }
730
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static731 pub(crate) fn prebuilt(
732 _needle: &str,
733 ) -> impl Fn(&str) -> bool + 'static {
734 |_| unimplemented!("std does not support prebuilt searches")
735 }
736
oneshotiter<'a>( haystack: &'a str, needle: &'a str, ) -> impl Iterator<Item = usize> + 'a737 pub(crate) fn oneshotiter<'a>(
738 haystack: &'a str,
739 needle: &'a str,
740 ) -> impl Iterator<Item = usize> + 'a {
741 super::super::iter_from_find(
742 haystack.as_bytes(),
743 needle.as_bytes(),
744 find,
745 )
746 }
747
prebuiltiter(_needle: &str) -> super::super::NoIter748 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
749 super::super::NoIter { imp: "libc" }
750 }
751 }
752
753 pub(crate) mod rev {
oneshot(_haystack: &str, _needle: &str) -> bool754 pub(crate) fn oneshot(_haystack: &str, _needle: &str) -> bool {
755 unimplemented!("libc does not support reverse searches")
756 }
757
prebuilt( _needle: &str, ) -> impl Fn(&str) -> bool + 'static758 pub(crate) fn prebuilt(
759 _needle: &str,
760 ) -> impl Fn(&str) -> bool + 'static {
761 |_| unimplemented!("libc does not support reverse searches")
762 }
763
oneshotiter<'a>( _haystack: &'a str, _needle: &'a str, ) -> impl Iterator<Item = usize> + 'a764 pub(crate) fn oneshotiter<'a>(
765 _haystack: &'a str,
766 _needle: &'a str,
767 ) -> impl Iterator<Item = usize> + 'a {
768 std::iter::from_fn(move || {
769 unimplemented!("libc does not support reverse searches")
770 })
771 }
772
prebuiltiter(_needle: &str) -> super::super::NoIter773 pub(crate) fn prebuiltiter(_needle: &str) -> super::super::NoIter {
774 unimplemented!("libc does not support reverse searches")
775 }
776 }
777 }
778
779 /// An iterator that looks like a PrebuilIter API-wise, but panics if it's
780 /// called. This should be used for implementations that don't support
781 /// prebuilt iteration.
782 #[derive(Debug)]
783 pub(crate) struct NoIter {
784 /// The name of the impl to use in the panic message in case it is invoked
785 /// by mistake. (But the benchmark harness should not invoke it, assuming
786 /// each impl's 'available' function is correct.
787 imp: &'static str,
788 }
789
790 impl NoIter {
iter( &self, _: &str, ) -> impl Iterator<Item = usize> + 'static791 pub(crate) fn iter(
792 &self,
793 _: &str,
794 ) -> impl Iterator<Item = usize> + 'static {
795 let imp = self.imp;
796 std::iter::from_fn(move || {
797 unimplemented!("{} does not support prebuilt iteration", imp)
798 })
799 }
800 }
801
802 /// Accepts a corpus and a needle and a routine that implements substring
803 /// search, and returns an iterator over all matches. This is useful for
804 /// benchmarking "find all matches" for substring search implementations that
805 /// don't expose a native way to do this.
806 ///
807 /// The closure given takes two parameters: the corpus and needle, in that
808 /// order.
iter_from_find<'a>( haystack: &'a [u8], needle: &'a [u8], mut find: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, ) -> impl Iterator<Item = usize> + 'a809 fn iter_from_find<'a>(
810 haystack: &'a [u8],
811 needle: &'a [u8],
812 mut find: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a,
813 ) -> impl Iterator<Item = usize> + 'a {
814 let mut pos = 0;
815 std::iter::from_fn(move || {
816 if pos > haystack.len() {
817 return None;
818 }
819 match find(&haystack[pos..], needle) {
820 None => None,
821 Some(i) => {
822 let found = pos + i;
823 // We always need to add at least 1, in case of an empty needle.
824 pos += i + std::cmp::max(1, needle.len());
825 Some(found)
826 }
827 }
828 })
829 }
830
831 /// Like iter_from_find, but for reverse searching.
iter_from_rfind<'a>( haystack: &'a [u8], needle: &'a [u8], mut rfind: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a, ) -> impl Iterator<Item = usize> + 'a832 fn iter_from_rfind<'a>(
833 haystack: &'a [u8],
834 needle: &'a [u8],
835 mut rfind: impl FnMut(&[u8], &[u8]) -> Option<usize> + 'a,
836 ) -> impl Iterator<Item = usize> + 'a {
837 let mut pos = Some(haystack.len());
838 std::iter::from_fn(move || {
839 let end = match pos {
840 None => return None,
841 Some(end) => end,
842 };
843 match rfind(&haystack[..end], needle) {
844 None => None,
845 Some(i) => {
846 if end == i {
847 // We always need to subtract at least 1, in case of an
848 // empty needle.
849 pos = end.checked_sub(1);
850 } else {
851 pos = Some(i);
852 }
853 Some(i)
854 }
855 }
856 })
857 }
858