//! Parallel iterator types for [strings][std::str]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
//! reference a `Pattern` trait which is not visible outside this crate.
//! This trait is intentionally kept private, for use only by Rayon itself.
//! It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
//! and any function or closure `F: Fn(char) -> bool + Sync + Send`.
//!
//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
//!
//! [std::str]: https://doc.rust-lang.org/stable/std/str/

use crate::iter::plumbing::*;
use crate::iter::*;
use crate::split_producer::*;

/// Test if a byte is the start of a UTF-8 character.
/// (extracted from `str::is_char_boundary`)
///
/// Returns `true` for ASCII bytes (`b < 128`) and for UTF-8 lead bytes
/// (`b >= 192`), and `false` for continuation bytes (`128..=191`).
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // This is bit magic equivalent to: b < 128 || b >= 192
    // Reinterpreted as `i8`, the continuation bytes `0x80..=0xBF` are exactly
    // the values strictly below `-0x40`.
    (b as i8) >= -0x40
}

29 /// Find the index of a character boundary near the midpoint.
30 #[inline]
find_char_midpoint(chars: &str) -> usize31 fn find_char_midpoint(chars: &str) -> usize {
32     let mid = chars.len() / 2;
33 
34     // We want to split near the midpoint, but we need to find an actual
35     // character boundary.  So we look at the raw bytes, first scanning
36     // forward from the midpoint for a boundary, then trying backward.
37     let (left, right) = chars.as_bytes().split_at(mid);
38     match right.iter().copied().position(is_char_boundary) {
39         Some(i) => mid + i,
40         None => left
41             .iter()
42             .copied()
43             .rposition(is_char_boundary)
44             .unwrap_or(0),
45     }
46 }
47 
48 /// Try to split a string near the midpoint.
49 #[inline]
split(chars: &str) -> Option<(&str, &str)>50 fn split(chars: &str) -> Option<(&str, &str)> {
51     let index = find_char_midpoint(chars);
52     if index > 0 {
53         Some(chars.split_at(index))
54     } else {
55         None
56     }
57 }
58 
59 /// Parallel extensions for strings.
60 pub trait ParallelString {
61     /// Returns a plain string slice, which is used to implement the rest of
62     /// the parallel methods.
as_parallel_string(&self) -> &str63     fn as_parallel_string(&self) -> &str;
64 
65     /// Returns a parallel iterator over the characters of a string.
66     ///
67     /// # Examples
68     ///
69     /// ```
70     /// use rayon::prelude::*;
71     /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
72     /// assert_eq!(Some('o'), max);
73     /// ```
par_chars(&self) -> Chars<'_>74     fn par_chars(&self) -> Chars<'_> {
75         Chars {
76             chars: self.as_parallel_string(),
77         }
78     }
79 
80     /// Returns a parallel iterator over the characters of a string, with their positions.
81     ///
82     /// # Examples
83     ///
84     /// ```
85     /// use rayon::prelude::*;
86     /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
87     /// assert_eq!(Some((1, 'e')), min);
88     /// ```
par_char_indices(&self) -> CharIndices<'_>89     fn par_char_indices(&self) -> CharIndices<'_> {
90         CharIndices {
91             chars: self.as_parallel_string(),
92         }
93     }
94 
95     /// Returns a parallel iterator over the bytes of a string.
96     ///
97     /// Note that multi-byte sequences (for code points greater than `U+007F`)
98     /// are produced as separate items, but will not be split across threads.
99     /// If you would prefer an indexed iterator without that guarantee, consider
100     /// `string.as_bytes().par_iter().copied()` instead.
101     ///
102     /// # Examples
103     ///
104     /// ```
105     /// use rayon::prelude::*;
106     /// let max = "hello".par_bytes().max();
107     /// assert_eq!(Some(b'o'), max);
108     /// ```
par_bytes(&self) -> Bytes<'_>109     fn par_bytes(&self) -> Bytes<'_> {
110         Bytes {
111             chars: self.as_parallel_string(),
112         }
113     }
114 
115     /// Returns a parallel iterator over a string encoded as UTF-16.
116     ///
117     /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
118     /// produced as separate items, but will not be split across threads.
119     ///
120     /// # Examples
121     ///
122     /// ```
123     /// use rayon::prelude::*;
124     ///
125     /// let max = "hello".par_encode_utf16().max();
126     /// assert_eq!(Some(b'o' as u16), max);
127     ///
128     /// let text = "Zażółć gęślą jaźń";
129     /// let utf8_len = text.len();
130     /// let utf16_len = text.par_encode_utf16().count();
131     /// assert!(utf16_len <= utf8_len);
132     /// ```
par_encode_utf16(&self) -> EncodeUtf16<'_>133     fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134         EncodeUtf16 {
135             chars: self.as_parallel_string(),
136         }
137     }
138 
139     /// Returns a parallel iterator over substrings separated by a
140     /// given character or predicate, similar to `str::split`.
141     ///
142     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
143     /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
144     /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
145     ///
146     /// # Examples
147     ///
148     /// ```
149     /// use rayon::prelude::*;
150     /// let total = "1, 2, buckle, 3, 4, door"
151     ///    .par_split(',')
152     ///    .filter_map(|s| s.trim().parse::<i32>().ok())
153     ///    .sum();
154     /// assert_eq!(10, total);
155     /// ```
par_split<P: Pattern>(&self, separator: P) -> Split<'_, P>156     fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157         Split::new(self.as_parallel_string(), separator)
158     }
159 
160     /// Returns a parallel iterator over substrings separated by a
161     /// given character or predicate, keeping the matched part as a terminator
162     /// of the substring similar to `str::split_inclusive`.
163     ///
164     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
165     /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
166     /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
167     ///
168     /// # Examples
169     ///
170     /// ```
171     /// use rayon::prelude::*;
172     /// let lines: Vec<_> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
173     ///    .par_split_inclusive('\n')
174     ///    .collect();
175     /// assert_eq!(lines, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
176     /// ```
par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P>177     fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
178         SplitInclusive::new(self.as_parallel_string(), separator)
179     }
180 
181     /// Returns a parallel iterator over substrings terminated by a
182     /// given character or predicate, similar to `str::split_terminator`.
183     /// It's equivalent to `par_split`, except it doesn't produce an empty
184     /// substring after a trailing terminator.
185     ///
186     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
187     /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
188     /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
189     ///
190     /// # Examples
191     ///
192     /// ```
193     /// use rayon::prelude::*;
194     /// let parts: Vec<_> = "((1 + 3) * 2)"
195     ///     .par_split_terminator(|c| c == '(' || c == ')')
196     ///     .collect();
197     /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
198     /// ```
par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P>199     fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
200         SplitTerminator::new(self.as_parallel_string(), terminator)
201     }
202 
203     /// Returns a parallel iterator over the lines of a string, ending with an
204     /// optional carriage return and with a newline (`\r\n` or just `\n`).
205     /// The final line ending is optional, and line endings are not included in
206     /// the output strings.
207     ///
208     /// # Examples
209     ///
210     /// ```
211     /// use rayon::prelude::*;
212     /// let lengths: Vec<_> = "hello world\nfizbuzz"
213     ///     .par_lines()
214     ///     .map(|l| l.len())
215     ///     .collect();
216     /// assert_eq!(vec![11, 7], lengths);
217     /// ```
par_lines(&self) -> Lines<'_>218     fn par_lines(&self) -> Lines<'_> {
219         Lines(self.as_parallel_string())
220     }
221 
222     /// Returns a parallel iterator over the sub-slices of a string that are
223     /// separated by any amount of whitespace.
224     ///
225     /// As with `str::split_whitespace`, 'whitespace' is defined according to
226     /// the terms of the Unicode Derived Core Property `White_Space`.
227     /// If you only want to split on ASCII whitespace instead, use
228     /// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
229     ///
230     /// # Examples
231     ///
232     /// ```
233     /// use rayon::prelude::*;
234     /// let longest = "which is the longest word?"
235     ///     .par_split_whitespace()
236     ///     .max_by_key(|word| word.len());
237     /// assert_eq!(Some("longest"), longest);
238     /// ```
239     ///
240     /// All kinds of whitespace are considered:
241     ///
242     /// ```
243     /// use rayon::prelude::*;
244     /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
245     ///     .par_split_whitespace()
246     ///     .collect();
247     /// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
248     /// ```
249     ///
250     /// If the string is empty or all whitespace, the iterator yields no string slices:
251     ///
252     /// ```
253     /// use rayon::prelude::*;
254     /// assert_eq!("".par_split_whitespace().count(), 0);
255     /// assert_eq!("   ".par_split_whitespace().count(), 0);
256     /// ```
par_split_whitespace(&self) -> SplitWhitespace<'_>257     fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
258         SplitWhitespace(self.as_parallel_string())
259     }
260 
261     /// Returns a parallel iterator over the sub-slices of a string that are
262     /// separated by any amount of ASCII whitespace.
263     ///
264     /// To split by Unicode `White_Space` instead, use
265     /// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
266     ///
267     /// # Examples
268     ///
269     /// ```
270     /// use rayon::prelude::*;
271     /// let longest = "which is the longest word?"
272     ///     .par_split_ascii_whitespace()
273     ///     .max_by_key(|word| word.len());
274     /// assert_eq!(Some("longest"), longest);
275     /// ```
276     ///
277     /// All kinds of ASCII whitespace are considered, but not Unicode `White_Space`:
278     ///
279     /// ```
280     /// use rayon::prelude::*;
281     /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
282     ///     .par_split_ascii_whitespace()
283     ///     .collect();
284     /// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
285     /// ```
286     ///
287     /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
288     ///
289     /// ```
290     /// use rayon::prelude::*;
291     /// assert_eq!("".par_split_whitespace().count(), 0);
292     /// assert_eq!("   ".par_split_whitespace().count(), 0);
293     /// ```
par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_>294     fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
295         SplitAsciiWhitespace(self.as_parallel_string())
296     }
297 
298     /// Returns a parallel iterator over substrings that match a
299     /// given character or predicate, similar to `str::matches`.
300     ///
301     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
302     /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
303     /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
304     ///
305     /// # Examples
306     ///
307     /// ```
308     /// use rayon::prelude::*;
309     /// let total = "1, 2, buckle, 3, 4, door"
310     ///    .par_matches(char::is_numeric)
311     ///    .map(|s| s.parse::<i32>().expect("digit"))
312     ///    .sum();
313     /// assert_eq!(10, total);
314     /// ```
par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P>315     fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
316         Matches {
317             chars: self.as_parallel_string(),
318             pattern,
319         }
320     }
321 
322     /// Returns a parallel iterator over substrings that match a given character
323     /// or predicate, with their positions, similar to `str::match_indices`.
324     ///
325     /// Note: the `Pattern` trait is private, for use only by Rayon itself.
326     /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
327     /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
328     ///
329     /// # Examples
330     ///
331     /// ```
332     /// use rayon::prelude::*;
333     /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
334     ///    .par_match_indices(char::is_numeric)
335     ///    .collect();
336     /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
337     /// ```
par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P>338     fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
339         MatchIndices {
340             chars: self.as_parallel_string(),
341             pattern,
342         }
343     }
344 }
345 
346 impl ParallelString for str {
347     #[inline]
as_parallel_string(&self) -> &str348     fn as_parallel_string(&self) -> &str {
349         self
350     }
351 }
352 
353 // /////////////////////////////////////////////////////////////////////////
354 
355 /// We hide the `Pattern` trait in a private module, as its API is not meant
356 /// for general consumption.  If we could have privacy on trait items, then it
357 /// would be nicer to have its basic existence and implementors public while
358 /// keeping all of the methods private.
359 mod private {
360     use crate::iter::plumbing::Folder;
361 
362     /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
363     /// `std::str::pattern::{Pattern, Searcher}`.
364     ///
365     /// Implementing this trait is not permitted outside of `rayon`.
366     pub trait Pattern: Sized + Sync + Send {
367         private_decl! {}
find_in(&self, haystack: &str) -> Option<usize>368         fn find_in(&self, haystack: &str) -> Option<usize>;
rfind_in(&self, haystack: &str) -> Option<usize>369         fn rfind_in(&self, haystack: &str) -> Option<usize>;
is_suffix_of(&self, haystack: &str) -> bool370         fn is_suffix_of(&self, haystack: &str) -> bool;
fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F where F: Folder<&'ch str>371         fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
372         where
373             F: Folder<&'ch str>;
fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F where F: Folder<&'ch str>374         fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
375         where
376             F: Folder<&'ch str>;
fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F where F: Folder<&'ch str>377         fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
378         where
379             F: Folder<&'ch str>;
fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F where F: Folder<(usize, &'ch str)>380         fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
381         where
382             F: Folder<(usize, &'ch str)>;
383     }
384 }
385 use self::private::Pattern;
386 
/// Builds a closure that shifts the index of an `(index, value)` pair by
/// `base`, passing the value through untouched.
///
/// Used to turn indices that are relative to a sub-slice back into indices
/// relative to the whole string.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |(i, x)| (base + i, x)
}

// Generates the body of a `Pattern` impl by delegating every method to the
// matching `str` method, using `$pattern` (an expression over `$self`) as the
// standard-library pattern.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, chars: &str) -> Option<usize> {
            chars.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, chars: &str) -> Option<usize> {
            chars.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, chars: &str) -> bool {
            chars.ends_with($pattern)
        }

        fn fold_splits<'ch, F>(&$self, chars: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut split = chars.split($pattern);
            if skip_last {
                // Discard the final piece before handing the rest over.
                split.next_back();
            }
            folder.consume_iter(split)
        }

        fn fold_inclusive_splits<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            folder.consume_iter(chars.split_inclusive($pattern))
        }

        fn fold_matches<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            folder.consume_iter(chars.matches($pattern))
        }

        fn fold_match_indices<'ch, F>(&$self, chars: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Indices from `match_indices` are relative to `chars`; shift
            // them by `base`.
            folder.consume_iter(chars.match_indices($pattern).map(offset(base)))
        }
    }
}

445 impl Pattern for char {
446     impl_pattern!(&self => *self);
447 }
448 
449 impl Pattern for &[char] {
450     impl_pattern!(&self => *self);
451 }
452 
453 // TODO (MSRV 1.75): use `*self` for array patterns too.
454 // - Needs `DoubleEndedSearcher` so `split.next_back()` works.
455 
456 impl<const N: usize> Pattern for [char; N] {
457     impl_pattern!(&self => self.as_slice());
458 }
459 
460 impl<const N: usize> Pattern for &[char; N] {
461     impl_pattern!(&self => self.as_slice());
462 }
463 
464 impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
465     impl_pattern!(&self => self);
466 }
467 
// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over the characters of a string
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    /// The string whose characters are produced.
    chars: &'ch str,
}

/// Splittable producer behind [`Chars`]; owns the slice still to be produced.
struct CharsProducer<'ch> {
    chars: &'ch str,
}

480 impl<'ch> ParallelIterator for Chars<'ch> {
481     type Item = char;
482 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,483     fn drive_unindexed<C>(self, consumer: C) -> C::Result
484     where
485         C: UnindexedConsumer<Self::Item>,
486     {
487         bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
488     }
489 }
490 
491 impl<'ch> UnindexedProducer for CharsProducer<'ch> {
492     type Item = char;
493 
split(self) -> (Self, Option<Self>)494     fn split(self) -> (Self, Option<Self>) {
495         match split(self.chars) {
496             Some((left, right)) => (
497                 CharsProducer { chars: left },
498                 Some(CharsProducer { chars: right }),
499             ),
500             None => (self, None),
501         }
502     }
503 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,504     fn fold_with<F>(self, folder: F) -> F
505     where
506         F: Folder<Self::Item>,
507     {
508         folder.consume_iter(self.chars.chars())
509     }
510 }
511 
// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over the characters of a string, with their positions
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    /// The string whose characters and positions are produced.
    chars: &'ch str,
}

/// Splittable producer behind [`CharIndices`].
struct CharIndicesProducer<'ch> {
    /// Byte offset of `chars` within the original string.
    index: usize,
    /// The slice still to be produced.
    chars: &'ch str,
}

525 impl<'ch> ParallelIterator for CharIndices<'ch> {
526     type Item = (usize, char);
527 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,528     fn drive_unindexed<C>(self, consumer: C) -> C::Result
529     where
530         C: UnindexedConsumer<Self::Item>,
531     {
532         let producer = CharIndicesProducer {
533             index: 0,
534             chars: self.chars,
535         };
536         bridge_unindexed(producer, consumer)
537     }
538 }
539 
540 impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
541     type Item = (usize, char);
542 
split(self) -> (Self, Option<Self>)543     fn split(self) -> (Self, Option<Self>) {
544         match split(self.chars) {
545             Some((left, right)) => (
546                 CharIndicesProducer {
547                     chars: left,
548                     ..self
549                 },
550                 Some(CharIndicesProducer {
551                     chars: right,
552                     index: self.index + left.len(),
553                 }),
554             ),
555             None => (self, None),
556         }
557     }
558 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,559     fn fold_with<F>(self, folder: F) -> F
560     where
561         F: Folder<Self::Item>,
562     {
563         let base = self.index;
564         folder.consume_iter(self.chars.char_indices().map(offset(base)))
565     }
566 }
567 
// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over the bytes of a string
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    /// The string whose bytes are produced.
    chars: &'ch str,
}

/// Splittable producer behind [`Bytes`]; owns the slice still to be produced.
struct BytesProducer<'ch> {
    chars: &'ch str,
}

580 impl<'ch> ParallelIterator for Bytes<'ch> {
581     type Item = u8;
582 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,583     fn drive_unindexed<C>(self, consumer: C) -> C::Result
584     where
585         C: UnindexedConsumer<Self::Item>,
586     {
587         bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
588     }
589 }
590 
591 impl<'ch> UnindexedProducer for BytesProducer<'ch> {
592     type Item = u8;
593 
split(self) -> (Self, Option<Self>)594     fn split(self) -> (Self, Option<Self>) {
595         match split(self.chars) {
596             Some((left, right)) => (
597                 BytesProducer { chars: left },
598                 Some(BytesProducer { chars: right }),
599             ),
600             None => (self, None),
601         }
602     }
603 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,604     fn fold_with<F>(self, folder: F) -> F
605     where
606         F: Folder<Self::Item>,
607     {
608         folder.consume_iter(self.chars.bytes())
609     }
610 }
611 
// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over a string encoded as UTF-16
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    /// The string that is encoded.
    chars: &'ch str,
}

/// Splittable producer behind [`EncodeUtf16`]; owns the slice still to be
/// encoded.
struct EncodeUtf16Producer<'ch> {
    chars: &'ch str,
}

624 impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
625     type Item = u16;
626 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,627     fn drive_unindexed<C>(self, consumer: C) -> C::Result
628     where
629         C: UnindexedConsumer<Self::Item>,
630     {
631         bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
632     }
633 }
634 
635 impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
636     type Item = u16;
637 
split(self) -> (Self, Option<Self>)638     fn split(self) -> (Self, Option<Self>) {
639         match split(self.chars) {
640             Some((left, right)) => (
641                 EncodeUtf16Producer { chars: left },
642                 Some(EncodeUtf16Producer { chars: right }),
643             ),
644             None => (self, None),
645         }
646     }
647 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,648     fn fold_with<F>(self, folder: F) -> F
649     where
650         F: Folder<Self::Item>,
651     {
652         folder.consume_iter(self.chars.encode_utf16())
653     }
654 }
655 
656 // /////////////////////////////////////////////////////////////////////////
657 
658 /// Parallel iterator over substrings separated by a pattern
659 #[derive(Debug, Clone)]
660 pub struct Split<'ch, P: Pattern> {
661     chars: &'ch str,
662     separator: P,
663 }
664 
665 impl<'ch, P: Pattern> Split<'ch, P> {
new(chars: &'ch str, separator: P) -> Self666     fn new(chars: &'ch str, separator: P) -> Self {
667         Split { chars, separator }
668     }
669 }
670 
671 impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
672     type Item = &'ch str;
673 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,674     fn drive_unindexed<C>(self, consumer: C) -> C::Result
675     where
676         C: UnindexedConsumer<Self::Item>,
677     {
678         let producer = SplitProducer::new(self.chars, &self.separator);
679         bridge_unindexed(producer, consumer)
680     }
681 }
682 
683 /// Implement support for `SplitProducer`.
684 impl<'ch, P: Pattern> Fissile<P> for &'ch str {
length(&self) -> usize685     fn length(&self) -> usize {
686         self.len()
687     }
688 
midpoint(&self, end: usize) -> usize689     fn midpoint(&self, end: usize) -> usize {
690         // First find a suitable UTF-8 boundary.
691         find_char_midpoint(&self[..end])
692     }
693 
find(&self, separator: &P, start: usize, end: usize) -> Option<usize>694     fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
695         separator.find_in(&self[start..end])
696     }
697 
rfind(&self, separator: &P, end: usize) -> Option<usize>698     fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
699         separator.rfind_in(&self[..end])
700     }
701 
split_once<const INCL: bool>(self, index: usize) -> (Self, Self)702     fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
703         if INCL {
704             // include the separator in the left side
705             let separator = self[index..].chars().next().unwrap();
706             self.split_at(index + separator.len_utf8())
707         } else {
708             let (left, right) = self.split_at(index);
709             let mut right_iter = right.chars();
710             right_iter.next(); // skip the separator
711             (left, right_iter.as_str())
712         }
713     }
714 
fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F where F: Folder<Self>,715     fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
716     where
717         F: Folder<Self>,
718     {
719         if INCL {
720             debug_assert!(!skip_last);
721             separator.fold_inclusive_splits(self, folder)
722         } else {
723             separator.fold_splits(self, folder, skip_last)
724         }
725     }
726 }
727 
728 // /////////////////////////////////////////////////////////////////////////
729 
730 /// Parallel iterator over substrings separated by a pattern
731 #[derive(Debug, Clone)]
732 pub struct SplitInclusive<'ch, P: Pattern> {
733     chars: &'ch str,
734     separator: P,
735 }
736 
737 impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
new(chars: &'ch str, separator: P) -> Self738     fn new(chars: &'ch str, separator: P) -> Self {
739         SplitInclusive { chars, separator }
740     }
741 }
742 
743 impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
744     type Item = &'ch str;
745 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,746     fn drive_unindexed<C>(self, consumer: C) -> C::Result
747     where
748         C: UnindexedConsumer<Self::Item>,
749     {
750         let producer = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
751         bridge_unindexed(producer, consumer)
752     }
753 }
754 
755 // /////////////////////////////////////////////////////////////////////////
756 
757 /// Parallel iterator over substrings separated by a terminator pattern
758 #[derive(Debug, Clone)]
759 pub struct SplitTerminator<'ch, P: Pattern> {
760     chars: &'ch str,
761     terminator: P,
762 }
763 
764 struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
765     splitter: SplitProducer<'sep, P, &'ch str>,
766     skip_last: bool,
767 }
768 
769 impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
new(chars: &'ch str, terminator: P) -> Self770     fn new(chars: &'ch str, terminator: P) -> Self {
771         SplitTerminator { chars, terminator }
772     }
773 }
774 
775 impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
new(chars: &'ch str, terminator: &'sep P) -> Self776     fn new(chars: &'ch str, terminator: &'sep P) -> Self {
777         SplitTerminatorProducer {
778             splitter: SplitProducer::new(chars, terminator),
779             skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
780         }
781     }
782 }
783 
784 impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
785     type Item = &'ch str;
786 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,787     fn drive_unindexed<C>(self, consumer: C) -> C::Result
788     where
789         C: UnindexedConsumer<Self::Item>,
790     {
791         let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
792         bridge_unindexed(producer, consumer)
793     }
794 }
795 
796 impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
797     type Item = &'ch str;
798 
split(mut self) -> (Self, Option<Self>)799     fn split(mut self) -> (Self, Option<Self>) {
800         let (left, right) = self.splitter.split();
801         self.splitter = left;
802         let right = right.map(|right| {
803             let skip_last = self.skip_last;
804             self.skip_last = false;
805             SplitTerminatorProducer {
806                 splitter: right,
807                 skip_last,
808             }
809         });
810         (self, right)
811     }
812 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,813     fn fold_with<F>(self, folder: F) -> F
814     where
815         F: Folder<Self::Item>,
816     {
817         self.splitter.fold_with(folder, self.skip_last)
818     }
819 }
820 
// /////////////////////////////////////////////////////////////////////////

/// Parallel iterator over lines in a string
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);

/// Removes a single trailing `'\r'` from `line`, if there is one.
///
/// At most one carriage return is removed; a line without a trailing `'\r'`
/// is returned unchanged.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    line.strip_suffix('\r').unwrap_or(line)
}

832 impl<'ch> ParallelIterator for Lines<'ch> {
833     type Item = &'ch str;
834 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,835     fn drive_unindexed<C>(self, consumer: C) -> C::Result
836     where
837         C: UnindexedConsumer<Self::Item>,
838     {
839         self.0
840             .par_split_terminator('\n')
841             .map(no_carriage_return)
842             .drive_unindexed(consumer)
843     }
844 }
845 
846 // /////////////////////////////////////////////////////////////////////////
847 
/// Parallel iterator over the whitespace-separated substrings of a string.
///
/// Wraps the borrowed source text; every yielded piece is a sub-slice of it.
#[derive(Clone, Debug)]
pub struct SplitWhitespace<'ch>(&'ch str);
851 
/// Filter predicate: returns `true` when the referenced string slice has at
/// least one byte.
#[inline]
fn not_empty(s: &&str) -> bool {
    let piece: &str = s;
    !piece.is_empty()
}
856 
857 impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
858     type Item = &'ch str;
859 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,860     fn drive_unindexed<C>(self, consumer: C) -> C::Result
861     where
862         C: UnindexedConsumer<Self::Item>,
863     {
864         self.0
865             .par_split(char::is_whitespace)
866             .filter(not_empty)
867             .drive_unindexed(consumer)
868     }
869 }
870 
871 // /////////////////////////////////////////////////////////////////////////
872 
/// Parallel iterator over the substrings of a string that are separated by
/// ASCII whitespace.
///
/// Wraps the borrowed source text; every yielded piece is a sub-slice of it.
#[derive(Clone, Debug)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
876 
/// By-value adapter around [`char::is_ascii_whitespace`], which takes its
/// receiver by reference.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    char::is_ascii_whitespace(&c)
}
881 
882 impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
883     type Item = &'ch str;
884 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,885     fn drive_unindexed<C>(self, consumer: C) -> C::Result
886     where
887         C: UnindexedConsumer<Self::Item>,
888     {
889         self.0
890             .par_split(is_ascii_whitespace)
891             .filter(not_empty)
892             .drive_unindexed(consumer)
893     }
894 }
895 
896 // /////////////////////////////////////////////////////////////////////////
897 
898 /// Parallel iterator over substrings that match a pattern
899 #[derive(Debug, Clone)]
900 pub struct Matches<'ch, P: Pattern> {
901     chars: &'ch str,
902     pattern: P,
903 }
904 
905 struct MatchesProducer<'ch, 'pat, P: Pattern> {
906     chars: &'ch str,
907     pattern: &'pat P,
908 }
909 
910 impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
911     type Item = &'ch str;
912 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,913     fn drive_unindexed<C>(self, consumer: C) -> C::Result
914     where
915         C: UnindexedConsumer<Self::Item>,
916     {
917         let producer = MatchesProducer {
918             chars: self.chars,
919             pattern: &self.pattern,
920         };
921         bridge_unindexed(producer, consumer)
922     }
923 }
924 
925 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
926     type Item = &'ch str;
927 
split(self) -> (Self, Option<Self>)928     fn split(self) -> (Self, Option<Self>) {
929         match split(self.chars) {
930             Some((left, right)) => (
931                 MatchesProducer {
932                     chars: left,
933                     ..self
934                 },
935                 Some(MatchesProducer {
936                     chars: right,
937                     ..self
938                 }),
939             ),
940             None => (self, None),
941         }
942     }
943 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,944     fn fold_with<F>(self, folder: F) -> F
945     where
946         F: Folder<Self::Item>,
947     {
948         self.pattern.fold_matches(self.chars, folder)
949     }
950 }
951 
952 // /////////////////////////////////////////////////////////////////////////
953 
954 /// Parallel iterator over substrings that match a pattern, with their positions
955 #[derive(Debug, Clone)]
956 pub struct MatchIndices<'ch, P: Pattern> {
957     chars: &'ch str,
958     pattern: P,
959 }
960 
961 struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
962     index: usize,
963     chars: &'ch str,
964     pattern: &'pat P,
965 }
966 
967 impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
968     type Item = (usize, &'ch str);
969 
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,970     fn drive_unindexed<C>(self, consumer: C) -> C::Result
971     where
972         C: UnindexedConsumer<Self::Item>,
973     {
974         let producer = MatchIndicesProducer {
975             index: 0,
976             chars: self.chars,
977             pattern: &self.pattern,
978         };
979         bridge_unindexed(producer, consumer)
980     }
981 }
982 
983 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
984     type Item = (usize, &'ch str);
985 
split(self) -> (Self, Option<Self>)986     fn split(self) -> (Self, Option<Self>) {
987         match split(self.chars) {
988             Some((left, right)) => (
989                 MatchIndicesProducer {
990                     chars: left,
991                     ..self
992                 },
993                 Some(MatchIndicesProducer {
994                     chars: right,
995                     index: self.index + left.len(),
996                     ..self
997                 }),
998             ),
999             None => (self, None),
1000         }
1001     }
1002 
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,1003     fn fold_with<F>(self, folder: F) -> F
1004     where
1005         F: Folder<Self::Item>,
1006     {
1007         self.pattern
1008             .fold_match_indices(self.chars, folder, self.index)
1009     }
1010 }
1011