1 //! Parallel iterator types for [strings][std::str]
2 //!
3 //! You will rarely need to interact with this module directly unless you need
4 //! to name one of the iterator types.
5 //!
//! Note: [`ParallelString::par_split()`], [`par_split_terminator()`],
//! `par_matches()`, and `par_match_indices()` reference a `Pattern` trait
//! which is not visible outside this crate. This trait is intentionally kept
//! private, for use only by Rayon itself. It is implemented for `char`,
//! `&[char]`, and any function or closure `F: Fn(char) -> bool + Sync + Send`.
11 //!
12 //! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
13 //! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
14 //!
15 //! [std::str]: https://doc.rust-lang.org/stable/std/str/
16
17 use crate::iter::plumbing::*;
18 use crate::iter::*;
19 use crate::split_producer::*;
20
/// Test if a byte is the start of a UTF-8 character.
/// (extracted from `str::is_char_boundary`)
///
/// Returns `false` only for UTF-8 continuation bytes (`128..=191`); every
/// other byte value is reported as the start of a character.
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Reinterpreted as `i8`, the continuation bytes 128..=191 become
    // -128..=-65, so a single signed comparison suffices.
    // This is bit magic equivalent to: b < 128 || b >= 192
    (b as i8) >= -0x40
}
28
29 /// Find the index of a character boundary near the midpoint.
30 #[inline]
find_char_midpoint(chars: &str) -> usize31 fn find_char_midpoint(chars: &str) -> usize {
32 let mid = chars.len() / 2;
33
34 // We want to split near the midpoint, but we need to find an actual
35 // character boundary. So we look at the raw bytes, first scanning
36 // forward from the midpoint for a boundary, then trying backward.
37 let (left, right) = chars.as_bytes().split_at(mid);
38 match right.iter().copied().position(is_char_boundary) {
39 Some(i) => mid + i,
40 None => left
41 .iter()
42 .copied()
43 .rposition(is_char_boundary)
44 .unwrap_or(0),
45 }
46 }
47
48 /// Try to split a string near the midpoint.
49 #[inline]
split(chars: &str) -> Option<(&str, &str)>50 fn split(chars: &str) -> Option<(&str, &str)> {
51 let index = find_char_midpoint(chars);
52 if index > 0 {
53 Some(chars.split_at(index))
54 } else {
55 None
56 }
57 }
58
59 /// Parallel extensions for strings.
60 pub trait ParallelString {
61 /// Returns a plain string slice, which is used to implement the rest of
62 /// the parallel methods.
as_parallel_string(&self) -> &str63 fn as_parallel_string(&self) -> &str;
64
65 /// Returns a parallel iterator over the characters of a string.
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// use rayon::prelude::*;
71 /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
72 /// assert_eq!(Some('o'), max);
73 /// ```
par_chars(&self) -> Chars<'_>74 fn par_chars(&self) -> Chars<'_> {
75 Chars {
76 chars: self.as_parallel_string(),
77 }
78 }
79
80 /// Returns a parallel iterator over the characters of a string, with their positions.
81 ///
82 /// # Examples
83 ///
84 /// ```
85 /// use rayon::prelude::*;
86 /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
87 /// assert_eq!(Some((1, 'e')), min);
88 /// ```
par_char_indices(&self) -> CharIndices<'_>89 fn par_char_indices(&self) -> CharIndices<'_> {
90 CharIndices {
91 chars: self.as_parallel_string(),
92 }
93 }
94
95 /// Returns a parallel iterator over the bytes of a string.
96 ///
97 /// Note that multi-byte sequences (for code points greater than `U+007F`)
98 /// are produced as separate items, but will not be split across threads.
99 /// If you would prefer an indexed iterator without that guarantee, consider
100 /// `string.as_bytes().par_iter().copied()` instead.
101 ///
102 /// # Examples
103 ///
104 /// ```
105 /// use rayon::prelude::*;
106 /// let max = "hello".par_bytes().max();
107 /// assert_eq!(Some(b'o'), max);
108 /// ```
par_bytes(&self) -> Bytes<'_>109 fn par_bytes(&self) -> Bytes<'_> {
110 Bytes {
111 chars: self.as_parallel_string(),
112 }
113 }
114
115 /// Returns a parallel iterator over a string encoded as UTF-16.
116 ///
117 /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
118 /// produced as separate items, but will not be split across threads.
119 ///
120 /// # Examples
121 ///
122 /// ```
123 /// use rayon::prelude::*;
124 ///
125 /// let max = "hello".par_encode_utf16().max();
126 /// assert_eq!(Some(b'o' as u16), max);
127 ///
128 /// let text = "Zażółć gęślą jaźń";
129 /// let utf8_len = text.len();
130 /// let utf16_len = text.par_encode_utf16().count();
131 /// assert!(utf16_len <= utf8_len);
132 /// ```
par_encode_utf16(&self) -> EncodeUtf16<'_>133 fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134 EncodeUtf16 {
135 chars: self.as_parallel_string(),
136 }
137 }
138
139 /// Returns a parallel iterator over substrings separated by a
140 /// given character or predicate, similar to `str::split`.
141 ///
142 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
143 /// It is implemented for `char`, `&[char]`, and any function or closure
144 /// `F: Fn(char) -> bool + Sync + Send`.
145 ///
146 /// # Examples
147 ///
148 /// ```
149 /// use rayon::prelude::*;
150 /// let total = "1, 2, buckle, 3, 4, door"
151 /// .par_split(',')
152 /// .filter_map(|s| s.trim().parse::<i32>().ok())
153 /// .sum();
154 /// assert_eq!(10, total);
155 /// ```
par_split<P: Pattern>(&self, separator: P) -> Split<'_, P>156 fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157 Split::new(self.as_parallel_string(), separator)
158 }
159
160 /// Returns a parallel iterator over substrings terminated by a
161 /// given character or predicate, similar to `str::split_terminator`.
162 /// It's equivalent to `par_split`, except it doesn't produce an empty
163 /// substring after a trailing terminator.
164 ///
165 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
166 /// It is implemented for `char`, `&[char]`, and any function or closure
167 /// `F: Fn(char) -> bool + Sync + Send`.
168 ///
169 /// # Examples
170 ///
171 /// ```
172 /// use rayon::prelude::*;
173 /// let parts: Vec<_> = "((1 + 3) * 2)"
174 /// .par_split_terminator(|c| c == '(' || c == ')')
175 /// .collect();
176 /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
177 /// ```
par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P>178 fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
179 SplitTerminator::new(self.as_parallel_string(), terminator)
180 }
181
182 /// Returns a parallel iterator over the lines of a string, ending with an
183 /// optional carriage return and with a newline (`\r\n` or just `\n`).
184 /// The final line ending is optional, and line endings are not included in
185 /// the output strings.
186 ///
187 /// # Examples
188 ///
189 /// ```
190 /// use rayon::prelude::*;
191 /// let lengths: Vec<_> = "hello world\nfizbuzz"
192 /// .par_lines()
193 /// .map(|l| l.len())
194 /// .collect();
195 /// assert_eq!(vec![11, 7], lengths);
196 /// ```
par_lines(&self) -> Lines<'_>197 fn par_lines(&self) -> Lines<'_> {
198 Lines(self.as_parallel_string())
199 }
200
201 /// Returns a parallel iterator over the sub-slices of a string that are
202 /// separated by any amount of whitespace.
203 ///
204 /// As with `str::split_whitespace`, 'whitespace' is defined according to
205 /// the terms of the Unicode Derived Core Property `White_Space`.
206 ///
207 /// # Examples
208 ///
209 /// ```
210 /// use rayon::prelude::*;
211 /// let longest = "which is the longest word?"
212 /// .par_split_whitespace()
213 /// .max_by_key(|word| word.len());
214 /// assert_eq!(Some("longest"), longest);
215 /// ```
par_split_whitespace(&self) -> SplitWhitespace<'_>216 fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
217 SplitWhitespace(self.as_parallel_string())
218 }
219
220 /// Returns a parallel iterator over substrings that match a
221 /// given character or predicate, similar to `str::matches`.
222 ///
223 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
224 /// It is implemented for `char`, `&[char]`, and any function or closure
225 /// `F: Fn(char) -> bool + Sync + Send`.
226 ///
227 /// # Examples
228 ///
229 /// ```
230 /// use rayon::prelude::*;
231 /// let total = "1, 2, buckle, 3, 4, door"
232 /// .par_matches(char::is_numeric)
233 /// .map(|s| s.parse::<i32>().expect("digit"))
234 /// .sum();
235 /// assert_eq!(10, total);
236 /// ```
par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P>237 fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
238 Matches {
239 chars: self.as_parallel_string(),
240 pattern,
241 }
242 }
243
244 /// Returns a parallel iterator over substrings that match a given character
245 /// or predicate, with their positions, similar to `str::match_indices`.
246 ///
247 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
248 /// It is implemented for `char`, `&[char]`, and any function or closure
249 /// `F: Fn(char) -> bool + Sync + Send`.
250 ///
251 /// # Examples
252 ///
253 /// ```
254 /// use rayon::prelude::*;
255 /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
256 /// .par_match_indices(char::is_numeric)
257 /// .collect();
258 /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
259 /// ```
par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P>260 fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
261 MatchIndices {
262 chars: self.as_parallel_string(),
263 pattern,
264 }
265 }
266 }
267
268 impl ParallelString for str {
269 #[inline]
as_parallel_string(&self) -> &str270 fn as_parallel_string(&self) -> &str {
271 self
272 }
273 }
274
275 // /////////////////////////////////////////////////////////////////////////
276
277 /// We hide the `Pattern` trait in a private module, as its API is not meant
278 /// for general consumption. If we could have privacy on trait items, then it
279 /// would be nicer to have its basic existence and implementors public while
280 /// keeping all of the methods private.
281 mod private {
282 use crate::iter::plumbing::Folder;
283
284 /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
285 /// `std::str::pattern::{Pattern, Searcher}`.
286 ///
287 /// Implementing this trait is not permitted outside of `rayon`.
288 pub trait Pattern: Sized + Sync + Send {
289 private_decl! {}
find_in(&self, haystack: &str) -> Option<usize>290 fn find_in(&self, haystack: &str) -> Option<usize>;
rfind_in(&self, haystack: &str) -> Option<usize>291 fn rfind_in(&self, haystack: &str) -> Option<usize>;
is_suffix_of(&self, haystack: &str) -> bool292 fn is_suffix_of(&self, haystack: &str) -> bool;
fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F where F: Folder<&'ch str>293 fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
294 where
295 F: Folder<&'ch str>;
fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F where F: Folder<&'ch str>296 fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
297 where
298 F: Folder<&'ch str>;
fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F where F: Folder<(usize, &'ch str)>299 fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
300 where
301 F: Folder<(usize, &'ch str)>;
302 }
303 }
304 use self::private::Pattern;
305
/// Builds a mapping function that shifts the index of an `(index, value)`
/// pair by `base`, leaving the value untouched.
///
/// Used to turn indices relative to a sub-slice back into indices relative
/// to the original string.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |(i, x)| (base + i, x)
}
310
// Generates the body of a `Pattern` impl by delegating each method to the
// corresponding `str` method.  `$pattern` is the expression, written in terms
// of `$self`, that yields a value usable as a standard-library string pattern.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, chars: &str) -> Option<usize> {
            chars.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, chars: &str) -> Option<usize> {
            chars.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, chars: &str) -> bool {
            chars.ends_with($pattern)
        }

        fn fold_splits<'ch, F>(&$self, chars: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut split = chars.split($pattern);
            if skip_last {
                // Discard the final piece before handing the rest to the folder.
                split.next_back();
            }
            folder.consume_iter(split)
        }

        fn fold_matches<'ch, F>(&$self, chars: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            folder.consume_iter(chars.matches($pattern))
        }

        fn fold_match_indices<'ch, F>(&$self, chars: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Indices from `match_indices` are relative to `chars`; `base`
            // shifts them by the caller-supplied starting offset.
            folder.consume_iter(chars.match_indices($pattern).map(offset(base)))
        }
    }
}
356
357 impl Pattern for char {
358 impl_pattern!(&self => *self);
359 }
360
361 impl Pattern for &[char] {
362 impl_pattern!(&self => *self);
363 }
364
365 impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
366 impl_pattern!(&self => self);
367 }
368
369 // /////////////////////////////////////////////////////////////////////////
370
/// Parallel iterator over the characters of a string
///
/// Returned by `ParallelString::par_chars()`.
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    /// The string whose characters are produced.
    chars: &'ch str,
}
376
/// Splittable unit of work behind `Chars`: a sub-slice of the original string.
struct CharsProducer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
380
381 impl<'ch> ParallelIterator for Chars<'ch> {
382 type Item = char;
383
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,384 fn drive_unindexed<C>(self, consumer: C) -> C::Result
385 where
386 C: UnindexedConsumer<Self::Item>,
387 {
388 bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
389 }
390 }
391
392 impl<'ch> UnindexedProducer for CharsProducer<'ch> {
393 type Item = char;
394
split(self) -> (Self, Option<Self>)395 fn split(self) -> (Self, Option<Self>) {
396 match split(self.chars) {
397 Some((left, right)) => (
398 CharsProducer { chars: left },
399 Some(CharsProducer { chars: right }),
400 ),
401 None => (self, None),
402 }
403 }
404
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,405 fn fold_with<F>(self, folder: F) -> F
406 where
407 F: Folder<Self::Item>,
408 {
409 folder.consume_iter(self.chars.chars())
410 }
411 }
412
413 // /////////////////////////////////////////////////////////////////////////
414
/// Parallel iterator over the characters of a string, with their positions
///
/// Returned by `ParallelString::par_char_indices()`.
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    /// The string whose characters are produced.
    chars: &'ch str,
}
420
/// Splittable unit of work behind `CharIndices`.
struct CharIndicesProducer<'ch> {
    /// Byte offset of `chars` within the original string.
    index: usize,
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
425
426 impl<'ch> ParallelIterator for CharIndices<'ch> {
427 type Item = (usize, char);
428
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,429 fn drive_unindexed<C>(self, consumer: C) -> C::Result
430 where
431 C: UnindexedConsumer<Self::Item>,
432 {
433 let producer = CharIndicesProducer {
434 index: 0,
435 chars: self.chars,
436 };
437 bridge_unindexed(producer, consumer)
438 }
439 }
440
441 impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
442 type Item = (usize, char);
443
split(self) -> (Self, Option<Self>)444 fn split(self) -> (Self, Option<Self>) {
445 match split(self.chars) {
446 Some((left, right)) => (
447 CharIndicesProducer {
448 chars: left,
449 ..self
450 },
451 Some(CharIndicesProducer {
452 chars: right,
453 index: self.index + left.len(),
454 }),
455 ),
456 None => (self, None),
457 }
458 }
459
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,460 fn fold_with<F>(self, folder: F) -> F
461 where
462 F: Folder<Self::Item>,
463 {
464 let base = self.index;
465 folder.consume_iter(self.chars.char_indices().map(offset(base)))
466 }
467 }
468
469 // /////////////////////////////////////////////////////////////////////////
470
/// Parallel iterator over the bytes of a string
///
/// Returned by `ParallelString::par_bytes()`.
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    /// The string whose bytes are produced.
    chars: &'ch str,
}
476
/// Splittable unit of work behind `Bytes`: a sub-slice of the original string.
struct BytesProducer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
480
481 impl<'ch> ParallelIterator for Bytes<'ch> {
482 type Item = u8;
483
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,484 fn drive_unindexed<C>(self, consumer: C) -> C::Result
485 where
486 C: UnindexedConsumer<Self::Item>,
487 {
488 bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
489 }
490 }
491
492 impl<'ch> UnindexedProducer for BytesProducer<'ch> {
493 type Item = u8;
494
split(self) -> (Self, Option<Self>)495 fn split(self) -> (Self, Option<Self>) {
496 match split(self.chars) {
497 Some((left, right)) => (
498 BytesProducer { chars: left },
499 Some(BytesProducer { chars: right }),
500 ),
501 None => (self, None),
502 }
503 }
504
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,505 fn fold_with<F>(self, folder: F) -> F
506 where
507 F: Folder<Self::Item>,
508 {
509 folder.consume_iter(self.chars.bytes())
510 }
511 }
512
513 // /////////////////////////////////////////////////////////////////////////
514
/// Parallel iterator over a string encoded as UTF-16
///
/// Returned by `ParallelString::par_encode_utf16()`.
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    /// The string to be encoded.
    chars: &'ch str,
}
520
/// Splittable unit of work behind `EncodeUtf16`: a sub-slice of the original
/// string.
struct EncodeUtf16Producer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
524
525 impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
526 type Item = u16;
527
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,528 fn drive_unindexed<C>(self, consumer: C) -> C::Result
529 where
530 C: UnindexedConsumer<Self::Item>,
531 {
532 bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
533 }
534 }
535
536 impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
537 type Item = u16;
538
split(self) -> (Self, Option<Self>)539 fn split(self) -> (Self, Option<Self>) {
540 match split(self.chars) {
541 Some((left, right)) => (
542 EncodeUtf16Producer { chars: left },
543 Some(EncodeUtf16Producer { chars: right }),
544 ),
545 None => (self, None),
546 }
547 }
548
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,549 fn fold_with<F>(self, folder: F) -> F
550 where
551 F: Folder<Self::Item>,
552 {
553 folder.consume_iter(self.chars.encode_utf16())
554 }
555 }
556
557 // /////////////////////////////////////////////////////////////////////////
558
559 /// Parallel iterator over substrings separated by a pattern
560 #[derive(Debug, Clone)]
561 pub struct Split<'ch, P: Pattern> {
562 chars: &'ch str,
563 separator: P,
564 }
565
566 impl<'ch, P: Pattern> Split<'ch, P> {
new(chars: &'ch str, separator: P) -> Self567 fn new(chars: &'ch str, separator: P) -> Self {
568 Split { chars, separator }
569 }
570 }
571
572 impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
573 type Item = &'ch str;
574
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,575 fn drive_unindexed<C>(self, consumer: C) -> C::Result
576 where
577 C: UnindexedConsumer<Self::Item>,
578 {
579 let producer = SplitProducer::new(self.chars, &self.separator);
580 bridge_unindexed(producer, consumer)
581 }
582 }
583
584 /// Implement support for `SplitProducer`.
585 impl<'ch, P: Pattern> Fissile<P> for &'ch str {
length(&self) -> usize586 fn length(&self) -> usize {
587 self.len()
588 }
589
midpoint(&self, end: usize) -> usize590 fn midpoint(&self, end: usize) -> usize {
591 // First find a suitable UTF-8 boundary.
592 find_char_midpoint(&self[..end])
593 }
594
find(&self, separator: &P, start: usize, end: usize) -> Option<usize>595 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
596 separator.find_in(&self[start..end])
597 }
598
rfind(&self, separator: &P, end: usize) -> Option<usize>599 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
600 separator.rfind_in(&self[..end])
601 }
602
split_once(self, index: usize) -> (Self, Self)603 fn split_once(self, index: usize) -> (Self, Self) {
604 let (left, right) = self.split_at(index);
605 let mut right_iter = right.chars();
606 right_iter.next(); // skip the separator
607 (left, right_iter.as_str())
608 }
609
fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F where F: Folder<Self>,610 fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
611 where
612 F: Folder<Self>,
613 {
614 separator.fold_splits(self, folder, skip_last)
615 }
616 }
617
618 // /////////////////////////////////////////////////////////////////////////
619
620 /// Parallel iterator over substrings separated by a terminator pattern
621 #[derive(Debug, Clone)]
622 pub struct SplitTerminator<'ch, P: Pattern> {
623 chars: &'ch str,
624 terminator: P,
625 }
626
627 struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
628 splitter: SplitProducer<'sep, P, &'ch str>,
629 skip_last: bool,
630 }
631
632 impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
new(chars: &'ch str, terminator: P) -> Self633 fn new(chars: &'ch str, terminator: P) -> Self {
634 SplitTerminator { chars, terminator }
635 }
636 }
637
638 impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
new(chars: &'ch str, terminator: &'sep P) -> Self639 fn new(chars: &'ch str, terminator: &'sep P) -> Self {
640 SplitTerminatorProducer {
641 splitter: SplitProducer::new(chars, terminator),
642 skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
643 }
644 }
645 }
646
647 impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
648 type Item = &'ch str;
649
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,650 fn drive_unindexed<C>(self, consumer: C) -> C::Result
651 where
652 C: UnindexedConsumer<Self::Item>,
653 {
654 let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
655 bridge_unindexed(producer, consumer)
656 }
657 }
658
659 impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
660 type Item = &'ch str;
661
split(mut self) -> (Self, Option<Self>)662 fn split(mut self) -> (Self, Option<Self>) {
663 let (left, right) = self.splitter.split();
664 self.splitter = left;
665 let right = right.map(|right| {
666 let skip_last = self.skip_last;
667 self.skip_last = false;
668 SplitTerminatorProducer {
669 splitter: right,
670 skip_last,
671 }
672 });
673 (self, right)
674 }
675
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,676 fn fold_with<F>(self, folder: F) -> F
677 where
678 F: Folder<Self::Item>,
679 {
680 self.splitter.fold_with(folder, self.skip_last)
681 }
682 }
683
684 // /////////////////////////////////////////////////////////////////////////
685
/// Parallel iterator over lines in a string
///
/// Returned by `ParallelString::par_lines()`.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
689
/// Removes a single trailing `'\r'` from `line`, if there is one; otherwise
/// returns `line` unchanged.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    line.strip_suffix('\r').unwrap_or(line)
}
694
695 impl<'ch> ParallelIterator for Lines<'ch> {
696 type Item = &'ch str;
697
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,698 fn drive_unindexed<C>(self, consumer: C) -> C::Result
699 where
700 C: UnindexedConsumer<Self::Item>,
701 {
702 self.0
703 .par_split_terminator('\n')
704 .map(no_carriage_return)
705 .drive_unindexed(consumer)
706 }
707 }
708
709 // /////////////////////////////////////////////////////////////////////////
710
/// Parallel iterator over substrings separated by whitespace
///
/// Returned by `ParallelString::par_split_whitespace()`.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
714
/// Filter predicate that keeps only non-empty string slices.
#[inline]
fn not_empty(s: &&str) -> bool {
    !s.is_empty()
}
719
720 impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
721 type Item = &'ch str;
722
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,723 fn drive_unindexed<C>(self, consumer: C) -> C::Result
724 where
725 C: UnindexedConsumer<Self::Item>,
726 {
727 self.0
728 .par_split(char::is_whitespace)
729 .filter(not_empty)
730 .drive_unindexed(consumer)
731 }
732 }
733
734 // /////////////////////////////////////////////////////////////////////////
735
736 /// Parallel iterator over substrings that match a pattern
737 #[derive(Debug, Clone)]
738 pub struct Matches<'ch, P: Pattern> {
739 chars: &'ch str,
740 pattern: P,
741 }
742
743 struct MatchesProducer<'ch, 'pat, P: Pattern> {
744 chars: &'ch str,
745 pattern: &'pat P,
746 }
747
748 impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
749 type Item = &'ch str;
750
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,751 fn drive_unindexed<C>(self, consumer: C) -> C::Result
752 where
753 C: UnindexedConsumer<Self::Item>,
754 {
755 let producer = MatchesProducer {
756 chars: self.chars,
757 pattern: &self.pattern,
758 };
759 bridge_unindexed(producer, consumer)
760 }
761 }
762
763 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
764 type Item = &'ch str;
765
split(self) -> (Self, Option<Self>)766 fn split(self) -> (Self, Option<Self>) {
767 match split(self.chars) {
768 Some((left, right)) => (
769 MatchesProducer {
770 chars: left,
771 ..self
772 },
773 Some(MatchesProducer {
774 chars: right,
775 ..self
776 }),
777 ),
778 None => (self, None),
779 }
780 }
781
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,782 fn fold_with<F>(self, folder: F) -> F
783 where
784 F: Folder<Self::Item>,
785 {
786 self.pattern.fold_matches(self.chars, folder)
787 }
788 }
789
790 // /////////////////////////////////////////////////////////////////////////
791
792 /// Parallel iterator over substrings that match a pattern, with their positions
793 #[derive(Debug, Clone)]
794 pub struct MatchIndices<'ch, P: Pattern> {
795 chars: &'ch str,
796 pattern: P,
797 }
798
799 struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
800 index: usize,
801 chars: &'ch str,
802 pattern: &'pat P,
803 }
804
805 impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
806 type Item = (usize, &'ch str);
807
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,808 fn drive_unindexed<C>(self, consumer: C) -> C::Result
809 where
810 C: UnindexedConsumer<Self::Item>,
811 {
812 let producer = MatchIndicesProducer {
813 index: 0,
814 chars: self.chars,
815 pattern: &self.pattern,
816 };
817 bridge_unindexed(producer, consumer)
818 }
819 }
820
821 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
822 type Item = (usize, &'ch str);
823
split(self) -> (Self, Option<Self>)824 fn split(self) -> (Self, Option<Self>) {
825 match split(self.chars) {
826 Some((left, right)) => (
827 MatchIndicesProducer {
828 chars: left,
829 ..self
830 },
831 Some(MatchIndicesProducer {
832 chars: right,
833 index: self.index + left.len(),
834 ..self
835 }),
836 ),
837 None => (self, None),
838 }
839 }
840
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,841 fn fold_with<F>(self, folder: F) -> F
842 where
843 F: Folder<Self::Item>,
844 {
845 self.pattern
846 .fold_match_indices(self.chars, folder, self.index)
847 }
848 }
849