//! Parallel iterator types for [strings][std::str]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
//! Note: [`ParallelString::par_split()`], [`par_split_terminator()`], and the
//! other pattern-taking methods reference a `Pattern` trait which is not
//! visible outside this crate.
//! This trait is intentionally kept private, for use only by Rayon itself.
//! It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
//!
//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
//!
//! [std::str]: https://doc.rust-lang.org/stable/std/str/
15
16 use crate::iter::plumbing::*;
17 use crate::iter::*;
18 use crate::split_producer::*;
19
/// Test if a byte is the start of a UTF-8 character.
/// (extracted from `str::is_char_boundary`)
///
/// Returns `true` for every byte below `0x80` or at/above `0xC0`, and `false`
/// for bytes in `0x80..=0xBF` (UTF-8 continuation bytes).
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // This is bit magic equivalent to: b < 128 || b >= 192
    // Reinterpreted as `i8`, the bytes 0x80..=0xBF become -128..=-65, which
    // are exactly the values below -0x40 (-64).
    (b as i8) >= -0x40
}
27
28 /// Find the index of a character boundary near the midpoint.
29 #[inline]
find_char_midpoint(chars: &str) -> usize30 fn find_char_midpoint(chars: &str) -> usize {
31 let mid = chars.len() / 2;
32
33 // We want to split near the midpoint, but we need to find an actual
34 // character boundary. So we look at the raw bytes, first scanning
35 // forward from the midpoint for a boundary, then trying backward.
36 let (left, right) = chars.as_bytes().split_at(mid);
37 match right.iter().cloned().position(is_char_boundary) {
38 Some(i) => mid + i,
39 None => left
40 .iter()
41 .cloned()
42 .rposition(is_char_boundary)
43 .unwrap_or(0),
44 }
45 }
46
47 /// Try to split a string near the midpoint.
48 #[inline]
split(chars: &str) -> Option<(&str, &str)>49 fn split(chars: &str) -> Option<(&str, &str)> {
50 let index = find_char_midpoint(chars);
51 if index > 0 {
52 Some(chars.split_at(index))
53 } else {
54 None
55 }
56 }
57
58 /// Parallel extensions for strings.
59 pub trait ParallelString {
60 /// Returns a plain string slice, which is used to implement the rest of
61 /// the parallel methods.
as_parallel_string(&self) -> &str62 fn as_parallel_string(&self) -> &str;
63
64 /// Returns a parallel iterator over the characters of a string.
65 ///
66 /// # Examples
67 ///
68 /// ```
69 /// use rayon::prelude::*;
70 /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
71 /// assert_eq!(Some('o'), max);
72 /// ```
par_chars(&self) -> Chars<'_>73 fn par_chars(&self) -> Chars<'_> {
74 Chars {
75 chars: self.as_parallel_string(),
76 }
77 }
78
79 /// Returns a parallel iterator over the characters of a string, with their positions.
80 ///
81 /// # Examples
82 ///
83 /// ```
84 /// use rayon::prelude::*;
85 /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
86 /// assert_eq!(Some((1, 'e')), min);
87 /// ```
par_char_indices(&self) -> CharIndices<'_>88 fn par_char_indices(&self) -> CharIndices<'_> {
89 CharIndices {
90 chars: self.as_parallel_string(),
91 }
92 }
93
94 /// Returns a parallel iterator over the bytes of a string.
95 ///
96 /// Note that multi-byte sequences (for code points greater than `U+007F`)
97 /// are produced as separate items, but will not be split across threads.
98 /// If you would prefer an indexed iterator without that guarantee, consider
99 /// `string.as_bytes().par_iter().cloned()` instead.
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// use rayon::prelude::*;
105 /// let max = "hello".par_bytes().max();
106 /// assert_eq!(Some(b'o'), max);
107 /// ```
par_bytes(&self) -> Bytes<'_>108 fn par_bytes(&self) -> Bytes<'_> {
109 Bytes {
110 chars: self.as_parallel_string(),
111 }
112 }
113
114 /// Returns a parallel iterator over a string encoded as UTF-16.
115 ///
116 /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
117 /// produced as separate items, but will not be split across threads.
118 ///
119 /// # Examples
120 ///
121 /// ```
122 /// use rayon::prelude::*;
123 ///
124 /// let max = "hello".par_encode_utf16().max();
125 /// assert_eq!(Some(b'o' as u16), max);
126 ///
127 /// let text = "Zażółć gęślą jaźń";
128 /// let utf8_len = text.len();
129 /// let utf16_len = text.par_encode_utf16().count();
130 /// assert!(utf16_len <= utf8_len);
131 /// ```
par_encode_utf16(&self) -> EncodeUtf16<'_>132 fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
133 EncodeUtf16 {
134 chars: self.as_parallel_string(),
135 }
136 }
137
138 /// Returns a parallel iterator over substrings separated by a
139 /// given character or predicate, similar to `str::split`.
140 ///
141 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
142 /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
143 ///
144 /// # Examples
145 ///
146 /// ```
147 /// use rayon::prelude::*;
148 /// let total = "1, 2, buckle, 3, 4, door"
149 /// .par_split(',')
150 /// .filter_map(|s| s.trim().parse::<i32>().ok())
151 /// .sum();
152 /// assert_eq!(10, total);
153 /// ```
par_split<P: Pattern>(&self, separator: P) -> Split<'_, P>154 fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
155 Split::new(self.as_parallel_string(), separator)
156 }
157
158 /// Returns a parallel iterator over substrings terminated by a
159 /// given character or predicate, similar to `str::split_terminator`.
160 /// It's equivalent to `par_split`, except it doesn't produce an empty
161 /// substring after a trailing terminator.
162 ///
163 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
164 /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
165 ///
166 /// # Examples
167 ///
168 /// ```
169 /// use rayon::prelude::*;
170 /// let parts: Vec<_> = "((1 + 3) * 2)"
171 /// .par_split_terminator(|c| c == '(' || c == ')')
172 /// .collect();
173 /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
174 /// ```
par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P>175 fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
176 SplitTerminator::new(self.as_parallel_string(), terminator)
177 }
178
179 /// Returns a parallel iterator over the lines of a string, ending with an
180 /// optional carriage return and with a newline (`\r\n` or just `\n`).
181 /// The final line ending is optional, and line endings are not included in
182 /// the output strings.
183 ///
184 /// # Examples
185 ///
186 /// ```
187 /// use rayon::prelude::*;
188 /// let lengths: Vec<_> = "hello world\nfizbuzz"
189 /// .par_lines()
190 /// .map(|l| l.len())
191 /// .collect();
192 /// assert_eq!(vec![11, 7], lengths);
193 /// ```
par_lines(&self) -> Lines<'_>194 fn par_lines(&self) -> Lines<'_> {
195 Lines(self.as_parallel_string())
196 }
197
198 /// Returns a parallel iterator over the sub-slices of a string that are
199 /// separated by any amount of whitespace.
200 ///
201 /// As with `str::split_whitespace`, 'whitespace' is defined according to
202 /// the terms of the Unicode Derived Core Property `White_Space`.
203 ///
204 /// # Examples
205 ///
206 /// ```
207 /// use rayon::prelude::*;
208 /// let longest = "which is the longest word?"
209 /// .par_split_whitespace()
210 /// .max_by_key(|word| word.len());
211 /// assert_eq!(Some("longest"), longest);
212 /// ```
par_split_whitespace(&self) -> SplitWhitespace<'_>213 fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
214 SplitWhitespace(self.as_parallel_string())
215 }
216
217 /// Returns a parallel iterator over substrings that match a
218 /// given character or predicate, similar to `str::matches`.
219 ///
220 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
221 /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
222 ///
223 /// # Examples
224 ///
225 /// ```
226 /// use rayon::prelude::*;
227 /// let total = "1, 2, buckle, 3, 4, door"
228 /// .par_matches(char::is_numeric)
229 /// .map(|s| s.parse::<i32>().expect("digit"))
230 /// .sum();
231 /// assert_eq!(10, total);
232 /// ```
par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P>233 fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
234 Matches {
235 chars: self.as_parallel_string(),
236 pattern,
237 }
238 }
239
240 /// Returns a parallel iterator over substrings that match a given character
241 /// or predicate, with their positions, similar to `str::match_indices`.
242 ///
243 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
244 /// It is implemented for `char` and any `F: Fn(char) -> bool + Sync + Send`.
245 ///
246 /// # Examples
247 ///
248 /// ```
249 /// use rayon::prelude::*;
250 /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
251 /// .par_match_indices(char::is_numeric)
252 /// .collect();
253 /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
254 /// ```
par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P>255 fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
256 MatchIndices {
257 chars: self.as_parallel_string(),
258 pattern,
259 }
260 }
261 }
262
263 impl ParallelString for str {
264 #[inline]
as_parallel_string(&self) -> &str265 fn as_parallel_string(&self) -> &str {
266 self
267 }
268 }
269
270 // /////////////////////////////////////////////////////////////////////////
271
272 /// We hide the `Pattern` trait in a private module, as its API is not meant
273 /// for general consumption. If we could have privacy on trait items, then it
274 /// would be nicer to have its basic existence and implementors public while
275 /// keeping all of the methods private.
276 mod private {
277 use crate::iter::plumbing::Folder;
278
279 /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
280 /// `std::str::pattern::{Pattern, Searcher}`.
281 ///
282 /// Implementing this trait is not permitted outside of `rayon`.
283 pub trait Pattern: Sized + Sync + Send {
284 private_decl! {}
find_in(&self, haystack: &str) -> Option<usize>285 fn find_in(&self, haystack: &str) -> Option<usize>;
rfind_in(&self, haystack: &str) -> Option<usize>286 fn rfind_in(&self, haystack: &str) -> Option<usize>;
is_suffix_of(&self, haystack: &str) -> bool287 fn is_suffix_of(&self, haystack: &str) -> bool;
fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F where F: Folder<&'ch str>288 fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
289 where
290 F: Folder<&'ch str>;
fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F where F: Folder<&'ch str>291 fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
292 where
293 F: Folder<&'ch str>;
fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F where F: Folder<(usize, &'ch str)>294 fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
295 where
296 F: Folder<(usize, &'ch str)>;
297 }
298 }
299 use self::private::Pattern;
300
/// Builds a closure that shifts the index of an `(index, value)` pair by
/// `base`, leaving the value untouched.
///
/// Used to turn indices relative to a sub-slice back into indices relative to
/// the string the sub-slice was cut from.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |(i, x)| (base + i, x)
}
305
306 impl Pattern for char {
307 private_impl! {}
308
309 #[inline]
find_in(&self, chars: &str) -> Option<usize>310 fn find_in(&self, chars: &str) -> Option<usize> {
311 chars.find(*self)
312 }
313
314 #[inline]
rfind_in(&self, chars: &str) -> Option<usize>315 fn rfind_in(&self, chars: &str) -> Option<usize> {
316 chars.rfind(*self)
317 }
318
319 #[inline]
is_suffix_of(&self, chars: &str) -> bool320 fn is_suffix_of(&self, chars: &str) -> bool {
321 chars.ends_with(*self)
322 }
323
fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F where F: Folder<&'ch str>,324 fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
325 where
326 F: Folder<&'ch str>,
327 {
328 let mut split = chars.split(*self);
329 if skip_last {
330 split.next_back();
331 }
332 folder.consume_iter(split)
333 }
334
fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F where F: Folder<&'ch str>,335 fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
336 where
337 F: Folder<&'ch str>,
338 {
339 folder.consume_iter(chars.matches(*self))
340 }
341
fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F where F: Folder<(usize, &'ch str)>,342 fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
343 where
344 F: Folder<(usize, &'ch str)>,
345 {
346 folder.consume_iter(chars.match_indices(*self).map(offset(base)))
347 }
348 }
349
350 impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
351 private_impl! {}
352
find_in(&self, chars: &str) -> Option<usize>353 fn find_in(&self, chars: &str) -> Option<usize> {
354 chars.find(self)
355 }
356
rfind_in(&self, chars: &str) -> Option<usize>357 fn rfind_in(&self, chars: &str) -> Option<usize> {
358 chars.rfind(self)
359 }
360
is_suffix_of(&self, chars: &str) -> bool361 fn is_suffix_of(&self, chars: &str) -> bool {
362 chars.ends_with(self)
363 }
364
fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F where F: Folder<&'ch str>,365 fn fold_splits<'ch, F>(&self, chars: &'ch str, folder: F, skip_last: bool) -> F
366 where
367 F: Folder<&'ch str>,
368 {
369 let mut split = chars.split(self);
370 if skip_last {
371 split.next_back();
372 }
373 folder.consume_iter(split)
374 }
375
fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F where F: Folder<&'ch str>,376 fn fold_matches<'ch, F>(&self, chars: &'ch str, folder: F) -> F
377 where
378 F: Folder<&'ch str>,
379 {
380 folder.consume_iter(chars.matches(self))
381 }
382
fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F where F: Folder<(usize, &'ch str)>,383 fn fold_match_indices<'ch, F>(&self, chars: &'ch str, folder: F, base: usize) -> F
384 where
385 F: Folder<(usize, &'ch str)>,
386 {
387 folder.consume_iter(chars.match_indices(self).map(offset(base)))
388 }
389 }
390
391 // /////////////////////////////////////////////////////////////////////////
392
/// Parallel iterator over the characters of a string
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    /// The string slice whose characters are iterated.
    chars: &'ch str,
}
398
/// Splittable unit of work behind `Chars`: the sub-slice still to be
/// processed.
struct CharsProducer<'ch> {
    chars: &'ch str,
}
402
403 impl<'ch> ParallelIterator for Chars<'ch> {
404 type Item = char;
405
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,406 fn drive_unindexed<C>(self, consumer: C) -> C::Result
407 where
408 C: UnindexedConsumer<Self::Item>,
409 {
410 bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
411 }
412 }
413
414 impl<'ch> UnindexedProducer for CharsProducer<'ch> {
415 type Item = char;
416
split(self) -> (Self, Option<Self>)417 fn split(self) -> (Self, Option<Self>) {
418 match split(self.chars) {
419 Some((left, right)) => (
420 CharsProducer { chars: left },
421 Some(CharsProducer { chars: right }),
422 ),
423 None => (self, None),
424 }
425 }
426
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,427 fn fold_with<F>(self, folder: F) -> F
428 where
429 F: Folder<Self::Item>,
430 {
431 folder.consume_iter(self.chars.chars())
432 }
433 }
434
435 // /////////////////////////////////////////////////////////////////////////
436
/// Parallel iterator over the characters of a string, with their positions
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    /// The string slice whose characters are iterated.
    chars: &'ch str,
}
442
/// Splittable unit of work behind `CharIndices`.
struct CharIndicesProducer<'ch> {
    /// Byte offset of `chars` within the string the iteration started from.
    index: usize,
    /// The sub-slice still to be processed.
    chars: &'ch str,
}
447
448 impl<'ch> ParallelIterator for CharIndices<'ch> {
449 type Item = (usize, char);
450
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,451 fn drive_unindexed<C>(self, consumer: C) -> C::Result
452 where
453 C: UnindexedConsumer<Self::Item>,
454 {
455 let producer = CharIndicesProducer {
456 index: 0,
457 chars: self.chars,
458 };
459 bridge_unindexed(producer, consumer)
460 }
461 }
462
463 impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
464 type Item = (usize, char);
465
split(self) -> (Self, Option<Self>)466 fn split(self) -> (Self, Option<Self>) {
467 match split(self.chars) {
468 Some((left, right)) => (
469 CharIndicesProducer {
470 chars: left,
471 ..self
472 },
473 Some(CharIndicesProducer {
474 chars: right,
475 index: self.index + left.len(),
476 }),
477 ),
478 None => (self, None),
479 }
480 }
481
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,482 fn fold_with<F>(self, folder: F) -> F
483 where
484 F: Folder<Self::Item>,
485 {
486 let base = self.index;
487 folder.consume_iter(self.chars.char_indices().map(offset(base)))
488 }
489 }
490
491 // /////////////////////////////////////////////////////////////////////////
492
/// Parallel iterator over the bytes of a string
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    /// The string slice whose bytes are iterated.
    chars: &'ch str,
}
498
/// Splittable unit of work behind `Bytes`: the sub-slice still to be
/// processed.
struct BytesProducer<'ch> {
    chars: &'ch str,
}
502
503 impl<'ch> ParallelIterator for Bytes<'ch> {
504 type Item = u8;
505
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,506 fn drive_unindexed<C>(self, consumer: C) -> C::Result
507 where
508 C: UnindexedConsumer<Self::Item>,
509 {
510 bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
511 }
512 }
513
514 impl<'ch> UnindexedProducer for BytesProducer<'ch> {
515 type Item = u8;
516
split(self) -> (Self, Option<Self>)517 fn split(self) -> (Self, Option<Self>) {
518 match split(self.chars) {
519 Some((left, right)) => (
520 BytesProducer { chars: left },
521 Some(BytesProducer { chars: right }),
522 ),
523 None => (self, None),
524 }
525 }
526
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,527 fn fold_with<F>(self, folder: F) -> F
528 where
529 F: Folder<Self::Item>,
530 {
531 folder.consume_iter(self.chars.bytes())
532 }
533 }
534
535 // /////////////////////////////////////////////////////////////////////////
536
/// Parallel iterator over a string encoded as UTF-16
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    /// The string slice to be re-encoded.
    chars: &'ch str,
}
542
/// Splittable unit of work behind `EncodeUtf16`: the sub-slice still to be
/// processed.
struct EncodeUtf16Producer<'ch> {
    chars: &'ch str,
}
546
547 impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
548 type Item = u16;
549
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,550 fn drive_unindexed<C>(self, consumer: C) -> C::Result
551 where
552 C: UnindexedConsumer<Self::Item>,
553 {
554 bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
555 }
556 }
557
558 impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
559 type Item = u16;
560
split(self) -> (Self, Option<Self>)561 fn split(self) -> (Self, Option<Self>) {
562 match split(self.chars) {
563 Some((left, right)) => (
564 EncodeUtf16Producer { chars: left },
565 Some(EncodeUtf16Producer { chars: right }),
566 ),
567 None => (self, None),
568 }
569 }
570
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,571 fn fold_with<F>(self, folder: F) -> F
572 where
573 F: Folder<Self::Item>,
574 {
575 folder.consume_iter(self.chars.encode_utf16())
576 }
577 }
578
579 // /////////////////////////////////////////////////////////////////////////
580
581 /// Parallel iterator over substrings separated by a pattern
582 #[derive(Debug, Clone)]
583 pub struct Split<'ch, P: Pattern> {
584 chars: &'ch str,
585 separator: P,
586 }
587
588 impl<'ch, P: Pattern> Split<'ch, P> {
new(chars: &'ch str, separator: P) -> Self589 fn new(chars: &'ch str, separator: P) -> Self {
590 Split { chars, separator }
591 }
592 }
593
594 impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
595 type Item = &'ch str;
596
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,597 fn drive_unindexed<C>(self, consumer: C) -> C::Result
598 where
599 C: UnindexedConsumer<Self::Item>,
600 {
601 let producer = SplitProducer::new(self.chars, &self.separator);
602 bridge_unindexed(producer, consumer)
603 }
604 }
605
606 /// Implement support for `SplitProducer`.
607 impl<'ch, P: Pattern> Fissile<P> for &'ch str {
length(&self) -> usize608 fn length(&self) -> usize {
609 self.len()
610 }
611
midpoint(&self, end: usize) -> usize612 fn midpoint(&self, end: usize) -> usize {
613 // First find a suitable UTF-8 boundary.
614 find_char_midpoint(&self[..end])
615 }
616
find(&self, separator: &P, start: usize, end: usize) -> Option<usize>617 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
618 separator.find_in(&self[start..end])
619 }
620
rfind(&self, separator: &P, end: usize) -> Option<usize>621 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
622 separator.rfind_in(&self[..end])
623 }
624
split_once(self, index: usize) -> (Self, Self)625 fn split_once(self, index: usize) -> (Self, Self) {
626 let (left, right) = self.split_at(index);
627 let mut right_iter = right.chars();
628 right_iter.next(); // skip the separator
629 (left, right_iter.as_str())
630 }
631
fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F where F: Folder<Self>,632 fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
633 where
634 F: Folder<Self>,
635 {
636 separator.fold_splits(self, folder, skip_last)
637 }
638 }
639
640 // /////////////////////////////////////////////////////////////////////////
641
642 /// Parallel iterator over substrings separated by a terminator pattern
643 #[derive(Debug, Clone)]
644 pub struct SplitTerminator<'ch, P: Pattern> {
645 chars: &'ch str,
646 terminator: P,
647 }
648
649 struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
650 splitter: SplitProducer<'sep, P, &'ch str>,
651 skip_last: bool,
652 }
653
654 impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
new(chars: &'ch str, terminator: P) -> Self655 fn new(chars: &'ch str, terminator: P) -> Self {
656 SplitTerminator { chars, terminator }
657 }
658 }
659
660 impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
new(chars: &'ch str, terminator: &'sep P) -> Self661 fn new(chars: &'ch str, terminator: &'sep P) -> Self {
662 SplitTerminatorProducer {
663 splitter: SplitProducer::new(chars, terminator),
664 skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
665 }
666 }
667 }
668
669 impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
670 type Item = &'ch str;
671
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,672 fn drive_unindexed<C>(self, consumer: C) -> C::Result
673 where
674 C: UnindexedConsumer<Self::Item>,
675 {
676 let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
677 bridge_unindexed(producer, consumer)
678 }
679 }
680
681 impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
682 type Item = &'ch str;
683
split(mut self) -> (Self, Option<Self>)684 fn split(mut self) -> (Self, Option<Self>) {
685 let (left, right) = self.splitter.split();
686 self.splitter = left;
687 let right = right.map(|right| {
688 let skip_last = self.skip_last;
689 self.skip_last = false;
690 SplitTerminatorProducer {
691 splitter: right,
692 skip_last,
693 }
694 });
695 (self, right)
696 }
697
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,698 fn fold_with<F>(self, folder: F) -> F
699 where
700 F: Folder<Self::Item>,
701 {
702 self.splitter.fold_with(folder, self.skip_last)
703 }
704 }
705
706 // /////////////////////////////////////////////////////////////////////////
707
/// Parallel iterator over lines in a string
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
711
/// Strips a single trailing `'\r'` from `line`, if present.
///
/// At most one carriage return is removed; any earlier ones are kept.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    if line.ends_with('\r') {
        // '\r' is one byte in UTF-8, so dropping the last byte is safe.
        &line[..line.len() - 1]
    } else {
        line
    }
}
720
721 impl<'ch> ParallelIterator for Lines<'ch> {
722 type Item = &'ch str;
723
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,724 fn drive_unindexed<C>(self, consumer: C) -> C::Result
725 where
726 C: UnindexedConsumer<Self::Item>,
727 {
728 self.0
729 .par_split_terminator('\n')
730 .map(no_carriage_return)
731 .drive_unindexed(consumer)
732 }
733 }
734
735 // /////////////////////////////////////////////////////////////////////////
736
/// Parallel iterator over substrings separated by whitespace
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
740
/// Filter predicate: `true` when the referenced string slice is non-empty.
///
/// Takes `&&str` so it can be passed directly to `filter` on an iterator of
/// `&str` items.
#[inline]
fn not_empty(s: &&str) -> bool {
    !s.is_empty()
}
745
746 impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
747 type Item = &'ch str;
748
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,749 fn drive_unindexed<C>(self, consumer: C) -> C::Result
750 where
751 C: UnindexedConsumer<Self::Item>,
752 {
753 self.0
754 .par_split(char::is_whitespace)
755 .filter(not_empty)
756 .drive_unindexed(consumer)
757 }
758 }
759
760 // /////////////////////////////////////////////////////////////////////////
761
762 /// Parallel iterator over substrings that match a pattern
763 #[derive(Debug, Clone)]
764 pub struct Matches<'ch, P: Pattern> {
765 chars: &'ch str,
766 pattern: P,
767 }
768
769 struct MatchesProducer<'ch, 'pat, P: Pattern> {
770 chars: &'ch str,
771 pattern: &'pat P,
772 }
773
774 impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
775 type Item = &'ch str;
776
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,777 fn drive_unindexed<C>(self, consumer: C) -> C::Result
778 where
779 C: UnindexedConsumer<Self::Item>,
780 {
781 let producer = MatchesProducer {
782 chars: self.chars,
783 pattern: &self.pattern,
784 };
785 bridge_unindexed(producer, consumer)
786 }
787 }
788
789 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
790 type Item = &'ch str;
791
split(self) -> (Self, Option<Self>)792 fn split(self) -> (Self, Option<Self>) {
793 match split(self.chars) {
794 Some((left, right)) => (
795 MatchesProducer {
796 chars: left,
797 ..self
798 },
799 Some(MatchesProducer {
800 chars: right,
801 ..self
802 }),
803 ),
804 None => (self, None),
805 }
806 }
807
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,808 fn fold_with<F>(self, folder: F) -> F
809 where
810 F: Folder<Self::Item>,
811 {
812 self.pattern.fold_matches(self.chars, folder)
813 }
814 }
815
816 // /////////////////////////////////////////////////////////////////////////
817
818 /// Parallel iterator over substrings that match a pattern, with their positions
819 #[derive(Debug, Clone)]
820 pub struct MatchIndices<'ch, P: Pattern> {
821 chars: &'ch str,
822 pattern: P,
823 }
824
825 struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
826 index: usize,
827 chars: &'ch str,
828 pattern: &'pat P,
829 }
830
831 impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
832 type Item = (usize, &'ch str);
833
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,834 fn drive_unindexed<C>(self, consumer: C) -> C::Result
835 where
836 C: UnindexedConsumer<Self::Item>,
837 {
838 let producer = MatchIndicesProducer {
839 index: 0,
840 chars: self.chars,
841 pattern: &self.pattern,
842 };
843 bridge_unindexed(producer, consumer)
844 }
845 }
846
847 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
848 type Item = (usize, &'ch str);
849
split(self) -> (Self, Option<Self>)850 fn split(self) -> (Self, Option<Self>) {
851 match split(self.chars) {
852 Some((left, right)) => (
853 MatchIndicesProducer {
854 chars: left,
855 ..self
856 },
857 Some(MatchIndicesProducer {
858 chars: right,
859 index: self.index + left.len(),
860 ..self
861 }),
862 ),
863 None => (self, None),
864 }
865 }
866
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,867 fn fold_with<F>(self, folder: F) -> F
868 where
869 F: Folder<Self::Item>,
870 {
871 self.pattern
872 .fold_match_indices(self.chars, folder, self.index)
873 }
874 }
875