1 //! Parallel iterator types for [strings][std::str]
2 //!
3 //! You will rarely need to interact with this module directly unless you need
4 //! to name one of the iterator types.
5 //!
6 //! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
7 //! reference a `Pattern` trait which is not visible outside this crate.
8 //! This trait is intentionally kept private, for use only by Rayon itself.
9 //! It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
10 //! and any function or closure `F: Fn(char) -> bool + Sync + Send`.
11 //!
12 //! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
13 //! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
14 //!
15 //! [std::str]: https://doc.rust-lang.org/stable/std/str/
16
17 use crate::iter::plumbing::*;
18 use crate::iter::*;
19 use crate::split_producer::*;
20
/// Reports whether `b` can be the first byte of a UTF-8 encoded character,
/// i.e. whether it is anything other than a continuation byte.
/// (same test as the one used by `str::is_char_boundary`)
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Continuation bytes are exactly those of the form `0b10xx_xxxx`
    // (128..=191); every other byte starts a character.
    (b & 0xC0) != 0x80
}
28
29 /// Find the index of a character boundary near the midpoint.
30 #[inline]
find_char_midpoint(chars: &str) -> usize31 fn find_char_midpoint(chars: &str) -> usize {
32 let mid = chars.len() / 2;
33
34 // We want to split near the midpoint, but we need to find an actual
35 // character boundary. So we look at the raw bytes, first scanning
36 // forward from the midpoint for a boundary, then trying backward.
37 let (left, right) = chars.as_bytes().split_at(mid);
38 match right.iter().copied().position(is_char_boundary) {
39 Some(i) => mid + i,
40 None => left
41 .iter()
42 .copied()
43 .rposition(is_char_boundary)
44 .unwrap_or(0),
45 }
46 }
47
48 /// Try to split a string near the midpoint.
49 #[inline]
split(chars: &str) -> Option<(&str, &str)>50 fn split(chars: &str) -> Option<(&str, &str)> {
51 let index = find_char_midpoint(chars);
52 if index > 0 {
53 Some(chars.split_at(index))
54 } else {
55 None
56 }
57 }
58
59 /// Parallel extensions for strings.
60 pub trait ParallelString {
61 /// Returns a plain string slice, which is used to implement the rest of
62 /// the parallel methods.
as_parallel_string(&self) -> &str63 fn as_parallel_string(&self) -> &str;
64
65 /// Returns a parallel iterator over the characters of a string.
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// use rayon::prelude::*;
71 /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
72 /// assert_eq!(Some('o'), max);
73 /// ```
par_chars(&self) -> Chars<'_>74 fn par_chars(&self) -> Chars<'_> {
75 Chars {
76 chars: self.as_parallel_string(),
77 }
78 }
79
80 /// Returns a parallel iterator over the characters of a string, with their positions.
81 ///
82 /// # Examples
83 ///
84 /// ```
85 /// use rayon::prelude::*;
86 /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
87 /// assert_eq!(Some((1, 'e')), min);
88 /// ```
par_char_indices(&self) -> CharIndices<'_>89 fn par_char_indices(&self) -> CharIndices<'_> {
90 CharIndices {
91 chars: self.as_parallel_string(),
92 }
93 }
94
95 /// Returns a parallel iterator over the bytes of a string.
96 ///
97 /// Note that multi-byte sequences (for code points greater than `U+007F`)
98 /// are produced as separate items, but will not be split across threads.
99 /// If you would prefer an indexed iterator without that guarantee, consider
100 /// `string.as_bytes().par_iter().copied()` instead.
101 ///
102 /// # Examples
103 ///
104 /// ```
105 /// use rayon::prelude::*;
106 /// let max = "hello".par_bytes().max();
107 /// assert_eq!(Some(b'o'), max);
108 /// ```
par_bytes(&self) -> Bytes<'_>109 fn par_bytes(&self) -> Bytes<'_> {
110 Bytes {
111 chars: self.as_parallel_string(),
112 }
113 }
114
115 /// Returns a parallel iterator over a string encoded as UTF-16.
116 ///
117 /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
118 /// produced as separate items, but will not be split across threads.
119 ///
120 /// # Examples
121 ///
122 /// ```
123 /// use rayon::prelude::*;
124 ///
125 /// let max = "hello".par_encode_utf16().max();
126 /// assert_eq!(Some(b'o' as u16), max);
127 ///
128 /// let text = "Zażółć gęślą jaźń";
129 /// let utf8_len = text.len();
130 /// let utf16_len = text.par_encode_utf16().count();
131 /// assert!(utf16_len <= utf8_len);
132 /// ```
par_encode_utf16(&self) -> EncodeUtf16<'_>133 fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134 EncodeUtf16 {
135 chars: self.as_parallel_string(),
136 }
137 }
138
139 /// Returns a parallel iterator over substrings separated by a
140 /// given character or predicate, similar to `str::split`.
141 ///
142 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
143 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
144 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
145 ///
146 /// # Examples
147 ///
148 /// ```
149 /// use rayon::prelude::*;
150 /// let total = "1, 2, buckle, 3, 4, door"
151 /// .par_split(',')
152 /// .filter_map(|s| s.trim().parse::<i32>().ok())
153 /// .sum();
154 /// assert_eq!(10, total);
155 /// ```
par_split<P: Pattern>(&self, separator: P) -> Split<'_, P>156 fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157 Split::new(self.as_parallel_string(), separator)
158 }
159
160 /// Returns a parallel iterator over substrings separated by a
161 /// given character or predicate, keeping the matched part as a terminator
162 /// of the substring similar to `str::split_inclusive`.
163 ///
164 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
165 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
166 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
167 ///
168 /// # Examples
169 ///
170 /// ```
171 /// use rayon::prelude::*;
172 /// let lines: Vec<_> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
173 /// .par_split_inclusive('\n')
174 /// .collect();
175 /// assert_eq!(lines, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
176 /// ```
par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P>177 fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
178 SplitInclusive::new(self.as_parallel_string(), separator)
179 }
180
181 /// Returns a parallel iterator over substrings terminated by a
182 /// given character or predicate, similar to `str::split_terminator`.
183 /// It's equivalent to `par_split`, except it doesn't produce an empty
184 /// substring after a trailing terminator.
185 ///
186 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
187 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
188 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
189 ///
190 /// # Examples
191 ///
192 /// ```
193 /// use rayon::prelude::*;
194 /// let parts: Vec<_> = "((1 + 3) * 2)"
195 /// .par_split_terminator(|c| c == '(' || c == ')')
196 /// .collect();
197 /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
198 /// ```
par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P>199 fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
200 SplitTerminator::new(self.as_parallel_string(), terminator)
201 }
202
203 /// Returns a parallel iterator over the lines of a string, ending with an
204 /// optional carriage return and with a newline (`\r\n` or just `\n`).
205 /// The final line ending is optional, and line endings are not included in
206 /// the output strings.
207 ///
208 /// # Examples
209 ///
210 /// ```
211 /// use rayon::prelude::*;
212 /// let lengths: Vec<_> = "hello world\nfizbuzz"
213 /// .par_lines()
214 /// .map(|l| l.len())
215 /// .collect();
216 /// assert_eq!(vec![11, 7], lengths);
217 /// ```
par_lines(&self) -> Lines<'_>218 fn par_lines(&self) -> Lines<'_> {
219 Lines(self.as_parallel_string())
220 }
221
222 /// Returns a parallel iterator over the sub-slices of a string that are
223 /// separated by any amount of whitespace.
224 ///
225 /// As with `str::split_whitespace`, 'whitespace' is defined according to
226 /// the terms of the Unicode Derived Core Property `White_Space`.
227 /// If you only want to split on ASCII whitespace instead, use
228 /// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
229 ///
230 /// # Examples
231 ///
232 /// ```
233 /// use rayon::prelude::*;
234 /// let longest = "which is the longest word?"
235 /// .par_split_whitespace()
236 /// .max_by_key(|word| word.len());
237 /// assert_eq!(Some("longest"), longest);
238 /// ```
239 ///
240 /// All kinds of whitespace are considered:
241 ///
242 /// ```
243 /// use rayon::prelude::*;
244 /// let words: Vec<&str> = " Mary had\ta\u{2009}little \n\t lamb"
245 /// .par_split_whitespace()
246 /// .collect();
247 /// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
248 /// ```
249 ///
250 /// If the string is empty or all whitespace, the iterator yields no string slices:
251 ///
252 /// ```
253 /// use rayon::prelude::*;
254 /// assert_eq!("".par_split_whitespace().count(), 0);
255 /// assert_eq!(" ".par_split_whitespace().count(), 0);
256 /// ```
par_split_whitespace(&self) -> SplitWhitespace<'_>257 fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
258 SplitWhitespace(self.as_parallel_string())
259 }
260
261 /// Returns a parallel iterator over the sub-slices of a string that are
262 /// separated by any amount of ASCII whitespace.
263 ///
264 /// To split by Unicode `White_Space` instead, use
265 /// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
266 ///
267 /// # Examples
268 ///
269 /// ```
270 /// use rayon::prelude::*;
271 /// let longest = "which is the longest word?"
272 /// .par_split_ascii_whitespace()
273 /// .max_by_key(|word| word.len());
274 /// assert_eq!(Some("longest"), longest);
275 /// ```
276 ///
277 /// All kinds of ASCII whitespace are considered, but not Unicode `White_Space`:
278 ///
279 /// ```
280 /// use rayon::prelude::*;
281 /// let words: Vec<&str> = " Mary had\ta\u{2009}little \n\t lamb"
282 /// .par_split_ascii_whitespace()
283 /// .collect();
284 /// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
285 /// ```
286 ///
287 /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
288 ///
289 /// ```
290 /// use rayon::prelude::*;
291 /// assert_eq!("".par_split_whitespace().count(), 0);
292 /// assert_eq!(" ".par_split_whitespace().count(), 0);
293 /// ```
par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_>294 fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
295 SplitAsciiWhitespace(self.as_parallel_string())
296 }
297
298 /// Returns a parallel iterator over substrings that match a
299 /// given character or predicate, similar to `str::matches`.
300 ///
301 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
302 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
303 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
304 ///
305 /// # Examples
306 ///
307 /// ```
308 /// use rayon::prelude::*;
309 /// let total = "1, 2, buckle, 3, 4, door"
310 /// .par_matches(char::is_numeric)
311 /// .map(|s| s.parse::<i32>().expect("digit"))
312 /// .sum();
313 /// assert_eq!(10, total);
314 /// ```
par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P>315 fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
316 Matches {
317 chars: self.as_parallel_string(),
318 pattern,
319 }
320 }
321
322 /// Returns a parallel iterator over substrings that match a given character
323 /// or predicate, with their positions, similar to `str::match_indices`.
324 ///
325 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
326 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
327 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// use rayon::prelude::*;
333 /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
334 /// .par_match_indices(char::is_numeric)
335 /// .collect();
336 /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
337 /// ```
par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P>338 fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
339 MatchIndices {
340 chars: self.as_parallel_string(),
341 pattern,
342 }
343 }
344 }
345
/// `str` is already a string slice, so it simply hands itself to the
/// provided `par_*` methods of `ParallelString`.
impl ParallelString for str {
    #[inline]
    fn as_parallel_string(&self) -> &str {
        self
    }
}
352
353 // /////////////////////////////////////////////////////////////////////////
354
/// We hide the `Pattern` trait in a private module, as its API is not meant
/// for general consumption.  If we could have privacy on trait items, then it
/// would be nicer to have its basic existence and implementors public while
/// keeping all of the methods private.
mod private {
    use crate::iter::plumbing::Folder;

    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
    /// `std::str::pattern::{Pattern, Searcher}`.
    ///
    /// Implementing this trait is not permitted outside of `rayon`.
    pub trait Pattern: Sized + Sync + Send {
        // NOTE(review): `private_decl!` is defined elsewhere in the crate;
        // presumably it declares the hidden item that enforces the
        // restriction above -- confirm against the macro definition.
        private_decl! {}

        /// Searches `haystack` from the front; the implementations in this
        /// file forward to `str::find`.
        fn find_in(&self, haystack: &str) -> Option<usize>;

        /// Searches `haystack` from the back; the implementations in this
        /// file forward to `str::rfind`.
        fn rfind_in(&self, haystack: &str) -> Option<usize>;

        /// Tests whether `haystack` ends with this pattern; the
        /// implementations in this file forward to `str::ends_with`.
        fn is_suffix_of(&self, haystack: &str) -> bool;

        /// Feeds the pieces of `haystack` separated by this pattern into
        /// `folder`.  When `skip_last` is set, the final piece is dropped
        /// before folding.
        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>;

        /// Like `fold_splits`, but built on `str::split_inclusive` in the
        /// implementations in this file.
        fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;

        /// Feeds every match of this pattern in `haystack` into `folder`.
        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;

        /// Feeds every `(index, match)` pair into `folder`, with `base`
        /// added to each index found within `haystack`.
        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>;
    }
}
385 use self::private::Pattern;
386
/// Builds a closure that adds `base` to the index of an `(index, value)`
/// pair and passes the value through untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair| {
        let (i, x) = pair;
        (base + i, x)
    }
}
391
// Expands to the full set of `Pattern` methods, each forwarding to the
// corresponding `str` API with `$pattern` as the std pattern.  The caller
// passes `self` in as `$self` so that a `$pattern` expression mentioning
// `self` refers to the method receiver.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, haystack: &str) -> Option<usize> {
            haystack.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, haystack: &str) -> Option<usize> {
            haystack.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, haystack: &str) -> bool {
            haystack.ends_with($pattern)
        }

        fn fold_splits<'ch, F>(&$self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut pieces = haystack.split($pattern);
            if skip_last {
                // Discard the final piece before anything is folded.
                pieces.next_back();
            }
            folder.consume_iter(pieces)
        }

        fn fold_inclusive_splits<'ch, F>(&$self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let pieces = haystack.split_inclusive($pattern);
            folder.consume_iter(pieces)
        }

        fn fold_matches<'ch, F>(&$self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let found = haystack.matches($pattern);
            folder.consume_iter(found)
        }

        fn fold_match_indices<'ch, F>(&$self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Indices come back relative to `haystack`; shift them by `base`.
            let found = haystack.match_indices($pattern).map(offset(base));
            folder.consume_iter(found)
        }
    }
}
444
/// A `char` pattern: the character itself is passed to the `str` search APIs.
impl Pattern for char {
    impl_pattern!(&self => *self);
}
448
/// A `&[char]` pattern: the slice itself is passed to the `str` search APIs.
impl Pattern for &[char] {
    impl_pattern!(&self => *self);
}
452
453 // TODO (MSRV 1.75): use `*self` for array patterns too.
454 // - Needs `DoubleEndedSearcher` so `split.next_back()` works.
455
/// A `[char; N]` pattern, searched through its slice view (`as_slice`).
impl<const N: usize> Pattern for [char; N] {
    impl_pattern!(&self => self.as_slice());
}
459
/// A `&[char; N]` pattern, searched through its slice view (`as_slice`).
impl<const N: usize> Pattern for &[char; N] {
    impl_pattern!(&self => self.as_slice());
}
463
464 impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
465 impl_pattern!(&self => self);
466 }
467
468 // /////////////////////////////////////////////////////////////////////////
469
/// Parallel iterator over the characters of a string
///
/// Created by `ParallelString::par_chars`.
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    /// The string slice whose characters are produced.
    chars: &'ch str,
}
475
/// Splittable producer behind `Chars`.
struct CharsProducer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
479
480 impl<'ch> ParallelIterator for Chars<'ch> {
481 type Item = char;
482
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,483 fn drive_unindexed<C>(self, consumer: C) -> C::Result
484 where
485 C: UnindexedConsumer<Self::Item>,
486 {
487 bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
488 }
489 }
490
491 impl<'ch> UnindexedProducer for CharsProducer<'ch> {
492 type Item = char;
493
split(self) -> (Self, Option<Self>)494 fn split(self) -> (Self, Option<Self>) {
495 match split(self.chars) {
496 Some((left, right)) => (
497 CharsProducer { chars: left },
498 Some(CharsProducer { chars: right }),
499 ),
500 None => (self, None),
501 }
502 }
503
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,504 fn fold_with<F>(self, folder: F) -> F
505 where
506 F: Folder<Self::Item>,
507 {
508 folder.consume_iter(self.chars.chars())
509 }
510 }
511
512 // /////////////////////////////////////////////////////////////////////////
513
/// Parallel iterator over the characters of a string, with their positions
///
/// Created by `ParallelString::par_char_indices`.
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    /// The string slice whose characters are produced.
    chars: &'ch str,
}
519
/// Splittable producer behind `CharIndices`.
struct CharIndicesProducer<'ch> {
    /// Byte offset of `chars` within the original string; added to every
    /// index this producer yields.
    index: usize,
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
524
525 impl<'ch> ParallelIterator for CharIndices<'ch> {
526 type Item = (usize, char);
527
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,528 fn drive_unindexed<C>(self, consumer: C) -> C::Result
529 where
530 C: UnindexedConsumer<Self::Item>,
531 {
532 let producer = CharIndicesProducer {
533 index: 0,
534 chars: self.chars,
535 };
536 bridge_unindexed(producer, consumer)
537 }
538 }
539
540 impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
541 type Item = (usize, char);
542
split(self) -> (Self, Option<Self>)543 fn split(self) -> (Self, Option<Self>) {
544 match split(self.chars) {
545 Some((left, right)) => (
546 CharIndicesProducer {
547 chars: left,
548 ..self
549 },
550 Some(CharIndicesProducer {
551 chars: right,
552 index: self.index + left.len(),
553 }),
554 ),
555 None => (self, None),
556 }
557 }
558
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,559 fn fold_with<F>(self, folder: F) -> F
560 where
561 F: Folder<Self::Item>,
562 {
563 let base = self.index;
564 folder.consume_iter(self.chars.char_indices().map(offset(base)))
565 }
566 }
567
568 // /////////////////////////////////////////////////////////////////////////
569
/// Parallel iterator over the bytes of a string
///
/// Created by `ParallelString::par_bytes`.
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    /// The string slice whose bytes are produced.
    chars: &'ch str,
}
575
/// Splittable producer behind `Bytes`.
struct BytesProducer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
579
580 impl<'ch> ParallelIterator for Bytes<'ch> {
581 type Item = u8;
582
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,583 fn drive_unindexed<C>(self, consumer: C) -> C::Result
584 where
585 C: UnindexedConsumer<Self::Item>,
586 {
587 bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
588 }
589 }
590
591 impl<'ch> UnindexedProducer for BytesProducer<'ch> {
592 type Item = u8;
593
split(self) -> (Self, Option<Self>)594 fn split(self) -> (Self, Option<Self>) {
595 match split(self.chars) {
596 Some((left, right)) => (
597 BytesProducer { chars: left },
598 Some(BytesProducer { chars: right }),
599 ),
600 None => (self, None),
601 }
602 }
603
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,604 fn fold_with<F>(self, folder: F) -> F
605 where
606 F: Folder<Self::Item>,
607 {
608 folder.consume_iter(self.chars.bytes())
609 }
610 }
611
612 // /////////////////////////////////////////////////////////////////////////
613
/// Parallel iterator over a string encoded as UTF-16
///
/// Created by `ParallelString::par_encode_utf16`.
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    /// The string slice that is re-encoded.
    chars: &'ch str,
}
619
/// Splittable producer behind `EncodeUtf16`.
struct EncodeUtf16Producer<'ch> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
}
623
624 impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
625 type Item = u16;
626
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,627 fn drive_unindexed<C>(self, consumer: C) -> C::Result
628 where
629 C: UnindexedConsumer<Self::Item>,
630 {
631 bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
632 }
633 }
634
635 impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
636 type Item = u16;
637
split(self) -> (Self, Option<Self>)638 fn split(self) -> (Self, Option<Self>) {
639 match split(self.chars) {
640 Some((left, right)) => (
641 EncodeUtf16Producer { chars: left },
642 Some(EncodeUtf16Producer { chars: right }),
643 ),
644 None => (self, None),
645 }
646 }
647
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,648 fn fold_with<F>(self, folder: F) -> F
649 where
650 F: Folder<Self::Item>,
651 {
652 folder.consume_iter(self.chars.encode_utf16())
653 }
654 }
655
656 // /////////////////////////////////////////////////////////////////////////
657
/// Parallel iterator over substrings separated by a pattern
///
/// Created by `ParallelString::par_split`.
#[derive(Debug, Clone)]
pub struct Split<'ch, P: Pattern> {
    /// The string slice being split.
    chars: &'ch str,
    /// The pattern that separates the yielded substrings.
    separator: P,
}
664
665 impl<'ch, P: Pattern> Split<'ch, P> {
new(chars: &'ch str, separator: P) -> Self666 fn new(chars: &'ch str, separator: P) -> Self {
667 Split { chars, separator }
668 }
669 }
670
671 impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
672 type Item = &'ch str;
673
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,674 fn drive_unindexed<C>(self, consumer: C) -> C::Result
675 where
676 C: UnindexedConsumer<Self::Item>,
677 {
678 let producer = SplitProducer::new(self.chars, &self.separator);
679 bridge_unindexed(producer, consumer)
680 }
681 }
682
683 /// Implement support for `SplitProducer`.
684 impl<'ch, P: Pattern> Fissile<P> for &'ch str {
length(&self) -> usize685 fn length(&self) -> usize {
686 self.len()
687 }
688
midpoint(&self, end: usize) -> usize689 fn midpoint(&self, end: usize) -> usize {
690 // First find a suitable UTF-8 boundary.
691 find_char_midpoint(&self[..end])
692 }
693
find(&self, separator: &P, start: usize, end: usize) -> Option<usize>694 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
695 separator.find_in(&self[start..end])
696 }
697
rfind(&self, separator: &P, end: usize) -> Option<usize>698 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
699 separator.rfind_in(&self[..end])
700 }
701
split_once<const INCL: bool>(self, index: usize) -> (Self, Self)702 fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
703 if INCL {
704 // include the separator in the left side
705 let separator = self[index..].chars().next().unwrap();
706 self.split_at(index + separator.len_utf8())
707 } else {
708 let (left, right) = self.split_at(index);
709 let mut right_iter = right.chars();
710 right_iter.next(); // skip the separator
711 (left, right_iter.as_str())
712 }
713 }
714
fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F where F: Folder<Self>,715 fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
716 where
717 F: Folder<Self>,
718 {
719 if INCL {
720 debug_assert!(!skip_last);
721 separator.fold_inclusive_splits(self, folder)
722 } else {
723 separator.fold_splits(self, folder, skip_last)
724 }
725 }
726 }
727
728 // /////////////////////////////////////////////////////////////////////////
729
/// Parallel iterator over substrings separated by a pattern, where each
/// substring keeps the separator that terminated it
///
/// Created by `ParallelString::par_split_inclusive`.
#[derive(Debug, Clone)]
pub struct SplitInclusive<'ch, P: Pattern> {
    /// The string slice being split.
    chars: &'ch str,
    /// The pattern that ends each yielded substring.
    separator: P,
}
736
737 impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
new(chars: &'ch str, separator: P) -> Self738 fn new(chars: &'ch str, separator: P) -> Self {
739 SplitInclusive { chars, separator }
740 }
741 }
742
743 impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
744 type Item = &'ch str;
745
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,746 fn drive_unindexed<C>(self, consumer: C) -> C::Result
747 where
748 C: UnindexedConsumer<Self::Item>,
749 {
750 let producer = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
751 bridge_unindexed(producer, consumer)
752 }
753 }
754
755 // /////////////////////////////////////////////////////////////////////////
756
/// Parallel iterator over substrings separated by a terminator pattern
///
/// Created by `ParallelString::par_split_terminator`.
#[derive(Debug, Clone)]
pub struct SplitTerminator<'ch, P: Pattern> {
    /// The string slice being split.
    chars: &'ch str,
    /// The pattern that terminates each yielded substring.
    terminator: P,
}
763
/// Splittable producer behind `SplitTerminator`.
struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
    /// The underlying splitter that does the actual work.
    splitter: SplitProducer<'sep, P, &'ch str>,
    /// Whether the final piece produced by `splitter` must be dropped.
    skip_last: bool,
}
768
769 impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
new(chars: &'ch str, terminator: P) -> Self770 fn new(chars: &'ch str, terminator: P) -> Self {
771 SplitTerminator { chars, terminator }
772 }
773 }
774
775 impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
new(chars: &'ch str, terminator: &'sep P) -> Self776 fn new(chars: &'ch str, terminator: &'sep P) -> Self {
777 SplitTerminatorProducer {
778 splitter: SplitProducer::new(chars, terminator),
779 skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
780 }
781 }
782 }
783
784 impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
785 type Item = &'ch str;
786
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,787 fn drive_unindexed<C>(self, consumer: C) -> C::Result
788 where
789 C: UnindexedConsumer<Self::Item>,
790 {
791 let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
792 bridge_unindexed(producer, consumer)
793 }
794 }
795
796 impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
797 type Item = &'ch str;
798
split(mut self) -> (Self, Option<Self>)799 fn split(mut self) -> (Self, Option<Self>) {
800 let (left, right) = self.splitter.split();
801 self.splitter = left;
802 let right = right.map(|right| {
803 let skip_last = self.skip_last;
804 self.skip_last = false;
805 SplitTerminatorProducer {
806 splitter: right,
807 skip_last,
808 }
809 });
810 (self, right)
811 }
812
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,813 fn fold_with<F>(self, folder: F) -> F
814 where
815 F: Folder<Self::Item>,
816 {
817 self.splitter.fold_with(folder, self.skip_last)
818 }
819 }
820
821 // /////////////////////////////////////////////////////////////////////////
822
/// Parallel iterator over lines in a string
///
/// Created by `ParallelString::par_lines`; wraps the string slice whose
/// lines are produced.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
826
/// Removes a single trailing `'\r'` from `line`, if there is one.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    match line.strip_suffix('\r') {
        Some(stripped) => stripped,
        None => line,
    }
}
831
832 impl<'ch> ParallelIterator for Lines<'ch> {
833 type Item = &'ch str;
834
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,835 fn drive_unindexed<C>(self, consumer: C) -> C::Result
836 where
837 C: UnindexedConsumer<Self::Item>,
838 {
839 self.0
840 .par_split_terminator('\n')
841 .map(no_carriage_return)
842 .drive_unindexed(consumer)
843 }
844 }
845
846 // /////////////////////////////////////////////////////////////////////////
847
/// Parallel iterator over substrings separated by whitespace
///
/// Created by `ParallelString::par_split_whitespace`; wraps the string
/// slice being split.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
851
/// Filter predicate: `true` for every string slice except the empty one.
#[inline]
fn not_empty(s: &&str) -> bool {
    !str::is_empty(s)
}
856
857 impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
858 type Item = &'ch str;
859
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,860 fn drive_unindexed<C>(self, consumer: C) -> C::Result
861 where
862 C: UnindexedConsumer<Self::Item>,
863 {
864 self.0
865 .par_split(char::is_whitespace)
866 .filter(not_empty)
867 .drive_unindexed(consumer)
868 }
869 }
870
871 // /////////////////////////////////////////////////////////////////////////
872
/// Parallel iterator over substrings separated by ASCII whitespace
///
/// Created by `ParallelString::par_split_ascii_whitespace`; wraps the
/// string slice being split.
#[derive(Debug, Clone)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
876
/// By-value predicate for ASCII whitespace: space, tab, line feed, form
/// feed and carriage return (the set accepted by
/// `char::is_ascii_whitespace`).
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\x0C' | '\r')
}
881
882 impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
883 type Item = &'ch str;
884
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,885 fn drive_unindexed<C>(self, consumer: C) -> C::Result
886 where
887 C: UnindexedConsumer<Self::Item>,
888 {
889 self.0
890 .par_split(is_ascii_whitespace)
891 .filter(not_empty)
892 .drive_unindexed(consumer)
893 }
894 }
895
896 // /////////////////////////////////////////////////////////////////////////
897
/// Parallel iterator over substrings that match a pattern
///
/// Created by `ParallelString::par_matches`.
#[derive(Debug, Clone)]
pub struct Matches<'ch, P: Pattern> {
    /// The string slice being searched.
    chars: &'ch str,
    /// The pattern whose matches are yielded.
    pattern: P,
}
904
/// Splittable producer behind `Matches`.
struct MatchesProducer<'ch, 'pat, P: Pattern> {
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
    /// Borrowed pattern, shared by every producer split from the same root.
    pattern: &'pat P,
}
909
910 impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
911 type Item = &'ch str;
912
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,913 fn drive_unindexed<C>(self, consumer: C) -> C::Result
914 where
915 C: UnindexedConsumer<Self::Item>,
916 {
917 let producer = MatchesProducer {
918 chars: self.chars,
919 pattern: &self.pattern,
920 };
921 bridge_unindexed(producer, consumer)
922 }
923 }
924
925 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
926 type Item = &'ch str;
927
split(self) -> (Self, Option<Self>)928 fn split(self) -> (Self, Option<Self>) {
929 match split(self.chars) {
930 Some((left, right)) => (
931 MatchesProducer {
932 chars: left,
933 ..self
934 },
935 Some(MatchesProducer {
936 chars: right,
937 ..self
938 }),
939 ),
940 None => (self, None),
941 }
942 }
943
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,944 fn fold_with<F>(self, folder: F) -> F
945 where
946 F: Folder<Self::Item>,
947 {
948 self.pattern.fold_matches(self.chars, folder)
949 }
950 }
951
952 // /////////////////////////////////////////////////////////////////////////
953
/// Parallel iterator over substrings that match a pattern, with their positions
///
/// Created by `ParallelString::par_match_indices`.
#[derive(Debug, Clone)]
pub struct MatchIndices<'ch, P: Pattern> {
    /// The string slice being searched.
    chars: &'ch str,
    /// The pattern whose matches are yielded.
    pattern: P,
}
960
/// Splittable producer behind `MatchIndices`.
struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
    /// Byte offset of `chars` within the original string; added to every
    /// match index this producer yields.
    index: usize,
    /// The portion of the string this producer is responsible for.
    chars: &'ch str,
    /// Borrowed pattern, shared by every producer split from the same root.
    pattern: &'pat P,
}
966
967 impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
968 type Item = (usize, &'ch str);
969
drive_unindexed<C>(self, consumer: C) -> C::Result where C: UnindexedConsumer<Self::Item>,970 fn drive_unindexed<C>(self, consumer: C) -> C::Result
971 where
972 C: UnindexedConsumer<Self::Item>,
973 {
974 let producer = MatchIndicesProducer {
975 index: 0,
976 chars: self.chars,
977 pattern: &self.pattern,
978 };
979 bridge_unindexed(producer, consumer)
980 }
981 }
982
983 impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
984 type Item = (usize, &'ch str);
985
split(self) -> (Self, Option<Self>)986 fn split(self) -> (Self, Option<Self>) {
987 match split(self.chars) {
988 Some((left, right)) => (
989 MatchIndicesProducer {
990 chars: left,
991 ..self
992 },
993 Some(MatchIndicesProducer {
994 chars: right,
995 index: self.index + left.len(),
996 ..self
997 }),
998 ),
999 None => (self, None),
1000 }
1001 }
1002
fold_with<F>(self, folder: F) -> F where F: Folder<Self::Item>,1003 fn fold_with<F>(self, folder: F) -> F
1004 where
1005 F: Folder<Self::Item>,
1006 {
1007 self.pattern
1008 .fold_match_indices(self.chars, folder, self.index)
1009 }
1010 }
1011