//! Parallel iterator types for [slices][std::slice]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
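//!
//! For example, one reason to name these types is to return them from a helper.
//! A brief illustrative sketch (the `row_chunks` helper and its parameters are
//! hypothetical, not part of this crate):
//!
//! ```
//! use rayon::prelude::*;
//! use rayon::slice::Chunks;
//!
//! // Hypothetical helper: expose the rows of a flat buffer as a parallel iterator.
//! fn row_chunks(data: &[f32], width: usize) -> Chunks<'_, f32> {
//!     data.par_chunks(width)
//! }
//!
//! let buffer = [0.0f32; 6];
//! assert_eq!(row_chunks(&buffer, 3).count(), 2);
//! ```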

mod chunks;
mod mergesort;
mod quicksort;
mod rchunks;

mod test;

use self::mergesort::par_mergesort;
use self::quicksort::par_quicksort;
use crate::iter::plumbing::*;
use crate::iter::*;
use crate::split_producer::*;
use std::cmp;
use std::cmp::Ordering;
use std::fmt::{self, Debug};
use std::mem;

pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};

/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let smallest = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().min().unwrap())
    ///     .min();
    /// assert_eq!(Some(&1), smallest);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows {
            window_size,
            slice: self.as_parallel_slice(),
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
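    ///
    /// The leftover elements are not lost; a brief sketch, assuming the
    /// `remainder` accessor on the returned `ChunksExact`:
    ///
    /// ```
    /// use rayon::prelude::*;
    /// // The last element does not fill a chunk of 2, so it ends up in the remainder.
    /// assert_eq!([1, 2, 3, 4, 5].par_chunks_exact(2).remainder(), &[5][..]);
    /// ```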
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }
}

impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}

/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
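    ///
    /// A brief sketch of recovering the leftover elements, assuming the
    /// `into_remainder` accessor on the returned `ChunksExactMut`:
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// // Elements 4 and 5 do not fill a chunk of 3, so they form the remainder.
    /// assert_eq!(array.par_chunks_exact_mut(3).into_remainder(), &mut [4, 5][..]);
    /// ```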
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property accesses or
    /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by using
    /// temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
    /// faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
    /// length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will be
                // stable with respect to the original slice. We use `sort_unstable` here because
                // it requires less memory allocation.
                indices.par_sort_unstable();
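                // Apply the sorted permutation to `slice` in place. After the sort,
                // `indices[i].1` holds the original position of the element that belongs
                // at position `i`. Positions before `i` are already final, so if that
                // element has since been swapped away, follow the recorded chain of
                // indices to its current location before swapping it into place.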
                for i in 0..len {
                    let mut index = indices[i].1;
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

        let sz_u8 = mem::size_of::<(K, u8)>();
        let sz_u16 = mem::size_of::<(K, u16)>();
        let sz_u32 = mem::size_of::<(K, u32)>();
        let sz_usize = mem::size_of::<(K, usize)>();

        if sz_u8 < sz_u16 && len <= (std::u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (std::u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (std::u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
    /// equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
    /// of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
    /// is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }
}

impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}

impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}

impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}

/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Iter<'data, T> {
    fn clone(&self) -> Self {
        Iter { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterProducer { slice: self.slice })
    }
}

struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
    type Item = &'data T;
    type IntoIter = ::std::slice::Iter<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at(index);
        (IterProducer { slice: left }, IterProducer { slice: right })
    }
}

/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Windows<'data, T> {
    fn clone(&self) -> Self {
        Windows { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        assert!(self.window_size >= 1);
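        // A slice of length `n` yields `n - (window_size - 1)` complete windows,
        // or none at all if the slice is shorter than one window.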
        self.slice.len().saturating_sub(self.window_size - 1)
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(WindowsProducer {
            window_size: self.window_size,
            slice: self.slice,
        })
    }
}

struct WindowsProducer<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
    type Item = &'data [T];
    type IntoIter = ::std::slice::Windows<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.windows(self.window_size)
    }

    fn split_at(self, index: usize) -> (Self, Self) {
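        // The two halves overlap: the left slice keeps `window_size - 1` extra elements
        // past the split point so its final window is still complete, while the right
        // slice starts exactly at `index`. Together they produce every window exactly
        // once: the first `index` windows on the left, the rest on the right.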
        let left_index = cmp::min(self.slice.len(), index + (self.window_size - 1));
        let left = &self.slice[..left_index];
        let right = &self.slice[index..];
        (
            WindowsProducer {
                window_size: self.window_size,
                slice: left,
            },
            WindowsProducer {
                window_size: self.window_size,
                slice: right,
            },
        )
    }
}

/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterMutProducer { slice: self.slice })
    }
}

struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
    type Item = &'data mut T;
    type IntoIter = ::std::slice::IterMut<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter_mut()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at_mut(index);
        (
            IterMutProducer { slice: left },
            IterMutProducer { slice: right },
        )
    }
}

/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
    fn clone(&self) -> Self {
        Split {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Split").field("slice", &self.slice).finish()
    }
}

impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
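        // The shared `SplitProducer` drives the work-splitting; the `Fissile` impl
        // below tells it how to measure, bisect, and divide this slice around
        // separator matches.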
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once(self, index: usize) -> (Self, Self) {
        let (left, right) = self.split_at(index);
        (left, &right[1..]) // skip the separator
    }

    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        let mut split = self.split(separator);
        if skip_last {
            split.next_back();
        }
        folder.consume_iter(split)
    }
}

/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data mut [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once(self, index: usize) -> (Self, Self) {
        let (left, right) = self.split_at_mut(index);
        (left, &mut right[1..]) // skip the separator
    }

    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        let mut split = self.split_mut(separator);
        if skip_last {
            split.next_back();
        }
        folder.consume_iter(split)
    }
}