//! Parallel iterator types for [slices][std::slice]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/

mod chunks;
mod mergesort;
mod quicksort;
mod rchunks;

mod test;

use self::mergesort::par_mergesort;
use self::quicksort::par_quicksort;
use crate::iter::plumbing::*;
use crate::iter::*;
use crate::split_producer::*;
use std::cmp;
use std::cmp::Ordering;
use std::fmt::{self, Debug};
use std::mem;

pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};

/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let smallest = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().min().unwrap())
    ///     .min();
    /// assert_eq!(Some(&1), smallest);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows {
            window_size,
            slice: self.as_parallel_slice(),
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
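    ///
    /// The omitted tail is still reachable from the returned iterator itself;
    /// a rough sketch (using the `remainder` accessor referred to above):
    ///
    /// ```ignore
    /// let v = [1, 2, 3, 4, 5];
    /// let tail = v.par_chunks_exact(2).remainder(); // the trailing &[5]
    /// ```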
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }
}

impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}
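
// Note: any type that can expose its data as a plain slice can opt into all of
// the methods above just by implementing `as_parallel_slice`. A minimal sketch
// (the `Samples` wrapper is hypothetical, not part of this crate):
//
//     struct Samples(Vec<f32>);
//
//     impl ParallelSlice<f32> for Samples {
//         fn as_parallel_slice(&self) -> &[f32] {
//             &self.0
//         }
//     }
//
//     // `Samples` now provides `par_chunks`, `par_windows`, `par_split`, etc.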

/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and a parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and a parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property accesses or
    /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and a parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by using
    /// temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
    /// faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
    /// length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will be
                // stable with respect to the original slice. We use `sort_unstable` here because
                // it requires less memory allocation.
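                //
                // After the keys are sorted, the loop below applies the resulting permutation
                // to `slice` in place: `indices[i].1` records the original position of the
                // element that belongs at position `i`. If that element has already been moved
                // by an earlier swap (`index < i`), the chain of recorded positions is followed
                // to find where it currently lives before swapping it into place.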
                indices.par_sort_unstable();
                for i in 0..len {
                    let mut index = indices[i].1;
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

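        // Pick the narrowest index type that actually shrinks the `(key, index)` pairs:
        // if alignment padding makes, e.g., `(K, u8)` the same size as `(K, u16)`, there
        // is no allocation saving, so fall through to the wider index type.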
        let sz_u8 = mem::size_of::<(K, u8)>();
        let sz_u16 = mem::size_of::<(K, u16)>();
        let sz_u32 = mem::size_of::<(K, u32)>();
        let sz_usize = mem::size_of::<(K, usize)>();

        if sz_u8 < sz_u16 && len <= (std::u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (std::u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (std::u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
    /// equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
    /// of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
    /// is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }
}

impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}

impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}

impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}

/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Iter<'data, T> {
    fn clone(&self) -> Self {
        Iter { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterProducer { slice: self.slice })
    }
}

struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
    type Item = &'data T;
    type IntoIter = ::std::slice::Iter<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at(index);
        (IterProducer { slice: left }, IterProducer { slice: right })
    }
}

/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Windows<'data, T> {
    fn clone(&self) -> Self {
        Windows { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        assert!(self.window_size >= 1);
        self.slice.len().saturating_sub(self.window_size - 1)
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(WindowsProducer {
            window_size: self.window_size,
            slice: self.slice,
        })
    }
}

struct WindowsProducer<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
    type Item = &'data [T];
    type IntoIter = ::std::slice::Windows<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.windows(self.window_size)
    }

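    // Splitting is by window start position: the left half keeps `window_size - 1`
    // extra elements past `index` so it can yield exactly the first `index` windows,
    // while the right half starts at `index` and yields the rest. The element ranges
    // overlap, but no window is produced twice.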
    fn split_at(self, index: usize) -> (Self, Self) {
        let left_index = cmp::min(self.slice.len(), index + (self.window_size - 1));
        let left = &self.slice[..left_index];
        let right = &self.slice[index..];
        (
            WindowsProducer {
                window_size: self.window_size,
                slice: left,
            },
            WindowsProducer {
                window_size: self.window_size,
                slice: right,
            },
        )
    }
}

/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterMutProducer { slice: self.slice })
    }
}

struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
    type Item = &'data mut T;
    type IntoIter = ::std::slice::IterMut<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter_mut()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at_mut(index);
        (
            IterMutProducer { slice: left },
            IterMutProducer { slice: right },
        )
    }
}

/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
    fn clone(&self) -> Self {
        Split {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Split").field("slice", &self.slice).finish()
    }
}

impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once(self, index: usize) -> (Self, Self) {
        let (left, right) = self.split_at(index);
        (left, &right[1..]) // skip the separator
    }

    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        let mut split = self.split(separator);
        if skip_last {
            split.next_back();
        }
        folder.consume_iter(split)
    }
}

/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data mut [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once(self, index: usize) -> (Self, Self) {
        let (left, right) = self.split_at_mut(index);
        (left, &mut right[1..]) // skip the separator
    }

    fn fold_splits<F>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        let mut split = self.split_mut(separator);
        if skip_last {
            split.next_back();
        }
        folder.consume_iter(split)
    }
}