• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //! Slice selection
2 //!
3 //! This module contains the implementation for `slice::select_nth_unstable`.
4 //! It uses an introselect algorithm based on Orson Peters' pattern-defeating quicksort,
5 //! published at: <https://github.com/orlp/pdqsort>
6 //!
7 //! The fallback algorithm used for introselect is Median of Medians using Tukey's Ninther
8 //! for pivot selection. Using this as a fallback ensures O(n) worst case running time with
9 //! better performance than one would get using heapsort as fallback.
10 
11 use crate::cmp;
12 use crate::mem::{self, SizedTypeProperties};
13 use crate::slice::sort::{
14     break_patterns, choose_pivot, insertion_sort_shift_left, partition, partition_equal,
15 };
16 
17 // For slices of up to this length it's probably faster to simply sort them.
18 // Defined at the module scope because it's used in multiple functions.
19 const MAX_INSERTION: usize = 10;
20 
partition_at_index_loop<'a, T, F>( mut v: &'a mut [T], mut index: usize, is_less: &mut F, mut pred: Option<&'a T>, ) where F: FnMut(&T, &T) -> bool,21 fn partition_at_index_loop<'a, T, F>(
22     mut v: &'a mut [T],
23     mut index: usize,
24     is_less: &mut F,
25     mut pred: Option<&'a T>,
26 ) where
27     F: FnMut(&T, &T) -> bool,
28 {
29     // Limit the amount of iterations and fall back to fast deterministic selection
30     // to ensure O(n) worst case running time. This limit needs to be constant, because
31     // using `ilog2(len)` like in `sort` would result in O(n log n) time complexity.
32     // The exact value of the limit is chosen somewhat arbitrarily, but for most inputs bad pivot
33     // selections should be relatively rare, so the limit usually shouldn't be reached
34     // anyways.
35     let mut limit = 16;
36 
37     // True if the last partitioning was reasonably balanced.
38     let mut was_balanced = true;
39 
40     loop {
41         if v.len() <= MAX_INSERTION {
42             if v.len() > 1 {
43                 insertion_sort_shift_left(v, 1, is_less);
44             }
45             return;
46         }
47 
48         if limit == 0 {
49             median_of_medians(v, is_less, index);
50             return;
51         }
52 
53         // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
54         // some elements around. Hopefully we'll choose a better pivot this time.
55         if !was_balanced {
56             break_patterns(v);
57             limit -= 1;
58         }
59 
60         // Choose a pivot
61         let (pivot, _) = choose_pivot(v, is_less);
62 
63         // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
64         // slice. Partition the slice into elements equal to and elements greater than the pivot.
65         // This case is usually hit when the slice contains many duplicate elements.
66         if let Some(p) = pred {
67             if !is_less(p, &v[pivot]) {
68                 let mid = partition_equal(v, pivot, is_less);
69 
70                 // If we've passed our index, then we're good.
71                 if mid > index {
72                     return;
73                 }
74 
75                 // Otherwise, continue sorting elements greater than the pivot.
76                 v = &mut v[mid..];
77                 index = index - mid;
78                 pred = None;
79                 continue;
80             }
81         }
82 
83         let (mid, _) = partition(v, pivot, is_less);
84         was_balanced = cmp::min(mid, v.len() - mid) >= v.len() / 8;
85 
86         // Split the slice into `left`, `pivot`, and `right`.
87         let (left, right) = v.split_at_mut(mid);
88         let (pivot, right) = right.split_at_mut(1);
89         let pivot = &pivot[0];
90 
91         if mid < index {
92             v = right;
93             index = index - mid - 1;
94             pred = Some(pivot);
95         } else if mid > index {
96             v = left;
97         } else {
98             // If mid == index, then we're done, since partition() guaranteed that all elements
99             // after mid are greater than or equal to mid.
100             return;
101         }
102     }
103 }
104 
105 /// Helper function that returns the index of the minimum element in the slice using the given
106 /// comparator function
min_index<T, F: FnMut(&T, &T) -> bool>(slice: &[T], is_less: &mut F) -> Option<usize>107 fn min_index<T, F: FnMut(&T, &T) -> bool>(slice: &[T], is_less: &mut F) -> Option<usize> {
108     slice
109         .iter()
110         .enumerate()
111         .reduce(|acc, t| if is_less(t.1, acc.1) { t } else { acc })
112         .map(|(i, _)| i)
113 }
114 
115 /// Helper function that returns the index of the maximum element in the slice using the given
116 /// comparator function
max_index<T, F: FnMut(&T, &T) -> bool>(slice: &[T], is_less: &mut F) -> Option<usize>117 fn max_index<T, F: FnMut(&T, &T) -> bool>(slice: &[T], is_less: &mut F) -> Option<usize> {
118     slice
119         .iter()
120         .enumerate()
121         .reduce(|acc, t| if is_less(acc.1, t.1) { t } else { acc })
122         .map(|(i, _)| i)
123 }
124 
125 /// Reorder the slice such that the element at `index` is at its final sorted position.
partition_at_index<T, F>( v: &mut [T], index: usize, mut is_less: F, ) -> (&mut [T], &mut T, &mut [T]) where F: FnMut(&T, &T) -> bool,126 pub fn partition_at_index<T, F>(
127     v: &mut [T],
128     index: usize,
129     mut is_less: F,
130 ) -> (&mut [T], &mut T, &mut [T])
131 where
132     F: FnMut(&T, &T) -> bool,
133 {
134     if index >= v.len() {
135         panic!("partition_at_index index {} greater than length of slice {}", index, v.len());
136     }
137 
138     if T::IS_ZST {
139         // Sorting has no meaningful behavior on zero-sized types. Do nothing.
140     } else if index == v.len() - 1 {
141         // Find max element and place it in the last position of the array. We're free to use
142         // `unwrap()` here because we know v must not be empty.
143         let max_idx = max_index(v, &mut is_less).unwrap();
144         v.swap(max_idx, index);
145     } else if index == 0 {
146         // Find min element and place it in the first position of the array. We're free to use
147         // `unwrap()` here because we know v must not be empty.
148         let min_idx = min_index(v, &mut is_less).unwrap();
149         v.swap(min_idx, index);
150     } else {
151         partition_at_index_loop(v, index, &mut is_less, None);
152     }
153 
154     let (left, right) = v.split_at_mut(index);
155     let (pivot, right) = right.split_at_mut(1);
156     let pivot = &mut pivot[0];
157     (left, pivot, right)
158 }
159 
160 /// Selection algorithm to select the k-th element from the slice in guaranteed O(n) time.
161 /// This is essentially a quickselect that uses Tukey's Ninther for pivot selection
median_of_medians<T, F: FnMut(&T, &T) -> bool>(mut v: &mut [T], is_less: &mut F, mut k: usize)162 fn median_of_medians<T, F: FnMut(&T, &T) -> bool>(mut v: &mut [T], is_less: &mut F, mut k: usize) {
163     // Since this function isn't public, it should never be called with an out-of-bounds index.
164     debug_assert!(k < v.len());
165 
166     // If T is as ZST, `partition_at_index` will already return early.
167     debug_assert!(!T::IS_ZST);
168 
169     // We now know that `k < v.len() <= isize::MAX`
170     loop {
171         if v.len() <= MAX_INSERTION {
172             if v.len() > 1 {
173                 insertion_sort_shift_left(v, 1, is_less);
174             }
175             return;
176         }
177 
178         // `median_of_{minima,maxima}` can't handle the extreme cases of the first/last element,
179         // so we catch them here and just do a linear search.
180         if k == v.len() - 1 {
181             // Find max element and place it in the last position of the array. We're free to use
182             // `unwrap()` here because we know v must not be empty.
183             let max_idx = max_index(v, is_less).unwrap();
184             v.swap(max_idx, k);
185             return;
186         } else if k == 0 {
187             // Find min element and place it in the first position of the array. We're free to use
188             // `unwrap()` here because we know v must not be empty.
189             let min_idx = min_index(v, is_less).unwrap();
190             v.swap(min_idx, k);
191             return;
192         }
193 
194         let p = median_of_ninthers(v, is_less);
195 
196         if p == k {
197             return;
198         } else if p > k {
199             v = &mut v[..p];
200         } else {
201             // Since `p < k < v.len()`, `p + 1` doesn't overflow and is
202             // a valid index into the slice.
203             v = &mut v[p + 1..];
204             k -= p + 1;
205         }
206     }
207 }
208 
209 // Optimized for when `k` lies somewhere in the middle of the slice. Selects a pivot
210 // as close as possible to the median of the slice. For more details on how the algorithm
211 // operates, refer to the paper <https://drops.dagstuhl.de/opus/volltexte/2017/7612/pdf/LIPIcs-SEA-2017-24.pdf>.
median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) -> usize212 fn median_of_ninthers<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) -> usize {
213     // use `saturating_mul` so the multiplication doesn't overflow on 16-bit platforms.
214     let frac = if v.len() <= 1024 {
215         v.len() / 12
216     } else if v.len() <= 128_usize.saturating_mul(1024) {
217         v.len() / 64
218     } else {
219         v.len() / 1024
220     };
221 
222     let pivot = frac / 2;
223     let lo = v.len() / 2 - pivot;
224     let hi = frac + lo;
225     let gap = (v.len() - 9 * frac) / 4;
226     let mut a = lo - 4 * frac - gap;
227     let mut b = hi + gap;
228     for i in lo..hi {
229         ninther(v, is_less, a, i - frac, b, a + 1, i, b + 1, a + 2, i + frac, b + 2);
230         a += 3;
231         b += 3;
232     }
233 
234     median_of_medians(&mut v[lo..lo + frac], is_less, pivot);
235     partition(v, lo + pivot, is_less).0
236 }
237 
238 /// Moves around the 9 elements at the indices a..i, such that
239 /// `v[d]` contains the median of the 9 elements and the other
240 /// elements are partitioned around it.
ninther<T, F: FnMut(&T, &T) -> bool>( v: &mut [T], is_less: &mut F, a: usize, mut b: usize, c: usize, mut d: usize, e: usize, mut f: usize, g: usize, mut h: usize, i: usize, )241 fn ninther<T, F: FnMut(&T, &T) -> bool>(
242     v: &mut [T],
243     is_less: &mut F,
244     a: usize,
245     mut b: usize,
246     c: usize,
247     mut d: usize,
248     e: usize,
249     mut f: usize,
250     g: usize,
251     mut h: usize,
252     i: usize,
253 ) {
254     b = median_idx(v, is_less, a, b, c);
255     h = median_idx(v, is_less, g, h, i);
256     if is_less(&v[h], &v[b]) {
257         mem::swap(&mut b, &mut h);
258     }
259     if is_less(&v[f], &v[d]) {
260         mem::swap(&mut d, &mut f);
261     }
262     if is_less(&v[e], &v[d]) {
263         // do nothing
264     } else if is_less(&v[f], &v[e]) {
265         d = f;
266     } else {
267         if is_less(&v[e], &v[b]) {
268             v.swap(e, b);
269         } else if is_less(&v[h], &v[e]) {
270             v.swap(e, h);
271         }
272         return;
273     }
274     if is_less(&v[d], &v[b]) {
275         d = b;
276     } else if is_less(&v[h], &v[d]) {
277         d = h;
278     }
279 
280     v.swap(d, e);
281 }
282 
283 /// returns the index pointing to the median of the 3
284 /// elements `v[a]`, `v[b]` and `v[c]`
median_idx<T, F: FnMut(&T, &T) -> bool>( v: &[T], is_less: &mut F, mut a: usize, b: usize, mut c: usize, ) -> usize285 fn median_idx<T, F: FnMut(&T, &T) -> bool>(
286     v: &[T],
287     is_less: &mut F,
288     mut a: usize,
289     b: usize,
290     mut c: usize,
291 ) -> usize {
292     if is_less(&v[c], &v[a]) {
293         mem::swap(&mut a, &mut c);
294     }
295     if is_less(&v[c], &v[b]) {
296         return c;
297     }
298     if is_less(&v[b], &v[a]) {
299         return a;
300     }
301     b
302 }
303