1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___ALGORITHM_SORT_H
10 #define _LIBCPP___ALGORITHM_SORT_H
11 
12 #include <__algorithm/comp.h>
13 #include <__algorithm/comp_ref_type.h>
14 #include <__algorithm/iter_swap.h>
15 #include <__algorithm/iterator_operations.h>
16 #include <__algorithm/min_element.h>
17 #include <__algorithm/partial_sort.h>
18 #include <__algorithm/unwrap_iter.h>
19 #include <__assert>
20 #include <__bit/blsr.h>
21 #include <__bit/countl.h>
22 #include <__bit/countr.h>
23 #include <__config>
24 #include <__debug>
25 #include <__debug_utils/randomize_range.h>
26 #include <__functional/operations.h>
27 #include <__functional/ranges_operations.h>
28 #include <__iterator/iterator_traits.h>
29 #include <__type_traits/conditional.h>
30 #include <__type_traits/disjunction.h>
31 #include <__type_traits/is_arithmetic.h>
32 #include <__utility/move.h>
33 #include <__utility/pair.h>
34 #include <climits>
35 #include <cstdint>
36 
37 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
38 #  pragma GCC system_header
39 #endif
40 
41 _LIBCPP_BEGIN_NAMESPACE_STD
42 
43 // stable, 2-3 compares, 0-2 swaps
44 
45 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
46 _LIBCPP_HIDE_FROM_ABI
__sort3(_ForwardIterator __x,_ForwardIterator __y,_ForwardIterator __z,_Compare __c)47 _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned __sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z,
48                                                _Compare __c) {
49   using _Ops = _IterOps<_AlgPolicy>;
50 
51   unsigned __r = 0;
52   if (!__c(*__y, *__x))   // if x <= y
53   {
54     if (!__c(*__z, *__y)) // if y <= z
55       return __r;         // x <= y && y <= z
56                           // x <= y && y > z
57     _Ops::iter_swap(__y, __z);     // x <= z && y < z
58     __r = 1;
59     if (__c(*__y, *__x))  // if x > y
60     {
61       _Ops::iter_swap(__x, __y);   // x < y && y <= z
62       __r = 2;
63     }
64     return __r;           // x <= y && y < z
65   }
66   if (__c(*__z, *__y))    // x > y, if y > z
67   {
68     _Ops::iter_swap(__x, __z);     // x < y && y < z
69     __r = 1;
70     return __r;
71   }
72   _Ops::iter_swap(__x, __y);       // x > y && y <= z
73   __r = 1;                // x < y && x <= z
74   if (__c(*__z, *__y))    // if y > z
75   {
76     _Ops::iter_swap(__y, __z);     // x <= y && y < z
77     __r = 2;
78   }
79   return __r;
80 }                         // x <= y && y <= z
81 
82 // stable, 3-6 compares, 0-5 swaps
83 
84 template <class _AlgPolicy, class _Compare, class _ForwardIterator>
85 _LIBCPP_HIDE_FROM_ABI
__sort4(_ForwardIterator __x1,_ForwardIterator __x2,_ForwardIterator __x3,_ForwardIterator __x4,_Compare __c)86 void __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4,
87                  _Compare __c) {
88   using _Ops   = _IterOps<_AlgPolicy>;
89   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
90   if (__c(*__x4, *__x3)) {
91     _Ops::iter_swap(__x3, __x4);
92     if (__c(*__x3, *__x2)) {
93       _Ops::iter_swap(__x2, __x3);
94       if (__c(*__x2, *__x1)) {
95         _Ops::iter_swap(__x1, __x2);
96       }
97     }
98   }
99 }
100 
101 // stable, 4-10 compares, 0-9 swaps
102 
103 template <class _AlgPolicy, class _Comp, class _ForwardIterator>
__sort5(_ForwardIterator __x1,_ForwardIterator __x2,_ForwardIterator __x3,_ForwardIterator __x4,_ForwardIterator __x5,_Comp __comp)104 _LIBCPP_HIDE_FROM_ABI void __sort5(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3,
105                                    _ForwardIterator __x4, _ForwardIterator __x5, _Comp __comp) {
106   using _Ops = _IterOps<_AlgPolicy>;
107 
108   std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp);
109   if (__comp(*__x5, *__x4)) {
110     _Ops::iter_swap(__x4, __x5);
111     if (__comp(*__x4, *__x3)) {
112       _Ops::iter_swap(__x3, __x4);
113       if (__comp(*__x3, *__x2)) {
114         _Ops::iter_swap(__x2, __x3);
115         if (__comp(*__x2, *__x1)) {
116           _Ops::iter_swap(__x1, __x2);
117         }
118       }
119     }
120   }
121 }
122 
123 // The comparator being simple is a prerequisite for using the branchless optimization.
124 template <class _Tp>
125 struct __is_simple_comparator : false_type {};
126 template <class _Tp>
127 struct __is_simple_comparator<__less<_Tp>&> : true_type {};
128 template <class _Tp>
129 struct __is_simple_comparator<less<_Tp>&> : true_type {};
130 template <class _Tp>
131 struct __is_simple_comparator<greater<_Tp>&> : true_type {};
132 #if _LIBCPP_STD_VER >= 20
133 template <>
134 struct __is_simple_comparator<ranges::less&> : true_type {};
135 template <>
136 struct __is_simple_comparator<ranges::greater&> : true_type {};
137 #endif
138 
139 template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
140 using __use_branchless_sort =
141     integral_constant<bool, __is_cpp17_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
142                                 is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
143 
144 namespace __detail {
145 
146 // Size in bits for the bitset in use.
147 enum { __block_size = sizeof(uint64_t) * 8 };
148 
149 } // namespace __detail
150 
151 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
152 template <class _Compare, class _RandomAccessIterator>
153 inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
154   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
155   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
156   bool __r = __c(*__x, *__y);
157   value_type __tmp = __r ? *__x : *__y;
158   *__y = __r ? *__y : *__x;
159   *__x = __tmp;
160 }
161 
162 // Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
163 // under the assumption that *__y and *__z are already ordered.
164 template <class _Compare, class _RandomAccessIterator>
165 inline _LIBCPP_HIDE_FROM_ABI void __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y,
166                                                           _RandomAccessIterator __z, _Compare __c) {
167   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
168   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
169   bool __r = __c(*__z, *__x);
170   value_type __tmp = __r ? *__z : *__x;
171   *__z = __r ? *__x : *__z;
172   __r = __c(__tmp, *__y);
173   *__x = __r ? *__x : *__y;
174   *__y = __r ? *__y : __tmp;
175 }
176 
177 template <class, class _Compare, class _RandomAccessIterator>
178 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
179 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
180                          _Compare __c) {
181   std::__cond_swap<_Compare>(__x2, __x3, __c);
182   std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
183 }
184 
185 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
186 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
187 __sort3_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
188                          _Compare __c) {
189   std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
190 }
191 
192 template <class, class _Compare, class _RandomAccessIterator>
193 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
194 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
195                          _RandomAccessIterator __x4, _Compare __c) {
196   std::__cond_swap<_Compare>(__x1, __x3, __c);
197   std::__cond_swap<_Compare>(__x2, __x4, __c);
198   std::__cond_swap<_Compare>(__x1, __x2, __c);
199   std::__cond_swap<_Compare>(__x3, __x4, __c);
200   std::__cond_swap<_Compare>(__x2, __x3, __c);
201 }
202 
203 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
204 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
205 __sort4_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
206                          _RandomAccessIterator __x4, _Compare __c) {
207   std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
208 }
209 
210 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
211 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
212 __sort5_maybe_branchless(
213     _RandomAccessIterator __x1,
214     _RandomAccessIterator __x2,
215     _RandomAccessIterator __x3,
216     _RandomAccessIterator __x4,
217     _RandomAccessIterator __x5,
218     _Compare __c) {
219   std::__cond_swap<_Compare>(__x1, __x2, __c);
220   std::__cond_swap<_Compare>(__x4, __x5, __c);
221   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
222   std::__cond_swap<_Compare>(__x2, __x5, __c);
223   std::__partially_sorted_swap<_Compare>(__x1, __x3, __x4, __c);
224   std::__partially_sorted_swap<_Compare>(__x2, __x3, __x4, __c);
225 }
226 
227 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
228 inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void>
229 __sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3,
230                          _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) {
231   std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>(
232       std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c);
233 }
234 
235 // Assumes size > 0
236 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
237 _LIBCPP_HIDE_FROM_ABI
238 _LIBCPP_CONSTEXPR_SINCE_CXX14 void __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last,
239                                                     _Compare __comp) {
240   _BidirectionalIterator __lm1 = __last;
241   for (--__lm1; __first != __lm1; ++__first) {
242     _BidirectionalIterator __i = std::__min_element<_Compare>(__first, __last, __comp);
243     if (__i != __first)
244       _IterOps<_AlgPolicy>::iter_swap(__first, __i);
245   }
246 }
247 
248 // Sort the iterator range [__first, __last) using the comparator __comp using
249 // the insertion sort algorithm.
250 template <class _AlgPolicy, class _Compare, class _BidirectionalIterator>
251 _LIBCPP_HIDE_FROM_ABI
252 void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) {
253   using _Ops = _IterOps<_AlgPolicy>;
254 
255   typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type;
256   if (__first == __last)
257     return;
258   _BidirectionalIterator __i = __first;
259   for (++__i; __i != __last; ++__i) {
260     _BidirectionalIterator __j = __i;
261     --__j;
262     if (__comp(*__i, *__j)) {
263       value_type __t(_Ops::__iter_move(__i));
264       _BidirectionalIterator __k = __j;
265       __j                        = __i;
266       do {
267         *__j = _Ops::__iter_move(__k);
268         __j  = __k;
269       } while (__j != __first && __comp(__t, *--__k));
270       *__j = std::move(__t);
271     }
272   }
273 }
274 
275 // Sort the iterator range [__first, __last) using the comparator __comp using
276 // the insertion sort algorithm.  Insertion sort has two loops, outer and inner.
277 // The implementation below has no bounds check (unguarded) for the inner loop.
278 // Assumes that there is an element in the position (__first - 1) and that each
279 // element in the input range is greater or equal to the element at __first - 1.
280 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator>
281 _LIBCPP_HIDE_FROM_ABI void
282 __insertion_sort_unguarded(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
283   using _Ops = _IterOps<_AlgPolicy>;
284   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
285   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
286   if (__first == __last)
287     return;
288   for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) {
289     _RandomAccessIterator __j = __i - difference_type(1);
290     if (__comp(*__i, *__j)) {
291       value_type __t(_Ops::__iter_move(__i));
292       _RandomAccessIterator __k = __j;
293       __j = __i;
294       do {
295         *__j = _Ops::__iter_move(__k);
296         __j = __k;
297       } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above.
298       *__j = std::move(__t);
299     }
300   }
301 }
302 
303 template <class _AlgPolicy, class _Comp, class _RandomAccessIterator>
304 _LIBCPP_HIDE_FROM_ABI bool __insertion_sort_incomplete(
305     _RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
306   using _Ops = _IterOps<_AlgPolicy>;
307 
308   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
309   switch (__last - __first) {
310   case 0:
311   case 1:
312     return true;
313   case 2:
314     if (__comp(*--__last, *__first))
315       _Ops::iter_swap(__first, __last);
316     return true;
317   case 3:
318     std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
319     return true;
320   case 4:
321     std::__sort4_maybe_branchless<_AlgPolicy, _Comp>(
322         __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
323     return true;
324   case 5:
325     std::__sort5_maybe_branchless<_AlgPolicy, _Comp>(
326         __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
327         --__last, __comp);
328     return true;
329   }
330   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
331   _RandomAccessIterator __j = __first + difference_type(2);
332   std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
333   const unsigned __limit = 8;
334   unsigned __count = 0;
335   for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
336     if (__comp(*__i, *__j)) {
337       value_type __t(_Ops::__iter_move(__i));
338       _RandomAccessIterator __k = __j;
339       __j = __i;
340       do {
341         *__j = _Ops::__iter_move(__k);
342         __j = __k;
343       } while (__j != __first && __comp(__t, *--__k));
344       *__j = std::move(__t);
345       if (++__count == __limit)
346         return ++__i == __last;
347     }
348     __j = __i;
349   }
350   return true;
351 }
352 
353 template <class _AlgPolicy, class _RandomAccessIterator>
354 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
355     _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) {
356   using _Ops = _IterOps<_AlgPolicy>;
357   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
358   // Swap one pair on each iteration as long as both bitsets have at least one
359   // element for swapping.
360   while (__left_bitset != 0 && __right_bitset != 0) {
361     difference_type __tz_left  = __libcpp_ctz(__left_bitset);
362     __left_bitset              = __libcpp_blsr(__left_bitset);
363     difference_type __tz_right = __libcpp_ctz(__right_bitset);
364     __right_bitset             = __libcpp_blsr(__right_bitset);
365     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
366   }
367 }
368 
369 template <class _Compare,
370           class _RandomAccessIterator,
371           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
372 inline _LIBCPP_HIDE_FROM_ABI void
373 __populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) {
374   // Possible vectorization. With a proper "-march" flag, the following loop
375   // will be compiled into a set of SIMD instructions.
376   _RandomAccessIterator __iter = __first;
377   for (int __j = 0; __j < __detail::__block_size;) {
378     bool __comp_result = !__comp(*__iter, __pivot);
379     __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
380     __j++;
381     ++__iter;
382   }
383 }
384 
385 template <class _Compare,
386           class _RandomAccessIterator,
387           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
388 inline _LIBCPP_HIDE_FROM_ABI void
389 __populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) {
390   // Possible vectorization. With a proper "-march" flag, the following loop
391   // will be compiled into a set of SIMD instructions.
392   _RandomAccessIterator __iter = __lm1;
393   for (int __j = 0; __j < __detail::__block_size;) {
394     bool __comp_result = __comp(*__iter, __pivot);
395     __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
396     __j++;
397     --__iter;
398   }
399 }
400 
401 template <class _AlgPolicy,
402           class _Compare,
403           class _RandomAccessIterator,
404           class _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type>
405 inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks(
406     _RandomAccessIterator& __first,
407     _RandomAccessIterator& __lm1,
408     _Compare __comp,
409     _ValueType& __pivot,
410     uint64_t& __left_bitset,
411     uint64_t& __right_bitset) {
412   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
413   difference_type __remaining_len = __lm1 - __first + 1;
414   difference_type __l_size;
415   difference_type __r_size;
416   if (__left_bitset == 0 && __right_bitset == 0) {
417     __l_size = __remaining_len / 2;
418     __r_size = __remaining_len - __l_size;
419   } else if (__left_bitset == 0) {
420     // We know at least one side is a full block.
421     __l_size = __remaining_len - __detail::__block_size;
422     __r_size = __detail::__block_size;
423   } else { // if (__right_bitset == 0)
424     __l_size = __detail::__block_size;
425     __r_size = __remaining_len - __detail::__block_size;
426   }
427   // Record the comparison outcomes for the elements currently on the left side.
428   if (__left_bitset == 0) {
429     _RandomAccessIterator __iter = __first;
430     for (int __j = 0; __j < __l_size; __j++) {
431       bool __comp_result = !__comp(*__iter, __pivot);
432       __left_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
433       ++__iter;
434     }
435   }
436   // Record the comparison outcomes for the elements currently on the right
437   // side.
438   if (__right_bitset == 0) {
439     _RandomAccessIterator __iter = __lm1;
440     for (int __j = 0; __j < __r_size; __j++) {
441       bool __comp_result = __comp(*__iter, __pivot);
442       __right_bitset |= (static_cast<uint64_t>(__comp_result) << __j);
443       --__iter;
444     }
445   }
446   std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
447   __first += (__left_bitset == 0) ? __l_size : 0;
448   __lm1 -= (__right_bitset == 0) ? __r_size : 0;
449 }
450 
451 template <class _AlgPolicy, class _RandomAccessIterator>
452 inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
453     _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) {
454   using _Ops = _IterOps<_AlgPolicy>;
455   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
456   if (__left_bitset) {
457     // Swap within the left side.  Need to find set positions in the reverse
458     // order.
459     while (__left_bitset != 0) {
460       difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
461       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
462       _RandomAccessIterator __it = __first + __tz_left;
463       if (__it != __lm1) {
464         _Ops::iter_swap(__it, __lm1);
465       }
466       --__lm1;
467     }
468     __first = __lm1 + difference_type(1);
469   } else if (__right_bitset) {
470     // Swap within the right side.  Need to find set positions in the reverse
471     // order.
472     while (__right_bitset != 0) {
473       difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
474       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
475       _RandomAccessIterator __it = __lm1 - __tz_right;
476       if (__it != __first) {
477         _Ops::iter_swap(__it, __first);
478       }
479       ++__first;
480     }
481   }
482 }
483 
484 // Partition [__first, __last) using the comparator __comp.  *__first has the
485 // chosen pivot.  Elements that are equivalent are kept to the left of the
486 // pivot.  Returns the iterator for the pivot and a bool value which is true if
487 // the provided range is already sorted, false otherwise.  We assume that the
488 // length of the range is at least three elements.
489 //
490 // __bitset_partition uses bitsets for storing outcomes of the comparisons
491 // between the pivot and other elements.
492 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
493 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
494 __bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
495   using _Ops = _IterOps<_AlgPolicy>;
496   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
497   typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type;
498   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
499 
500   _RandomAccessIterator __begin = __first;
501   value_type __pivot(_Ops::__iter_move(__first));
502   // Find the first element greater than the pivot.
503   if (__comp(__pivot, *(__last - difference_type(1)))) {
504     // Not guarded since we know the last element is greater than the pivot.
505     while (!__comp(__pivot, *++__first)) {
506     }
507   } else {
508     while (++__first < __last && !__comp(__pivot, *__first)) {
509     }
510   }
511   // Find the last element less than or equal to the pivot.
512   if (__first < __last) {
513     // It will be always guarded because __introsort will do the median-of-three
514     // before calling this.
515     while (__comp(__pivot, *--__last)) {
516     }
517   }
518   // If the first element greater than the pivot is at or after the
519   // last element less than or equal to the pivot, then we have covered the
520   // entire range without swapping elements.  This implies the range is already
521   // partitioned.
522   bool __already_partitioned = __first >= __last;
523   if (!__already_partitioned) {
524     _Ops::iter_swap(__first, __last);
525     ++__first;
526   }
527 
528   // In [__first, __last) __last is not inclusive. From now on, it uses last
529   // minus one to be inclusive on both sides.
530   _RandomAccessIterator __lm1 = __last - difference_type(1);
531   uint64_t __left_bitset      = 0;
532   uint64_t __right_bitset     = 0;
533 
534   // Reminder: length = __lm1 - __first + 1.
535   while (__lm1 - __first >= 2 * __detail::__block_size - 1) {
536     // Record the comparison outcomes for the elements currently on the left
537     // side.
538     if (__left_bitset == 0)
539       std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset);
540     // Record the comparison outcomes for the elements currently on the right
541     // side.
542     if (__right_bitset == 0)
543       std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset);
544     // Swap the elements recorded to be the candidates for swapping in the
545     // bitsets.
546     std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset);
547     // Only advance the iterator if all the elements that need to be moved to
548     // other side were moved.
549     __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
550     __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0);
551   }
552   // Now, we have a less-than a block worth of elements on at least one of the
553   // sides.
554   std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>(
555       __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset);
556   // At least one the bitsets would be empty.  For the non-empty one, we need to
557   // properly partition the elements that appear within that bitset.
558   std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset);
559 
560   // Move the pivot to its correct position.
561   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
562   if (__begin != __pivot_pos) {
563     *__begin = _Ops::__iter_move(__pivot_pos);
564   }
565   *__pivot_pos = std::move(__pivot);
566   return std::make_pair(__pivot_pos, __already_partitioned);
567 }
568 
569 // Partition [__first, __last) using the comparator __comp.  *__first has the
570 // chosen pivot.  Elements that are equivalent are kept to the right of the
571 // pivot.  Returns the iterator for the pivot and a bool value which is true if
572 // the provided range is already sorted, false otherwise.  We assume that the
573 // length of the range is at least three elements.
574 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
575 _LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool>
576 __partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
577   using _Ops = _IterOps<_AlgPolicy>;
578   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
579   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
580   _LIBCPP_ASSERT(__last - __first >= difference_type(3), "");
581   _RandomAccessIterator __begin = __first;
582   value_type __pivot(_Ops::__iter_move(__first));
583   // Find the first element greater or equal to the pivot.  It will be always
584   // guarded because __introsort will do the median-of-three before calling
585   // this.
586   while (__comp(*++__first, __pivot))
587     ;
588 
589   // Find the last element less than the pivot.
590   if (__begin == __first - difference_type(1)) {
591     while (__first < __last && !__comp(*--__last, __pivot))
592       ;
593   } else {
594     // Guarded.
595     while (!__comp(*--__last, __pivot))
596       ;
597   }
598 
599   // If the first element greater than or equal to the pivot is at or after the
600   // last element less than the pivot, then we have covered the entire range
601   // without swapping elements.  This implies the range is already partitioned.
602   bool __already_partitioned = __first >= __last;
603   // Go through the remaining elements.  Swap pairs of elements (one to the
604   // right of the pivot and the other to left of the pivot) that are not on the
605   // correct side of the pivot.
606   while (__first < __last) {
607     _Ops::iter_swap(__first, __last);
608     while (__comp(*++__first, __pivot))
609       ;
610     while (!__comp(*--__last, __pivot))
611       ;
612   }
613   // Move the pivot to its correct position.
614   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
615   if (__begin != __pivot_pos) {
616     *__begin = _Ops::__iter_move(__pivot_pos);
617   }
618   *__pivot_pos = std::move(__pivot);
619   return std::make_pair(__pivot_pos, __already_partitioned);
620 }
621 
622 // Similar to the above function.  Elements equivalent to the pivot are put to
623 // the left of the pivot.  Returns the iterator to the pivot element.
624 template <class _AlgPolicy, class _RandomAccessIterator, class _Compare>
625 _LIBCPP_HIDE_FROM_ABI _RandomAccessIterator
626 __partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
627   using _Ops = _IterOps<_AlgPolicy>;
628   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
629   typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type;
630   _RandomAccessIterator __begin = __first;
631   value_type __pivot(_Ops::__iter_move(__first));
632   if (__comp(__pivot, *(__last - difference_type(1)))) {
633     // Guarded.
634     while (!__comp(__pivot, *++__first)) {
635     }
636   } else {
637     while (++__first < __last && !__comp(__pivot, *__first)) {
638     }
639   }
640 
641   if (__first < __last) {
642     // It will be always guarded because __introsort will do the
643     // median-of-three before calling this.
644     while (__comp(__pivot, *--__last)) {
645     }
646   }
647   while (__first < __last) {
648     _Ops::iter_swap(__first, __last);
649     while (!__comp(__pivot, *++__first))
650       ;
651     while (__comp(__pivot, *--__last))
652       ;
653   }
654   _RandomAccessIterator __pivot_pos = __first - difference_type(1);
655   if (__begin != __pivot_pos) {
656     *__begin = _Ops::__iter_move(__pivot_pos);
657   }
658   *__pivot_pos = std::move(__pivot);
659   return __first;
660 }
661 
662 // The main sorting function.  Implements introsort combined with other ideas:
663 //  - option of using block quick sort for partitioning,
664 //  - guarded and unguarded insertion sort for small lengths,
665 //  - Tuckey's ninther technique for computing the pivot,
666 //  - check on whether partition was not required.
667 // The implementation is partly based on Orson Peters' pattern-defeating
668 // quicksort, published at: <https://github.com/orlp/pdqsort>.
669 template <class _AlgPolicy, class _Compare, class _RandomAccessIterator, bool _UseBitSetPartition>
670 void __introsort(_RandomAccessIterator __first,
671                  _RandomAccessIterator __last,
672                  _Compare __comp,
673                  typename iterator_traits<_RandomAccessIterator>::difference_type __depth,
674                  bool __leftmost = true) {
675   using _Ops = _IterOps<_AlgPolicy>;
676   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
677   using _Comp_ref = __comp_ref_type<_Compare>;
678   // Upper bound for using insertion sort for sorting.
679   _LIBCPP_CONSTEXPR difference_type __limit = 24;
680   // Lower bound for using Tuckey's ninther technique for median computation.
681   _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128;
682   while (true) {
683     difference_type __len = __last - __first;
684     switch (__len) {
685     case 0:
686     case 1:
687       return;
688     case 2:
689       if (__comp(*--__last, *__first))
690         _Ops::iter_swap(__first, __last);
691       return;
692     case 3:
693       std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
694       return;
695     case 4:
696       std::__sort4_maybe_branchless<_AlgPolicy, _Compare>(
697           __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
698       return;
699     case 5:
700       std::__sort5_maybe_branchless<_AlgPolicy, _Compare>(
701           __first, __first + difference_type(1), __first + difference_type(2), __first + difference_type(3),
702           --__last, __comp);
703       return;
704     }
705     // Use insertion sort if the length of the range is below the specified limit.
706     if (__len < __limit) {
707       if (__leftmost) {
708         std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp);
709       } else {
710         std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp);
711       }
712       return;
713     }
714     if (__depth == 0) {
715       // Fallback to heap sort as Introsort suggests.
716       std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp);
717       return;
718     }
719     --__depth;
720     {
721       difference_type __half_len = __len / 2;
722       // Use Tuckey's ninther technique or median of 3 for pivot selection
723       // depending on the length of the range being sorted.
724       if (__len > __ninther_threshold) {
725         std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp);
726         std::__sort3<_AlgPolicy, _Compare>(
727             __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp);
728         std::__sort3<_AlgPolicy, _Compare>(
729             __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp);
730         std::__sort3<_AlgPolicy, _Compare>(
731             __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp);
732         _Ops::iter_swap(__first, __first + __half_len);
733       } else {
734         std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp);
735       }
736     }
737     // The elements to the left of the current iterator range are already
738     // sorted.  If the current iterator range to be sorted is not the
739     // leftmost part of the entire iterator range and the pivot is same as
740     // the highest element in the range to the left, then we know that all
741     // the elements in the range [first, pivot] would be equal to the pivot,
742     // assuming the equal elements are put on the left side when
743     // partitioned.  This also means that we do not need to sort the left
744     // side of the partition.
745     if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) {
746       __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>(
747           __first, __last, _Comp_ref(__comp));
748       continue;
749     }
750     // Use bitset partition only if asked for.
751     auto __ret =
752         _UseBitSetPartition
753             ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp)
754             : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp);
755     _RandomAccessIterator __i = __ret.first;
756     // [__first, __i) < *__i and *__i <= [__i+1, __last)
757     // If we were given a perfect partition, see if insertion sort is quick...
758     if (__ret.second) {
759       bool __fs = std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__first, __i, __comp);
760       if (std::__insertion_sort_incomplete<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp)) {
761         if (__fs)
762           return;
763         __last = __i;
764         continue;
765       } else {
766         if (__fs) {
767           __first = ++__i;
768           continue;
769         }
770       }
771     }
772     // Sort the left partiton recursively and the right partition with tail recursion elimination.
773     std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>(
774         __first, __i, __comp, __depth, __leftmost);
775     __leftmost = false;
776     __first    = ++__i;
777   }
778 }
779 
780 template <typename _Number>
781 inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
782   if (__n == 0)
783     return 0;
784   if (sizeof(__n) <= sizeof(unsigned))
785     return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
786   if (sizeof(__n) <= sizeof(unsigned long))
787     return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
788   if (sizeof(__n) <= sizeof(unsigned long long))
789     return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
790 
791   _Number __log2 = 0;
792   while (__n > 1) {
793     __log2++;
794     __n >>= 1;
795   }
796   return __log2;
797 }
798 
799 template <class _Comp, class _RandomAccessIterator>
800 void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp);
801 
802 extern template _LIBCPP_FUNC_VIS void __sort<__less<char>&, char*>(char*, char*, __less<char>&);
803 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
804 extern template _LIBCPP_FUNC_VIS void __sort<__less<wchar_t>&, wchar_t*>(wchar_t*, wchar_t*, __less<wchar_t>&);
805 #endif
806 extern template _LIBCPP_FUNC_VIS void __sort<__less<signed char>&, signed char*>(signed char*, signed char*, __less<signed char>&);
807 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned char>&, unsigned char*>(unsigned char*, unsigned char*, __less<unsigned char>&);
808 extern template _LIBCPP_FUNC_VIS void __sort<__less<short>&, short*>(short*, short*, __less<short>&);
809 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned short>&, unsigned short*>(unsigned short*, unsigned short*, __less<unsigned short>&);
810 extern template _LIBCPP_FUNC_VIS void __sort<__less<int>&, int*>(int*, int*, __less<int>&);
811 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned>&, unsigned*>(unsigned*, unsigned*, __less<unsigned>&);
812 extern template _LIBCPP_FUNC_VIS void __sort<__less<long>&, long*>(long*, long*, __less<long>&);
813 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long>&, unsigned long*>(unsigned long*, unsigned long*, __less<unsigned long>&);
814 extern template _LIBCPP_FUNC_VIS void __sort<__less<long long>&, long long*>(long long*, long long*, __less<long long>&);
815 extern template _LIBCPP_FUNC_VIS void __sort<__less<unsigned long long>&, unsigned long long*>(unsigned long long*, unsigned long long*, __less<unsigned long long>&);
816 extern template _LIBCPP_FUNC_VIS void __sort<__less<float>&, float*>(float*, float*, __less<float>&);
817 extern template _LIBCPP_FUNC_VIS void __sort<__less<double>&, double*>(double*, double*, __less<double>&);
818 extern template _LIBCPP_FUNC_VIS void __sort<__less<long double>&, long double*>(long double*, long double*, __less<long double>&);
819 
820 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
821 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
822 __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
823   typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
824   difference_type __depth_limit = 2 * std::__log2i(__last - __first);
825 
826   // Only use bitset partitioning for arithmetic types.  We should also check
827   // that the default comparator is in use so that we are sure that there are no
828   // branches in the comparator.
829   std::__introsort<_AlgPolicy,
830                    _Comp&,
831                    _RandomAccessIterator,
832                    __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(
833       __first, __last, __comp, __depth_limit);
834 }
835 
836 template <class _Type, class... _Options>
837 using __is_any_of = _Or<is_same<_Type, _Options>...>;
838 
839 template <class _Type>
840 using __sort_is_specialized_in_library = __is_any_of<
841     _Type,
842     char,
843 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
844     wchar_t,
845 #endif
846     signed char,
847     unsigned char,
848     short,
849     unsigned short,
850     int,
851     unsigned int,
852     long,
853     unsigned long,
854     long long,
855     unsigned long long,
856     float,
857     double,
858     long double>;
859 
860 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
861 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, __less<_Type>& __comp) {
862   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
863 }
864 
865 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
866 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<_Type>&) {
867   __less<_Type> __comp;
868   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
869 }
870 
871 #if _LIBCPP_STD_VER >= 14
872 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
873 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, less<>&) {
874   __less<_Type> __comp;
875   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
876 }
877 #endif
878 
879 #if _LIBCPP_STD_VER >= 20
880 template <class _AlgPolicy, class _Type, __enable_if_t<__sort_is_specialized_in_library<_Type>::value, int> = 0>
881 _LIBCPP_HIDE_FROM_ABI void __sort_dispatch(_Type* __first, _Type* __last, ranges::less&) {
882   __less<_Type> __comp;
883   std::__sort<__less<_Type>&, _Type*>(__first, __last, __comp);
884 }
885 #endif
886 
887 template <class _AlgPolicy, class _RandomAccessIterator, class _Comp>
888 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
889 void __sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp& __comp) {
890   std::__debug_randomize_range<_AlgPolicy>(__first, __last);
891 
892   if (__libcpp_is_constant_evaluated()) {
893     std::__partial_sort<_AlgPolicy>(
894         std::__unwrap_iter(__first), std::__unwrap_iter(__last), std::__unwrap_iter(__last), __comp);
895   } else {
896     std::__sort_dispatch<_AlgPolicy>(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __comp);
897   }
898 }
899 
900 template <class _RandomAccessIterator, class _Comp>
901 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
902 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp) {
903   std::__sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp);
904 }
905 
906 template <class _RandomAccessIterator>
907 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
908 void sort(_RandomAccessIterator __first, _RandomAccessIterator __last) {
909   std::sort(__first, __last, __less<typename iterator_traits<_RandomAccessIterator>::value_type>());
910 }
911 
912 _LIBCPP_END_NAMESPACE_STD
913 
914 #endif // _LIBCPP___ALGORITHM_SORT_H
915