1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11
12 #ifndef BOOST_MOVE_ADAPTIVE_SORT_HPP
13 #define BOOST_MOVE_ADAPTIVE_SORT_HPP
14
15 #include <boost/move/detail/config_begin.hpp>
16 #include <boost/move/algo/detail/adaptive_sort_merge.hpp>
17
18 namespace boost {
19 namespace movelib {
20
21 ///@cond
22 namespace detail_adaptive {
23
24 template<class RandIt>
move_data_backward(RandIt cur_pos,typename iterator_traits<RandIt>::size_type const l_data,RandIt new_pos,bool const xbuf_used)25 void move_data_backward( RandIt cur_pos
26 , typename iterator_traits<RandIt>::size_type const l_data
27 , RandIt new_pos
28 , bool const xbuf_used)
29 {
30 //Move buffer to the total combination right
31 if(xbuf_used){
32 boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
33 }
34 else{
35 boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
36 //Rotate does less moves but it seems slower due to cache issues
37 //rotate_gcd(first-l_block, first+len-l_block, first+len);
38 }
39 }
40
41 template<class RandIt>
move_data_forward(RandIt cur_pos,typename iterator_traits<RandIt>::size_type const l_data,RandIt new_pos,bool const xbuf_used)42 void move_data_forward( RandIt cur_pos
43 , typename iterator_traits<RandIt>::size_type const l_data
44 , RandIt new_pos
45 , bool const xbuf_used)
46 {
47 //Move buffer to the total combination right
48 if(xbuf_used){
49 boost::move(cur_pos, cur_pos+l_data, new_pos);
50 }
51 else{
52 boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
53 //Rotate does less moves but it seems slower due to cache issues
54 //rotate_gcd(first-l_block, first+len-l_block, first+len);
55 }
56 }
57
58 // build blocks of length 2*l_build_buf. l_build_buf is power of two
59 // input: [0, l_build_buf) elements are buffer, rest unsorted elements
60 // output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
61 //
62 // First elements are merged from right to left until elements start
63 // at first. All old elements [first, first + l_build_buf) are placed at the end
64 // [first+len-l_build_buf, first+len). To achieve this:
65 // - If we have external memory to merge, we save elements from the buffer
66 // so that a non-swapping merge is used. Buffer elements are restored
67 // at the end of the buffer from the external memory.
68 //
69 // - When the external memory is not available or it is insufficient
70 // for a merge operation, left swap merging is used.
71 //
72 // Once elements are merged left to right in blocks of l_build_buf, then a single left
73 // to right merge step is performed to achieve merged blocks of size 2K.
74 // If external memory is available, usual merge is used, swap merging otherwise.
75 //
76 // As a last step, if auxiliary memory is available in-place merge is performed.
77 // until all is merged or auxiliary memory is not large enough.
78 template<class RandIt, class Compare, class XBuf>
79 typename iterator_traits<RandIt>::size_type
adaptive_sort_build_blocks(RandIt const first,typename iterator_traits<RandIt>::size_type const len,typename iterator_traits<RandIt>::size_type const l_base,typename iterator_traits<RandIt>::size_type const l_build_buf,XBuf & xbuf,Compare comp)80 adaptive_sort_build_blocks
81 ( RandIt const first
82 , typename iterator_traits<RandIt>::size_type const len
83 , typename iterator_traits<RandIt>::size_type const l_base
84 , typename iterator_traits<RandIt>::size_type const l_build_buf
85 , XBuf & xbuf
86 , Compare comp)
87 {
88 typedef typename iterator_traits<RandIt>::size_type size_type;
89 BOOST_ASSERT(l_build_buf <= len);
90 BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));
91
92 //Place the start pointer after the buffer
93 RandIt first_block = first + l_build_buf;
94 size_type const elements_in_blocks = len - l_build_buf;
95
96 //////////////////////////////////
97 // Start of merge to left step
98 //////////////////////////////////
99 size_type l_merged = 0u;
100
101 BOOST_ASSERT(l_build_buf);
102 //If there is no enough buffer for the insertion sort step, just avoid the external buffer
103 size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
104 kbuf = kbuf < l_base ? 0 : kbuf;
105
106 if(kbuf){
107 //Backup internal buffer values in external buffer so they can be overwritten
108 xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
109 l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
110
111 //Now combine them using the buffer. Elements from buffer can be
112 //overwritten since they've been saved to xbuf
113 l_merged = op_merge_left_step_multiple
114 ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());
115
116 //Restore internal buffer from external buffer unless kbuf was l_build_buf,
117 //in that case restoration will happen later
118 if(kbuf != l_build_buf){
119 boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
120 }
121 }
122 else{
123 l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
124 rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
125 }
126
127 //Now combine elements using the buffer. Elements from buffer can't be
128 //overwritten since xbuf was not big enough, so merge swapping elements.
129 l_merged = op_merge_left_step_multiple
130 (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());
131
132 BOOST_ASSERT(l_merged == l_build_buf);
133
134 //////////////////////////////////
135 // Start of merge to right step
136 //////////////////////////////////
137
138 //If kbuf is l_build_buf then we can merge right without swapping
139 //Saved data is still in xbuf
140 if(kbuf && kbuf == l_build_buf){
141 op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
142 //Restore internal buffer from external buffer if kbuf was l_build_buf.
143 //as this operation was previously delayed.
144 boost::move(xbuf.data(), xbuf.data() + kbuf, first);
145 }
146 else{
147 op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
148 }
149 xbuf.clear();
150 //2*l_build_buf or total already merged
151 return min_value<size_type>(elements_in_blocks, 2*l_build_buf);
152 }
153
154 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
adaptive_sort_combine_blocks(RandItKeys const keys,KeyCompare key_comp,RandIt const first,typename iterator_traits<RandIt>::size_type const len,typename iterator_traits<RandIt>::size_type const l_prev_merged,typename iterator_traits<RandIt>::size_type const l_block,bool const use_buf,bool const xbuf_used,XBuf & xbuf,Compare comp,bool merge_left)155 void adaptive_sort_combine_blocks
156 ( RandItKeys const keys
157 , KeyCompare key_comp
158 , RandIt const first
159 , typename iterator_traits<RandIt>::size_type const len
160 , typename iterator_traits<RandIt>::size_type const l_prev_merged
161 , typename iterator_traits<RandIt>::size_type const l_block
162 , bool const use_buf
163 , bool const xbuf_used
164 , XBuf & xbuf
165 , Compare comp
166 , bool merge_left)
167 {
168 (void)xbuf;
169 typedef typename iterator_traits<RandIt>::size_type size_type;
170
171 size_type const l_reg_combined = 2*l_prev_merged;
172 size_type l_irreg_combined = 0;
173 size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
174 size_type const n_reg_combined = len/l_reg_combined;
175 RandIt combined_first = first;
176
177 (void)l_total_combined;
178 BOOST_ASSERT(l_total_combined <= len);
179
180 size_type const max_i = n_reg_combined + (l_irreg_combined != 0);
181
182 if(merge_left || !use_buf) {
183 for( size_type combined_i = 0; combined_i != max_i; ) {
184 //Now merge blocks
185 bool const is_last = combined_i==n_reg_combined;
186 size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
187
188 range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
189 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
190 combine_params( keys, key_comp, l_cur_combined
191 , l_prev_merged, l_block, rbuf
192 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
193 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
194 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
195 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
196 if(!use_buf){
197 merge_blocks_bufferless
198 (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
199 }
200 else{
201 merge_blocks_left
202 (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
203 }
204 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block);
205 ++combined_i;
206 if(combined_i != max_i)
207 combined_first += l_reg_combined;
208 }
209 }
210 else{
211 combined_first += l_reg_combined*(max_i-1);
212 for( size_type combined_i = max_i; combined_i; ) {
213 --combined_i;
214 bool const is_last = combined_i==n_reg_combined;
215 size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
216
217 RandIt const combined_last(combined_first+l_cur_combined);
218 range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
219 size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
220 combine_params( keys, key_comp, l_cur_combined
221 , l_prev_merged, l_block, rbuf
222 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs
223 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
224 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
225 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
226 merge_blocks_right
227 (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
228 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block);
229 if(combined_i)
230 combined_first -= l_reg_combined;
231 }
232 }
233 }
234
235 //Returns true if buffer is placed in
236 //[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
237 //[buffer,buffer+l_intbuf)
238 template<class RandIt, class Compare, class XBuf>
adaptive_sort_combine_all_blocks(RandIt keys,typename iterator_traits<RandIt>::size_type & n_keys,RandIt const buffer,typename iterator_traits<RandIt>::size_type const l_buf_plus_data,typename iterator_traits<RandIt>::size_type l_merged,typename iterator_traits<RandIt>::size_type & l_intbuf,XBuf & xbuf,Compare comp)239 bool adaptive_sort_combine_all_blocks
240 ( RandIt keys
241 , typename iterator_traits<RandIt>::size_type &n_keys
242 , RandIt const buffer
243 , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
244 , typename iterator_traits<RandIt>::size_type l_merged
245 , typename iterator_traits<RandIt>::size_type &l_intbuf
246 , XBuf & xbuf
247 , Compare comp)
248 {
249 typedef typename iterator_traits<RandIt>::size_type size_type;
250 RandIt const first = buffer + l_intbuf;
251 size_type const l_data = l_buf_plus_data - l_intbuf;
252 size_type const l_unique = l_intbuf+n_keys;
253 //Backup data to external buffer once if possible
254 bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
255 if(common_xbuf){
256 xbuf.move_assign(buffer, l_intbuf);
257 }
258
259 bool prev_merge_left = true;
260 size_type l_prev_total_combined = l_merged, l_prev_block = 0;
261 bool prev_use_internal_buf = true;
262
263 for( size_type n = 0; l_data > l_merged
264 ; l_merged*=2
265 , ++n){
266 //If l_intbuf is non-zero, use that internal buffer.
267 // Implies l_block == l_intbuf && use_internal_buf == true
268 //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
269 // Implies l_block == n_keys/2 && use_internal_buf == true
270 //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false)
271 bool use_internal_buf = false;
272 size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf);
273 BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
274 BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
275 BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
276
277 bool const is_merge_left = (n&1) == 0;
278 size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
279 if(n && prev_use_internal_buf && prev_merge_left){
280 if(is_merge_left || !use_internal_buf){
281 move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
282 }
283 else{
284 //Put the buffer just after l_total_combined
285 RandIt const buf_end = first+l_prev_total_combined;
286 RandIt const buf_beg = buf_end-l_block;
287 if(l_prev_total_combined > l_total_combined){
288 size_type const l_diff = l_prev_total_combined - l_total_combined;
289 move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
290 }
291 else if(l_prev_total_combined < l_total_combined){
292 size_type const l_diff = l_total_combined - l_prev_total_combined;
293 move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
294 }
295 }
296 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf);
297 }
298
299 //Combine to form l_merged*2 segments
300 if(n_keys){
301 size_type upper_n_keys_this_iter = 2*l_merged/l_block;
302 if(upper_n_keys_this_iter > 256){
303 adaptive_sort_combine_blocks
304 ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
305 , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
306 }
307 else{
308 unsigned char uint_keys[256];
309 adaptive_sort_combine_blocks
310 ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
311 , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
312 }
313 }
314 else{
315 size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
316 adaptive_sort_combine_blocks
317 ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
318 , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
319 }
320
321 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf);
322 prev_merge_left = is_merge_left;
323 l_prev_total_combined = l_total_combined;
324 l_prev_block = l_block;
325 prev_use_internal_buf = use_internal_buf;
326 }
327 BOOST_ASSERT(l_prev_total_combined == l_data);
328 bool const buffer_right = prev_use_internal_buf && prev_merge_left;
329
330 l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
331 n_keys = l_unique - l_intbuf;
332 //Restore data from to external common buffer if used
333 if(common_xbuf){
334 if(buffer_right){
335 boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
336 }
337 else{
338 boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
339 }
340 }
341 return buffer_right;
342 }
343
344
345 template<class RandIt, class Compare, class XBuf>
adaptive_sort_final_merge(bool buffer_right,RandIt const first,typename iterator_traits<RandIt>::size_type const l_intbuf,typename iterator_traits<RandIt>::size_type const n_keys,typename iterator_traits<RandIt>::size_type const len,XBuf & xbuf,Compare comp)346 void adaptive_sort_final_merge( bool buffer_right
347 , RandIt const first
348 , typename iterator_traits<RandIt>::size_type const l_intbuf
349 , typename iterator_traits<RandIt>::size_type const n_keys
350 , typename iterator_traits<RandIt>::size_type const len
351 , XBuf & xbuf
352 , Compare comp)
353 {
354 //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
355 xbuf.clear();
356
357 typedef typename iterator_traits<RandIt>::size_type size_type;
358 size_type const n_key_plus_buf = l_intbuf+n_keys;
359 if(buffer_right){
360 //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
361 stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
362 stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
363 unstable_sort(first, first+n_keys, comp, xbuf);
364 stable_merge(first, first+n_keys, first+len, comp, xbuf);
365 }
366 else{
367 //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
368 stable_sort(first, first+n_key_plus_buf, comp, xbuf);
369 if(xbuf.capacity() >= n_key_plus_buf){
370 buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
371 }
372 else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
373 stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
374 stable_merge(first, first+n_keys, first+len, comp, xbuf);
375 }
376 else{
377 stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
378 }
379 }
380 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len);
381 }
382
383 template<class RandIt, class Compare, class Unsigned, class XBuf>
adaptive_sort_build_params(RandIt first,Unsigned const len,Compare comp,Unsigned & n_keys,Unsigned & l_intbuf,Unsigned & l_base,Unsigned & l_build_buf,XBuf & xbuf)384 bool adaptive_sort_build_params
385 (RandIt first, Unsigned const len, Compare comp
386 , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
387 , XBuf & xbuf
388 )
389 {
390 typedef Unsigned size_type;
391
392 //Calculate ideal parameters and try to collect needed unique keys
393 l_base = 0u;
394
395 //Try to find a value near sqrt(len) that is 2^N*l_base where
396 //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
397 //as build_blocks merges to the left iteratively duplicating the
398 //merged size and all the buffer must be used just before the final
399 //merge to right step. This guarantees "build_blocks" produces
400 //segments of size l_build_buf*2, maximizing the classic merge phase.
401 l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
402
403 //The internal buffer can be expanded if there is enough external memory
404 while(xbuf.capacity() >= l_intbuf*2){
405 l_intbuf *= 2;
406 }
407
408 //This is the minimum number of keys to implement the ideal algorithm
409 //
410 //l_intbuf is used as buffer plus the key count
411 size_type n_min_ideal_keys = l_intbuf-1;
412 while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
413 --n_min_ideal_keys;
414 }
415 n_min_ideal_keys += 1;
416 BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);
417
418 if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
419 n_keys = 0u;
420 l_build_buf = l_intbuf;
421 }
422 else{
423 //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
424 //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
425 //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
426 //
427 //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
428 //(to be used for keys in combine_all_blocks) as the whole l_build_buf
429 //will be backuped in the buffer during build_blocks.
430 bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
431 size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
432 size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
433
434 //If available memory is 2*sqrt(l), then for "build_params"
435 //the situation is the same as if 2*l_intbuf were collected.
436 if(non_unique_buf && collected == n_min_ideal_keys){
437 l_build_buf = l_intbuf;
438 n_keys = n_min_ideal_keys;
439 }
440 else if(collected == 2*l_intbuf){
441 //l_intbuf*2 elements found. Use all of them in the build phase
442 l_build_buf = l_intbuf*2;
443 n_keys = l_intbuf;
444 }
445 else if(collected == (n_min_ideal_keys+l_intbuf)){
446 l_build_buf = l_intbuf;
447 n_keys = n_min_ideal_keys;
448 }
449 //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
450 //is possible (due to very low unique keys), then go to a slow sort based on rotations.
451 else{
452 BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
453 if(collected < 4){ //No combination possible with less that 4 keys
454 return false;
455 }
456 n_keys = l_intbuf;
457 while(n_keys&(n_keys-1)){
458 n_keys &= n_keys-1; // make it power or 2
459 }
460 while(n_keys > collected){
461 n_keys/=2;
462 }
463 //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
464 l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
465 l_intbuf = 0;
466 l_build_buf = n_keys;
467 }
468 BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
469 }
470
471 return true;
472 }
473
474 // Main explanation of the sort algorithm.
475 //
476 // csqrtlen = ceil(sqrt(len));
477 //
478 // * First, 2*csqrtlen unique elements elements are extracted from elements to be
479 // sorted and placed in the beginning of the range.
480 //
481 // * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
482 // will be used as auxiliary memory, so trailing len-2*csqrtlen elements are
483 // are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
484 // 2*csqrtlen unique elements are again the leading elements of the whole range.
485 //
486 // * Step "combine_blocks": pairs of previously formed blocks are merged with a different
487 // ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
488 // "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
489 // elements, etc) of until all trailing (len-2*csqrtlen) elements are merged.
490 //
491 // In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to
492 // know if elements belong to the first or second block to be merged and another
493 // leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
494 //
495 // Iteratively until all trailing (len-2*csqrtlen) elements are merged:
496 // Iteratively for each pair of previously merged block:
497 // * Blocks are divided groups of csqrtlen elements and
498 // 2*merged_block/csqrtlen keys are sorted to be used as markers
499 // * Groups are selection-sorted by first or last element (depending whether they are going
500 // to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
501 // * Elements of each block pair are merged using the csqrtlen buffer taking into account
502 // if they belong to the first half or second half (marked by the key).
503 //
504 // * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
505 // rotations with the rest of sorted elements in the "combine_blocks" step.
506 //
507 // Corner cases:
508 //
509 // * If no 2*csqrtlen elements can be extracted:
510 //
511 // * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
512 // as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
513 // means that an additional "combine_blocks" step will be needed to merge all elements.
514 //
515 // * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
516 // then reduces the number of elements used as buffer and keys in the "build_blocks"
517 // and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
518 // then uses a rotation based smart merge.
519 //
520 // * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
521 //
522 // * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used.
523 //
524 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
525 // then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
526 // keys to combine blocks.
527 //
528 // * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
529 // using classic merge and "combine_blocks" will use bigger blocks when merging.
530 template<class RandIt, class Compare, class XBuf>
adaptive_sort_impl(RandIt first,typename iterator_traits<RandIt>::size_type const len,Compare comp,XBuf & xbuf)531 void adaptive_sort_impl
532 ( RandIt first
533 , typename iterator_traits<RandIt>::size_type const len
534 , Compare comp
535 , XBuf & xbuf
536 )
537 {
538 typedef typename iterator_traits<RandIt>::size_type size_type;
539
540 //Small sorts go directly to insertion sort
541 if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
542 insertion_sort(first, first + len, comp);
543 }
544 else if((len-len/2) <= xbuf.capacity()){
545 merge_sort(first, first+len, comp, xbuf.data());
546 }
547 else{
548 //Make sure it is at least four
549 BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
550
551 size_type l_base = 0;
552 size_type l_intbuf = 0;
553 size_type n_keys = 0;
554 size_type l_build_buf = 0;
555
556 //Calculate and extract needed unique elements. If a minimum is not achieved
557 //fallback to a slow stable sort
558 if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
559 stable_sort(first, first+len, comp, xbuf);
560 }
561 else{
562 BOOST_ASSERT(l_build_buf);
563 //Otherwise, continue the adaptive_sort
564 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len);
565 size_type const n_key_plus_buf = l_intbuf+n_keys;
566 //l_build_buf is always power of two if l_intbuf is zero
567 BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
568
569 //Classic merge sort until internal buffer and xbuf are exhausted
570 size_type const l_merged = adaptive_sort_build_blocks
571 (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
572 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len);
573
574 //Non-trivial merge
575 bool const buffer_right = adaptive_sort_combine_all_blocks
576 (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);
577
578 //Sort keys and buffer and merge the whole sequence
579 adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
580 }
581 }
582 }
583
584 } //namespace detail_adaptive {
585
586 ///@endcond
587
588 //! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
589 //! to comparison functor "comp". The sort is stable (order of equal elements
590 //! is guaranteed to be preserved). Performance is improved if additional raw storage is
591 //! provided.
592 //!
593 //! <b>Requires</b>:
594 //! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
595 //! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
596 //!
597 //! <b>Parameters</b>:
598 //! - first, last: the range of elements to sort
599 //! - comp: comparison function object which returns true if the first argument is is ordered before the second.
600 //! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
601 //! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
602 //! is ceil(std::distance(first, last)/2).
603 //!
604 //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
605 //! of dereferenced RandIt throws.
606 //!
607 //! <b>Complexity</b>: Always K x O(Nxlog(N)) comparisons and move assignments/constructors/swaps.
608 //! Comparisons are close to minimum even with no additional memory. Constant factor for data movement is minimized
609 //! when uninitialized_len is ceil(std::distance(first, last)/2). Pretty good enough performance is achieved when
610 //! ceil(sqrt(std::distance(first, last)))*2.
611 //!
612 //! <b>Caution</b>: Experimental implementation, not production-ready.
613 template<class RandIt, class RandRawIt, class Compare>
adaptive_sort(RandIt first,RandIt last,Compare comp,RandRawIt uninitialized,typename iterator_traits<RandIt>::size_type uninitialized_len)614 void adaptive_sort( RandIt first, RandIt last, Compare comp
615 , RandRawIt uninitialized
616 , typename iterator_traits<RandIt>::size_type uninitialized_len)
617 {
618 typedef typename iterator_traits<RandIt>::size_type size_type;
619 typedef typename iterator_traits<RandIt>::value_type value_type;
620
621 ::boost::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len);
622 ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf);
623 }
624
625 template<class RandIt, class Compare>
adaptive_sort(RandIt first,RandIt last,Compare comp)626 void adaptive_sort( RandIt first, RandIt last, Compare comp)
627 {
628 typedef typename iterator_traits<RandIt>::value_type value_type;
629 adaptive_sort(first, last, comp, (value_type*)0, 0u);
630 }
631
632 } //namespace movelib {
633 } //namespace boost {
634
635 #include <boost/move/detail/config_end.hpp>
636
637 #endif //#define BOOST_MOVE_ADAPTIVE_SORT_HPP
638