• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11 
12 #ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP
13 #define BOOST_MOVE_ADAPTIVE_MERGE_HPP
14 
15 #include <boost/move/detail/config_begin.hpp>
16 #include <boost/move/algo/detail/adaptive_sort_merge.hpp>
17 
18 namespace boost {
19 namespace movelib {
20 
21 ///@cond
22 namespace detail_adaptive {
23 
24 template<class RandIt, class Compare, class XBuf>
adaptive_merge_combine_blocks(RandIt first,typename iterator_traits<RandIt>::size_type len1,typename iterator_traits<RandIt>::size_type len2,typename iterator_traits<RandIt>::size_type collected,typename iterator_traits<RandIt>::size_type n_keys,typename iterator_traits<RandIt>::size_type l_block,bool use_internal_buf,bool xbuf_used,Compare comp,XBuf & xbuf)25 inline void adaptive_merge_combine_blocks( RandIt first
26                                       , typename iterator_traits<RandIt>::size_type len1
27                                       , typename iterator_traits<RandIt>::size_type len2
28                                       , typename iterator_traits<RandIt>::size_type collected
29                                       , typename iterator_traits<RandIt>::size_type n_keys
30                                       , typename iterator_traits<RandIt>::size_type l_block
31                                       , bool use_internal_buf
32                                       , bool xbuf_used
33                                       , Compare comp
34                                       , XBuf & xbuf
35                                       )
36 {
37    typedef typename iterator_traits<RandIt>::size_type size_type;
38    size_type const len = len1+len2;
39    size_type const l_combine  = len-collected;
40    size_type const l_combine1 = len1-collected;
41 
42     if(n_keys){
43       RandIt const first_data = first+collected;
44       RandIt const keys = first;
45       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
46       if(xbuf_used){
47          if(xbuf.size() < l_block){
48             xbuf.initialize_until(l_block, *first);
49          }
50          BOOST_ASSERT(xbuf.size() >= l_block);
51          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
52          combine_params( keys, comp, l_combine
53                            , l_combine1, l_block, xbuf
54                            , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
55          op_merge_blocks_with_buf
56             (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
57          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg xbf: ", len);
58       }
59       else{
60          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
61          combine_params( keys, comp, l_combine
62                            , l_combine1, l_block, xbuf
63                            , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
64          if(use_internal_buf){
65             op_merge_blocks_with_buf
66                (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block);
67             BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A mrg buf: ", len);
68          }
69          else{
70             merge_blocks_bufferless
71                (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
72             BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg nbf: ", len);
73          }
74       }
75    }
76    else{
77       xbuf.shrink_to_fit(l_block);
78       if(xbuf.size() < l_block){
79          xbuf.initialize_until(l_block, *first);
80       }
81       size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
82       size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
83       combine_params( uint_keys, less(), l_combine
84                      , l_combine1, l_block, xbuf
85                      , n_block_a, n_block_b, l_irreg1, l_irreg2, true);   //Outputs
86       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
87       BOOST_ASSERT(xbuf.size() >= l_block);
88       op_merge_blocks_with_buf
89          (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
90       xbuf.clear();
91       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg buf: ", len);
92    }
93 }
94 
95 template<class RandIt, class Compare, class XBuf>
adaptive_merge_final_merge(RandIt first,typename iterator_traits<RandIt>::size_type len1,typename iterator_traits<RandIt>::size_type len2,typename iterator_traits<RandIt>::size_type collected,typename iterator_traits<RandIt>::size_type l_intbuf,typename iterator_traits<RandIt>::size_type l_block,bool use_internal_buf,bool xbuf_used,Compare comp,XBuf & xbuf)96 inline void adaptive_merge_final_merge( RandIt first
97                                       , typename iterator_traits<RandIt>::size_type len1
98                                       , typename iterator_traits<RandIt>::size_type len2
99                                       , typename iterator_traits<RandIt>::size_type collected
100                                       , typename iterator_traits<RandIt>::size_type l_intbuf
101                                       , typename iterator_traits<RandIt>::size_type l_block
102                                       , bool use_internal_buf
103                                       , bool xbuf_used
104                                       , Compare comp
105                                       , XBuf & xbuf
106                                       )
107 {
108    typedef typename iterator_traits<RandIt>::size_type size_type;
109    (void)l_block;
110    (void)use_internal_buf;
111    size_type n_keys = collected-l_intbuf;
112    size_type len = len1+len2;
113    if (!xbuf_used || n_keys) {
114       xbuf.clear();
115       const size_type middle = xbuf_used && n_keys ? n_keys: collected;
116       unstable_sort(first, first + middle, comp, xbuf);
117       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A k/b srt: ", len);
118       stable_merge(first, first + middle, first + len, comp, xbuf);
119    }
120    BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A fin mrg: ", len);
121 }
122 
123 template<class SizeType>
adaptive_merge_n_keys_without_external_keys(SizeType l_block,SizeType len1,SizeType len2,SizeType l_intbuf)124 inline static SizeType adaptive_merge_n_keys_without_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
125 {
126    typedef SizeType size_type;
127    //This is the minimum number of keys to implement the ideal algorithm
128    size_type n_keys = len1/l_block+len2/l_block;
129    const size_type second_half_blocks = len2/l_block;
130    const size_type first_half_aux = len1-l_intbuf;
131    while(n_keys >= ((first_half_aux-n_keys)/l_block + second_half_blocks)){
132       --n_keys;
133    }
134    ++n_keys;
135    return n_keys;
136 }
137 
138 template<class SizeType>
adaptive_merge_n_keys_with_external_keys(SizeType l_block,SizeType len1,SizeType len2,SizeType l_intbuf)139 inline static SizeType adaptive_merge_n_keys_with_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
140 {
141    typedef SizeType size_type;
142    //This is the minimum number of keys to implement the ideal algorithm
143    size_type n_keys = (len1-l_intbuf)/l_block + len2/l_block;
144    return n_keys;
145 }
146 
147 template<class SizeType, class Xbuf>
adaptive_merge_n_keys_intbuf(SizeType & rl_block,SizeType len1,SizeType len2,Xbuf & xbuf,SizeType & l_intbuf_inout)148 inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
149 {
150    typedef SizeType size_type;
151    size_type l_block = rl_block;
152    size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
153 
154    if (xbuf.capacity() > l_block){
155       l_block = xbuf.capacity();
156    }
157 
158    //This is the minimum number of keys to implement the ideal algorithm
159    size_type n_keys = adaptive_merge_n_keys_without_external_keys(l_block, len1, len2, l_intbuf);
160    BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
161 
162    if(xbuf.template supports_aligned_trailing<size_type>
163       ( l_block
164       , adaptive_merge_n_keys_with_external_keys(l_block, len1, len2, l_intbuf)))
165    {
166       n_keys = 0u;
167    }
168    l_intbuf_inout = l_intbuf;
169    rl_block = l_block;
170    return n_keys;
171 }
172 
173 // Main explanation of the merge algorithm.
174 //
175 // csqrtlen = ceil(sqrt(len));
176 //
177 // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
178 //   unique elements are extracted from elements to be sorted and placed in the beginning of the range.
179 //
180 // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
181 //   are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
182 //
183 //   Explanation of the "combine_blocks" step:
184 //
185 //         * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements.
186 //           Remaining elements that can't form a group are grouped in front of those elements.
187 //         * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements.
188 //           Remaining elements that can't form a group are grouped in the back of those elements.
189 //         * In parallel the following two steps are performed:
190 //             *  Groups are selection-sorted by first or last element (depending whether they are going
191 //                to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
192 //             * Elements of each block pair are merged using the csqrtlen buffer taking into account
193 //                if they belong to the first half or second half (marked by the key).
194 //
195 // * In the final merge step leading "to_collect" elements are merged with rotations
196 //   with the rest of merged elements in the "combine_blocks" step.
197 //
198 // Corner cases:
199 //
200 // * If no "to_collect" elements can be extracted:
201 //
202 //    * If more than a minimum number of elements is extracted
203 //      then reduces the number of elements used as buffer and keys in the
204 //      and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
205 //      then uses a rotation based smart merge.
206 //
207 //    * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
208 //
209 // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed.
210 //
211 // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed.
212 //
213 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
214 //   then no csqrtlen need to be extracted and "combine_blocks" will use integral
215 //   keys to combine blocks.
216 template<class RandIt, class Compare, class XBuf>
adaptive_merge_impl(RandIt first,typename iterator_traits<RandIt>::size_type len1,typename iterator_traits<RandIt>::size_type len2,Compare comp,XBuf & xbuf)217 void adaptive_merge_impl
218    ( RandIt first
219    , typename iterator_traits<RandIt>::size_type len1
220    , typename iterator_traits<RandIt>::size_type len2
221    , Compare comp
222    , XBuf & xbuf
223    )
224 {
225    typedef typename iterator_traits<RandIt>::size_type size_type;
226 
227    if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
228       buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
229    }
230    else{
231       const size_type len = len1+len2;
232       //Calculate ideal parameters and try to collect needed unique keys
233       size_type l_block = size_type(ceil_sqrt(len));
234 
235       //One range is not big enough to extract keys and the internal buffer so a
236       //rotation-based based merge will do just fine
237       if(len1 <= l_block*2 || len2 <= l_block*2){
238          merge_bufferless(first, first+len1, first+len1+len2, comp);
239          return;
240       }
241 
242       //Detail the number of keys and internal buffer. If xbuf has enough memory, no
243       //internal buffer is needed so l_intbuf will remain 0.
244       size_type l_intbuf = 0;
245       size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
246       size_type const to_collect = l_intbuf+n_keys;
247       //Try to extract needed unique values from the first range
248       size_type const collected  = collect_unique(first, first+len1, to_collect, comp, xbuf);
249       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   A collect: ", len);
250 
251       //Not the minimum number of keys is not available on the first range, so fallback to rotations
252       if(collected != to_collect && collected < 4){
253          merge_bufferless(first, first+collected, first+len1, comp);
254          merge_bufferless(first, first + len1, first + len1 + len2, comp);
255          return;
256       }
257 
258       //If not enough keys but more than minimum, adjust the internal buffer and key count
259       bool use_internal_buf = collected == to_collect;
260       if (!use_internal_buf){
261          l_intbuf = 0u;
262          n_keys = collected;
263          l_block  = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
264          //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used
265          l_intbuf = use_internal_buf ? l_block : 0u;
266       }
267 
268       bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
269       //Merge trailing elements using smart merges
270       adaptive_merge_combine_blocks(first, len1, len2, collected,   n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
271       //Merge buffer and keys with the rest of the values
272       adaptive_merge_final_merge   (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
273    }
274 }
275 
276 }  //namespace detail_adaptive {
277 
278 ///@endcond
279 
280 //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last)
281 //!   into one sorted range [first, last) according to the given comparison function comp.
282 //!   The algorithm is stable (if there are equivalent elements in the original two ranges,
283 //!   the elements from the first range (preserving their original order) precede the elements
284 //!   from the second range (preserving their original order).
285 //!
286 //! <b>Requires</b>:
287 //!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
288 //!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
289 //!
290 //! <b>Parameters</b>:
291 //!   - first: the beginning of the first sorted range.
292 //!   - middle: the end of the first sorted range and the beginning of the second
293 //!   - last: the end of the second sorted range
294 //!   - comp: comparison function object which returns true if the first argument is is ordered before the second.
295 //!   - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
296 //!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
297 //!      is min(std::distance(first, middle), std::distance(middle, last)).
298 //!
299 //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
300 //!   of dereferenced RandIt throws.
301 //!
302 //! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps.
303 //!   Constant factor for comparisons and data movement is minimized when uninitialized_len
304 //!   is min(std::distance(first, middle), std::distance(middle, last)).
305 //!   Pretty good enough performance is achieved when uninitialized_len is
306 //!   ceil(sqrt(std::distance(first, last)))*2.
307 //!
308 //! <b>Caution</b>: Experimental implementation, not production-ready.
309 template<class RandIt, class Compare>
adaptive_merge(RandIt first,RandIt middle,RandIt last,Compare comp,typename iterator_traits<RandIt>::value_type * uninitialized=0,typename iterator_traits<RandIt>::size_type uninitialized_len=0)310 void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp
311                 , typename iterator_traits<RandIt>::value_type* uninitialized = 0
312                 , typename iterator_traits<RandIt>::size_type uninitialized_len = 0)
313 {
314    typedef typename iterator_traits<RandIt>::size_type  size_type;
315    typedef typename iterator_traits<RandIt>::value_type value_type;
316 
317    if (first == middle || middle == last){
318       return;
319    }
320 
321    //Reduce ranges to merge if possible
322    do {
323       if (comp(*middle, *first)){
324          break;
325       }
326       ++first;
327       if (first == middle)
328          return;
329    } while(1);
330 
331    RandIt first_high(middle);
332    --first_high;
333    do {
334       --last;
335       if (comp(*last, *first_high)){
336          ++last;
337          break;
338       }
339       if (last == middle)
340          return;
341    } while(1);
342 
343    ::boost::movelib::adaptive_xbuf<value_type, value_type*, size_type> xbuf(uninitialized, size_type(uninitialized_len));
344    ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf);
345 }
346 
347 }  //namespace movelib {
348 }  //namespace boost {
349 
350 #include <boost/move/detail/config_end.hpp>
351 
352 #endif   //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP
353