1 //----------------------------------------------------------------------------
2 /// @file sample_sort.hpp
3 /// @brief contains the class sample_sort
4 ///
5 /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
6 /// Distributed under the Boost Software License, Version 1.0.\n
7 /// ( See accompanying file LICENSE_1_0.txt or copy at
8 /// http://www.boost.org/LICENSE_1_0.txt )
9 /// @version 0.1
10 ///
11 /// @remarks
12 //-----------------------------------------------------------------------------
13 #ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
14 #define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
15
16 #include <functional>
17 #include <future>
18 #include <iterator>
19 #include <memory>
20 #include <type_traits>
21 #include <vector>
22
23 #include <algorithm>
24 #include <boost/sort/spinsort/spinsort.hpp>
25 #include <boost/sort/common/indirect.hpp>
26 #include <boost/sort/common/util/atomic.hpp>
27 #include <boost/sort/common/merge_four.hpp>
28 #include <boost/sort/common/merge_vector.hpp>
29 #include <boost/sort/common/range.hpp>
30
31 namespace boost
32 {
33 namespace sort
34 {
35 namespace sample_detail
36 {
37 //---------------------------------------------------------------------------
38 // USING SENTENCES
39 //---------------------------------------------------------------------------
40 namespace bsc = boost::sort::common;
41 namespace bss = boost::sort::spin_detail;
42 namespace bscu = boost::sort::common::util;
43 using bsc::range;
44 using bscu::atomic_add;
45 using bsc::merge_vector4;
46 using bsc::uninit_merge_level4;
47 using bsc::less_ptr_no_null;
48
49 //
50 ///---------------------------------------------------------------------------
51 /// @struct sample_sort
52 /// @brief This a structure for to implement a sample sort, exception
53 /// safe
54 /// @tparam
55 /// @remarks
56 //----------------------------------------------------------------------------
57 template<class Iter_t, class Compare>
58 struct sample_sort
59 {
60 //------------------------------------------------------------------------
61 // DEFINITIONS
62 //------------------------------------------------------------------------
63 typedef value_iter<Iter_t> value_t;
64 typedef range<Iter_t> range_it;
65 typedef range<value_t *> range_buf;
66 typedef sample_sort<Iter_t, Compare> this_t;
67
68 //------------------------------------------------------------------------
69 // VARIABLES AND CONSTANTS
70 //------------------------------------------------------------------------
71 // minimun numbers of elements for to be sortd in parallel mode
72 static const uint32_t thread_min = (1 << 16);
73
74 // Number of threads to use in the algorithm
75 // Number of intervals for to do the internal division of the data
76 uint32_t nthread, ninterval;
77
78 // Bool variables indicating if the auxiliary memory is constructed
79 // and indicating in the auxiliary memory had been obtained inside the
80 /// algorithm or had been received as a parameter
81 bool construct = false, owner = false;
82
83 // Comparison object for to compare two elements
84 Compare comp;
85
86 // Range with all the elements to sort
87 range_it global_range;
88
89 // range with the auxiliary memory
90 range_buf global_buf;
91
92 // vector of futures
93 std::vector<std::future<void>> vfuture;
94
95 // vector of vectors which contains the ranges to merge obtained in the
96 // subdivision
97 std::vector<std::vector<range_it>> vv_range_it;
98
99 // each vector of ranges of the vv_range_it, need their corresponding buffer
100 // for to do the merge
101 std::vector<std::vector<range_buf>> vv_range_buf;
102
103 // Initial vector of ranges
104 std::vector<range_it> vrange_it_ini;
105
106 // Initial vector of buffers
107 std::vector<range_buf> vrange_buf_ini;
108
109 // atomic counter for to know when are finished the function_t created
110 // inside a function
111 std::atomic<uint32_t> njob;
112
113 // Indicate if an error in the algorithm for to undo all
114 bool error;
115
116 //------------------------------------------------------------------------
117 // FUNCTIONS OF THE STRUCT
118 //------------------------------------------------------------------------
119 void initial_configuration(void);
120
121 sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
122 value_t *paux, size_t naux);
123
sample_sortboost::sort::sample_detail::sample_sort124 sample_sort(Iter_t first, Iter_t last)
125 : sample_sort (first, last, Compare(), std::thread::hardware_concurrency(),
126 nullptr, 0) { };
127
sample_sortboost::sort::sample_detail::sample_sort128 sample_sort(Iter_t first, Iter_t last, Compare cmp)
129 : sample_sort(first, last, cmp, std::thread::hardware_concurrency(),
130 nullptr, 0) { };
131
sample_sortboost::sort::sample_detail::sample_sort132 sample_sort(Iter_t first, Iter_t last, uint32_t num_thread)
133 : sample_sort(first, last, Compare(), num_thread, nullptr, 0) { };
134
sample_sortboost::sort::sample_detail::sample_sort135 sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread)
136 : sample_sort(first, last, cmp, num_thread, nullptr, 0) { };
137
sample_sortboost::sort::sample_detail::sample_sort138 sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
139 range_buf range_buf_initial)
140 : sample_sort(first, last, cmp, num_thread,
141 range_buf_initial.first, range_buf_initial.size()) { };
142
143 void destroy_all(void);
144 //
145 //-----------------------------------------------------------------------------
146 // function :~sample_sort
147 /// @brief destructor of the class. The utility is to destroy the temporary
148 /// buffer used in the sorting process
149 //-----------------------------------------------------------------------------
~sample_sortboost::sort::sample_detail::sample_sort150 ~sample_sort(void) { destroy_all(); };
151 //
152 //-----------------------------------------------------------------------
153 // function : execute first
154 /// @brief this a function to assign to each thread in the first merge
155 //-----------------------------------------------------------------------
execute_firstboost::sort::sample_detail::sample_sort156 void execute_first(void)
157 {
158 uint32_t job = 0;
159 while ((job = atomic_add(njob, 1)) < ninterval)
160 {
161 uninit_merge_level4(vrange_buf_ini[job], vv_range_it[job],
162 vv_range_buf[job], comp);
163 };
164 };
165 //
166 //-----------------------------------------------------------------------
167 // function : execute
168 /// @brief this is a function to assignt each thread the final merge
169 //-----------------------------------------------------------------------
executeboost::sort::sample_detail::sample_sort170 void execute(void)
171 {
172 uint32_t job = 0;
173 while ((job = atomic_add(njob, 1)) < ninterval)
174 {
175 merge_vector4(vrange_buf_ini[job], vrange_it_ini[job],
176 vv_range_buf[job], vv_range_it[job], comp);
177 };
178 };
179 //
180 //-----------------------------------------------------------------------
181 // function : first merge
182 /// @brief Implement the merge of the initially sparse ranges
183 //-----------------------------------------------------------------------
first_mergeboost::sort::sample_detail::sample_sort184 void first_merge(void)
185 { //---------------------------------- begin --------------------------
186 njob = 0;
187
188 for (uint32_t i = 0; i < nthread; ++i)
189 {
190 vfuture[i] = std::async(std::launch::async, &this_t::execute_first,
191 this);
192 };
193 for (uint32_t i = 0; i < nthread; ++i)
194 vfuture[i].get();
195 };
196 //
197 //-----------------------------------------------------------------------
198 // function : final merge
199 /// @brief Implement the final merge of the ranges
200 //-----------------------------------------------------------------------
final_mergeboost::sort::sample_detail::sample_sort201 void final_merge(void)
202 { //---------------------------------- begin --------------------------
203 njob = 0;
204
205 for (uint32_t i = 0; i < nthread; ++i)
206 {
207 vfuture[i] = std::async(std::launch::async, &this_t::execute, this);
208 };
209 for (uint32_t i = 0; i < nthread; ++i)
210 vfuture[i].get();
211 };
212 //----------------------------------------------------------------------------
213 };
214 // End class sample_sort
215 //----------------------------------------------------------------------------
216 //
217 //############################################################################
218 // ##
219 // N O N I N L I N E F U N C T I O N S ##
220 // ##
221 // ##
222 //############################################################################
223 //
224 //-----------------------------------------------------------------------------
225 // function : sample_sort
226 /// @brief constructor of the class
227 ///
228 /// @param first : iterator to the first element of the range to sort
229 /// @param last : iterator after the last element to the range to sort
230 /// @param cmp : object for to compare two elements pointed by Iter_t iterators
231 /// @param num_thread : Number of threads to use in the process. When this value
232 /// is lower than 2, the sorting is done with 1 thread
233 /// @param paux : pointer to the auxiliary memory. If nullptr, the memory is
234 /// created inside the class
235 /// @param naux : number of elements of the memory pointed by paux
236 //-----------------------------------------------------------------------------
237 template<class Iter_t, typename Compare>
238 sample_sort<Iter_t, Compare>
sample_sort(Iter_t first,Iter_t last,Compare cmp,uint32_t num_thread,value_t * paux,size_t naux)239 ::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
240 value_t *paux, size_t naux)
241 : nthread(num_thread), owner(false), comp(cmp), global_range(first, last),
242 global_buf(nullptr, nullptr), error(false)
243 {
244 assert((last - first) >= 0);
245 size_t nelem = size_t(last - first);
246 construct = false;
247 njob = 0;
248 vfuture.resize(nthread);
249
250 // Adjust when have many threads and only a few elements
251 while (nelem > thread_min and (nthread * nthread) > (nelem >> 3))
252 {
253 nthread /= 2;
254 };
255 ninterval = (nthread << 3);
256
257 if (nthread < 2 or nelem <= (thread_min))
258 {
259 bss::spinsort<Iter_t, Compare>(first, last, comp);
260 return;
261 };
262
263 //------------------- check if sort --------------------------------------
264 bool sw = true;
265 for (Iter_t it1 = first, it2 = first + 1;
266 it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++);
267 if (sw) return;
268
269 //------------------- check if reverse sort ---------------------------
270 sw = true;
271 for (Iter_t it1 = first, it2 = first + 1;
272 it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
273 if (sw)
274 {
275 size_t nelem2 = nelem >> 1;
276 Iter_t it1 = first, it2 = last - 1;
277 for (size_t i = 0; i < nelem2; ++i)
278 std::swap(*(it1++), *(it2--));
279 return;
280 };
281
282 if (paux != nullptr)
283 {
284 assert(naux != 0);
285 global_buf.first = paux;
286 global_buf.last = paux + naux;
287 owner = false;
288 }
289 else
290 {
291 value_t *ptr = std::get_temporary_buffer<value_t>(nelem).first;
292 if (ptr == nullptr) throw std::bad_alloc();
293 owner = true;
294 global_buf = range_buf(ptr, ptr + nelem);
295 };
296 //------------------------------------------------------------------------
297 // PROCESS
298 //------------------------------------------------------------------------
299 try
300 {
301 initial_configuration();
302 } catch (std::bad_alloc &)
303 {
304 error = true;
305 };
306 if (not error)
307 {
308 first_merge();
309 construct = true;
310 final_merge();
311 };
312 if (error)
313 {
314 destroy_all();
315 throw std::bad_alloc();
316 };
317 }
318 ;
319 //
320 //-----------------------------------------------------------------------------
321 // function : destroy_all
322 /// @brief destructor of the class. The utility is to destroy the temporary
323 /// buffer used in the sorting process
324 //-----------------------------------------------------------------------------
325 template<class Iter_t, typename Compare>
destroy_all(void)326 void sample_sort<Iter_t, Compare>::destroy_all(void)
327 {
328 if (construct)
329 {
330 destroy(global_buf);
331 construct = false;
332 }
333 if (global_buf.first != nullptr and owner)
334 std::return_temporary_buffer(global_buf.first);
335 }
336 //
337 //-----------------------------------------------------------------------------
338 // function : initial_configuration
339 /// @brief Create the internal data structures, and obtain the inital set of
340 /// ranges to merge
341 //-----------------------------------------------------------------------------
342 template<class Iter_t, typename Compare>
initial_configuration(void)343 void sample_sort<Iter_t, Compare>::initial_configuration(void)
344 {
345 std::vector<range_it> vmem_thread;
346 std::vector<range_buf> vbuf_thread;
347 size_t nelem = global_range.size();
348
349 //------------------------------------------------------------------------
350 size_t cupo = nelem / nthread;
351 Iter_t it_first = global_range.first;
352 value_t *buf_first = global_buf.first;
353 vmem_thread.reserve(nthread + 1);
354 vbuf_thread.reserve(nthread + 1);
355
356 for (uint32_t i = 0; i < (nthread - 1); ++i, it_first += cupo, buf_first +=
357 cupo)
358 {
359 vmem_thread.emplace_back(it_first, it_first + cupo);
360 vbuf_thread.emplace_back(buf_first, buf_first + cupo);
361 };
362
363 vmem_thread.emplace_back(it_first, global_range.last);
364 vbuf_thread.emplace_back(buf_first, global_buf.last);
365
366 //------------------------------------------------------------------------
367 // Sorting of the ranges
368 //------------------------------------------------------------------------
369 std::vector<std::future<void>> vfuture(nthread);
370
371 for (uint32_t i = 0; i < nthread; ++i)
372 {
373 auto func = [=]()
374 {
375 bss::spinsort<Iter_t, Compare> (vmem_thread[i].first,
376 vmem_thread[i].last, comp,
377 vbuf_thread[i]);
378 };
379 vfuture[i] = std::async(std::launch::async, func);
380 };
381
382 for (uint32_t i = 0; i < nthread; ++i)
383 vfuture[i].get();
384
385 //------------------------------------------------------------------------
386 // Obtain the vector of milestones
387 //------------------------------------------------------------------------
388 std::vector<Iter_t> vsample;
389 vsample.reserve(nthread * (ninterval - 1));
390
391 for (uint32_t i = 0; i < nthread; ++i)
392 {
393 size_t distance = vmem_thread[i].size() / ninterval;
394 for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance)
395 {
396 vsample.push_back(vmem_thread[i].first + pos);
397 };
398 };
399 typedef less_ptr_no_null<Iter_t, Compare> compare_ptr;
400 typedef typename std::vector<Iter_t>::iterator it_to_it;
401
402 bss::spinsort<it_to_it, compare_ptr>(vsample.begin(), vsample.end(),
403 compare_ptr(comp));
404
405 //------------------------------------------------------------------------
406 // Create the final milestone vector
407 //------------------------------------------------------------------------
408 std::vector<Iter_t> vmilestone;
409 vmilestone.reserve(ninterval);
410
411 for (uint32_t pos = nthread >> 1; pos < vsample.size(); pos += nthread)
412 {
413 vmilestone.push_back(vsample[pos]);
414 };
415
416 //------------------------------------------------------------------------
417 // Creation of the first vector of ranges
418 //------------------------------------------------------------------------
419 std::vector<std::vector<range<Iter_t>>>vv_range_first (nthread);
420
421 for (uint32_t i = 0; i < nthread; ++i)
422 {
423 Iter_t itaux = vmem_thread[i].first;
424
425 for (uint32_t k = 0; k < (ninterval - 1); ++k)
426 {
427 Iter_t it2 = std::upper_bound(itaux, vmem_thread[i].last,
428 *vmilestone[k], comp);
429
430 vv_range_first[i].emplace_back(itaux, it2);
431 itaux = it2;
432 };
433 vv_range_first[i].emplace_back(itaux, vmem_thread[i].last);
434 };
435
436 //------------------------------------------------------------------------
437 // Copy in buffer and creation of the final matrix of ranges
438 //------------------------------------------------------------------------
439 vv_range_it.resize(ninterval);
440 vv_range_buf.resize(ninterval);
441 vrange_it_ini.reserve(ninterval);
442 vrange_buf_ini.reserve(ninterval);
443
444 for (uint32_t i = 0; i < ninterval; ++i)
445 {
446 vv_range_it[i].reserve(nthread);
447 vv_range_buf[i].reserve(nthread);
448 };
449
450 Iter_t it = global_range.first;
451 value_t *it_buf = global_buf.first;
452
453 for (uint32_t k = 0; k < ninterval; ++k)
454 {
455 size_t nelem_interval = 0;
456
457 for (uint32_t i = 0; i < nthread; ++i)
458 {
459 size_t nelem_range = vv_range_first[i][k].size();
460 if (nelem_range != 0)
461 {
462 vv_range_it[k].push_back(vv_range_first[i][k]);
463 };
464 nelem_interval += nelem_range;
465 };
466
467 vrange_it_ini.emplace_back(it, it + nelem_interval);
468 vrange_buf_ini.emplace_back(it_buf, it_buf + nelem_interval);
469
470 it += nelem_interval;
471 it_buf += nelem_interval;
472 };
473 }
474 ;
475 //
476 //****************************************************************************
477 }
478 ;
479 // End namespace sample_detail
480 //****************************************************************************
481 //
482 namespace bscu = boost::sort::common::util;
483 //
484 //############################################################################
485 // ##
486 // ##
487 // S A M P L E _ S O R T ##
488 // ##
489 // ##
490 //############################################################################
491 //
492 //-----------------------------------------------------------------------------
493 // function : sample_sort
494 /// @brief parallel sample sort algorithm (stable sort)
495 ///
496 /// @param first : iterator to the first element of the range to sort
497 /// @param last : iterator after the last element to the range to sort
498 //-----------------------------------------------------------------------------
499 template<class Iter_t>
sample_sort(Iter_t first,Iter_t last)500 void sample_sort(Iter_t first, Iter_t last)
501 {
502 typedef compare_iter<Iter_t> Compare;
503 sample_detail::sample_sort<Iter_t, Compare>(first, last);
504 };
505 //
506 //-----------------------------------------------------------------------------
507 // function : sample_sort
508 /// @brief parallel sample sort algorithm (stable sort)
509 ///
510 /// @param first : iterator to the first element of the range to sort
511 /// @param last : iterator after the last element to the range to sort
512 /// @param nthread : Number of threads to use in the process. When this value
513 /// is lower than 2, the sorting is done with 1 thread
514 //-----------------------------------------------------------------------------
515 template<class Iter_t>
sample_sort(Iter_t first,Iter_t last,uint32_t nthread)516 void sample_sort(Iter_t first, Iter_t last, uint32_t nthread)
517 {
518 typedef compare_iter<Iter_t> Compare;
519 sample_detail::sample_sort<Iter_t, Compare>(first, last, nthread);
520 };
521 //
522 //-----------------------------------------------------------------------------
523 // function : sample_sort
524 /// @brief parallel sample sort algorithm (stable sort)
525 ///
526 /// @param first : iterator to the first element of the range to sort
527 /// @param last : iterator after the last element to the range to sort
528 /// @param comp : object for to compare two elements pointed by Iter_t
529 /// iterators
530 //-----------------------------------------------------------------------------
531 template<class Iter_t, class Compare, bscu::enable_if_not_integral<Compare> * =
532 nullptr>
sample_sort(Iter_t first,Iter_t last,Compare comp)533 void sample_sort(Iter_t first, Iter_t last, Compare comp)
534 {
535 sample_detail::sample_sort<Iter_t, Compare>(first, last, comp);
536 };
537 //
538 //-----------------------------------------------------------------------------
539 // function : sample_sort
540 /// @brief parallel sample sort algorithm (stable sort)
541 ///
542 /// @param first : iterator to the first element of the range to sort
543 /// @param last : iterator after the last element to the range to sort
544 /// @param comp : object for to compare two elements pointed by Iter_t
545 /// iterators
546 /// @param nthread : Number of threads to use in the process. When this value
547 /// is lower than 2, the sorting is done with 1 thread
548 //-----------------------------------------------------------------------------
549 template<class Iter_t, class Compare>
sample_sort(Iter_t first,Iter_t last,Compare comp,uint32_t nthread)550 void sample_sort(Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
551 {
552 sample_detail::sample_sort<Iter_t, Compare>(first, last, comp, nthread);
553 };
554 //
555 //****************************************************************************
556 };// End namespace sort
557 };// End namespace boost
558 //****************************************************************************
559 //
560 #endif
561