• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //----------------------------------------------------------------------------
2 /// @file sample_sort.hpp
3 /// @brief contains the class sample_sort
4 ///
5 /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
6 ///         Distributed under the Boost Software License, Version 1.0.\n
7 ///         ( See accompanying file LICENSE_1_0.txt or copy at
8 ///           http://www.boost.org/LICENSE_1_0.txt  )
9 /// @version 0.1
10 ///
11 /// @remarks
12 //-----------------------------------------------------------------------------
13 #ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
14 #define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
15 
16 #include <functional>
17 #include <future>
18 #include <iterator>
19 #include <memory>
20 #include <type_traits>
21 #include <vector>
22 
23 #include <algorithm>
24 #include <boost/sort/spinsort/spinsort.hpp>
25 #include <boost/sort/common/indirect.hpp>
26 #include <boost/sort/common/util/atomic.hpp>
27 #include <boost/sort/common/merge_four.hpp>
28 #include <boost/sort/common/merge_vector.hpp>
29 #include <boost/sort/common/range.hpp>
30 
31 namespace boost
32 {
33 namespace sort
34 {
35 namespace sample_detail
36 {
37 //---------------------------------------------------------------------------
38 //                    USING SENTENCES
39 //---------------------------------------------------------------------------
40 namespace bsc = boost::sort::common;
41 namespace bss = boost::sort::spin_detail;
42 namespace bscu = boost::sort::common::util;
43 using bsc::range;
44 using bscu::atomic_add;
45 using bsc::merge_vector4;
46 using bsc::uninit_merge_level4;
47 using bsc::less_ptr_no_null;
48 
49 //
50 ///---------------------------------------------------------------------------
51 /// @struct sample_sort
52 /// @brief This a structure for to implement a sample sort, exception
53 ///        safe
54 /// @tparam
55 /// @remarks
56 //----------------------------------------------------------------------------
57 template<class Iter_t, class Compare>
58 struct sample_sort
59 {
60     //------------------------------------------------------------------------
61     //                     DEFINITIONS
62     //------------------------------------------------------------------------
63     typedef value_iter<Iter_t> value_t;
64     typedef range<Iter_t> range_it;
65     typedef range<value_t *> range_buf;
66     typedef sample_sort<Iter_t, Compare> this_t;
67 
68     //------------------------------------------------------------------------
69     //                VARIABLES AND CONSTANTS
70     //------------------------------------------------------------------------
71     // minimun numbers of elements for to be sortd in parallel mode
72     static const uint32_t thread_min = (1 << 16);
73 
74     // Number of threads to use in the algorithm
75     // Number of intervals for to do the internal division of the data
76     uint32_t nthread, ninterval;
77 
78     // Bool variables indicating if the auxiliary memory is constructed
79     // and indicating in the auxiliary memory had been obtained inside the
80     /// algorithm or had been received as a parameter
81     bool construct = false, owner = false;
82 
83     // Comparison object for to compare two elements
84     Compare comp;
85 
86     // Range with all the elements to sort
87     range_it global_range;
88 
89     // range with the auxiliary memory
90     range_buf global_buf;
91 
92     // vector of futures
93     std::vector<std::future<void>> vfuture;
94 
95     // vector of vectors which contains the ranges to merge obtained in the
96     // subdivision
97     std::vector<std::vector<range_it>> vv_range_it;
98 
99     // each vector of ranges of the vv_range_it, need their corresponding buffer
100     // for to do the merge
101     std::vector<std::vector<range_buf>> vv_range_buf;
102 
103     // Initial vector of ranges
104     std::vector<range_it> vrange_it_ini;
105 
106     // Initial vector of buffers
107     std::vector<range_buf> vrange_buf_ini;
108 
109     // atomic counter for to know when are finished the function_t created
110     // inside a function
111     std::atomic<uint32_t> njob;
112 
113     // Indicate if an error in the algorithm for to undo all
114     bool error;
115 
116     //------------------------------------------------------------------------
117     //                       FUNCTIONS OF THE STRUCT
118     //------------------------------------------------------------------------
119     void initial_configuration(void);
120 
121     sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
122                  value_t *paux, size_t naux);
123 
sample_sortboost::sort::sample_detail::sample_sort124     sample_sort(Iter_t first, Iter_t last)
125     : sample_sort (first, last, Compare(), std::thread::hardware_concurrency(),
126                    nullptr, 0) { };
127 
sample_sortboost::sort::sample_detail::sample_sort128     sample_sort(Iter_t first, Iter_t last, Compare cmp)
129     : sample_sort(first, last, cmp, std::thread::hardware_concurrency(),
130                   nullptr, 0) { };
131 
sample_sortboost::sort::sample_detail::sample_sort132     sample_sort(Iter_t first, Iter_t last, uint32_t num_thread)
133     : sample_sort(first, last, Compare(), num_thread, nullptr, 0) { };
134 
sample_sortboost::sort::sample_detail::sample_sort135     sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread)
136     : sample_sort(first, last, cmp, num_thread, nullptr, 0) { };
137 
sample_sortboost::sort::sample_detail::sample_sort138     sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
139                 range_buf range_buf_initial)
140     : sample_sort(first, last, cmp, num_thread,
141                   range_buf_initial.first, range_buf_initial.size()) { };
142 
143     void destroy_all(void);
144     //
145     //-----------------------------------------------------------------------------
146     //  function :~sample_sort
147     /// @brief destructor of the class. The utility is to destroy the temporary
148     ///        buffer used in the sorting process
149     //-----------------------------------------------------------------------------
~sample_sortboost::sort::sample_detail::sample_sort150     ~sample_sort(void) { destroy_all(); };
151     //
152     //-----------------------------------------------------------------------
153     //  function : execute first
154     /// @brief this a function to assign to each thread in the first merge
155     //-----------------------------------------------------------------------
execute_firstboost::sort::sample_detail::sample_sort156     void execute_first(void)
157     {
158         uint32_t job = 0;
159         while ((job = atomic_add(njob, 1)) < ninterval)
160         {
161             uninit_merge_level4(vrange_buf_ini[job], vv_range_it[job],
162                             vv_range_buf[job], comp);
163         };
164     };
165     //
166     //-----------------------------------------------------------------------
167     //  function : execute
168     /// @brief this is a function to assignt each thread the final merge
169     //-----------------------------------------------------------------------
executeboost::sort::sample_detail::sample_sort170     void execute(void)
171     {
172         uint32_t job = 0;
173         while ((job = atomic_add(njob, 1)) < ninterval)
174         {
175             merge_vector4(vrange_buf_ini[job], vrange_it_ini[job],
176                             vv_range_buf[job], vv_range_it[job], comp);
177         };
178     };
179     //
180     //-----------------------------------------------------------------------
181     //  function : first merge
182     /// @brief Implement the merge of the initially sparse ranges
183     //-----------------------------------------------------------------------
first_mergeboost::sort::sample_detail::sample_sort184     void first_merge(void)
185     { //---------------------------------- begin --------------------------
186         njob = 0;
187 
188         for (uint32_t i = 0; i < nthread; ++i)
189         {
190             vfuture[i] = std::async(std::launch::async, &this_t::execute_first,
191                             this);
192         };
193         for (uint32_t i = 0; i < nthread; ++i)
194             vfuture[i].get();
195     };
196     //
197     //-----------------------------------------------------------------------
198     //  function : final merge
199     /// @brief Implement the final merge of the ranges
200     //-----------------------------------------------------------------------
final_mergeboost::sort::sample_detail::sample_sort201     void final_merge(void)
202     { //---------------------------------- begin --------------------------
203         njob = 0;
204 
205         for (uint32_t i = 0; i < nthread; ++i)
206         {
207             vfuture[i] = std::async(std::launch::async, &this_t::execute, this);
208         };
209         for (uint32_t i = 0; i < nthread; ++i)
210             vfuture[i].get();
211     };
212     //----------------------------------------------------------------------------
213 };
214 //                    End class sample_sort
215 //----------------------------------------------------------------------------
216 //
217 //############################################################################
218 //                                                                          ##
219 //              N O N    I N L I N E      F U N C T I O N S                 ##
220 //                                                                          ##
221 //                                                                          ##
222 //############################################################################
223 //
224 //-----------------------------------------------------------------------------
225 //  function : sample_sort
226 /// @brief constructor of the class
227 ///
228 /// @param first : iterator to the first element of the range to sort
229 /// @param last : iterator after the last element to the range to sort
230 /// @param cmp : object for to compare two elements pointed by Iter_t iterators
231 /// @param num_thread : Number of threads to use in the process. When this value
232 ///                     is lower than 2, the sorting is done with 1 thread
233 /// @param paux : pointer to the auxiliary memory. If nullptr, the memory is
234 ///               created inside the class
235 /// @param naux : number of elements of the memory pointed by paux
236 //-----------------------------------------------------------------------------
237 template<class Iter_t, typename Compare>
238 sample_sort<Iter_t, Compare>
sample_sort(Iter_t first,Iter_t last,Compare cmp,uint32_t num_thread,value_t * paux,size_t naux)239 ::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
240                value_t *paux, size_t naux)
241 : nthread(num_thread), owner(false), comp(cmp), global_range(first, last),
242   global_buf(nullptr, nullptr), error(false)
243 {
244     assert((last - first) >= 0);
245     size_t nelem = size_t(last - first);
246     construct = false;
247     njob = 0;
248     vfuture.resize(nthread);
249 
250     // Adjust when have many threads and only a few elements
251     while (nelem > thread_min and (nthread * nthread) > (nelem >> 3))
252     {
253         nthread /= 2;
254     };
255     ninterval = (nthread << 3);
256 
257     if (nthread < 2 or nelem <= (thread_min))
258     {
259         bss::spinsort<Iter_t, Compare>(first, last, comp);
260         return;
261     };
262 
263     //------------------- check if sort --------------------------------------
264     bool sw = true;
265     for (Iter_t it1 = first, it2 = first + 1;
266                     it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++);
267     if (sw) return;
268 
269     //------------------- check if reverse sort ---------------------------
270     sw = true;
271     for (Iter_t it1 = first, it2 = first + 1;
272                     it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
273     if (sw)
274     {
275         size_t nelem2 = nelem >> 1;
276         Iter_t it1 = first, it2 = last - 1;
277         for (size_t i = 0; i < nelem2; ++i)
278             std::swap(*(it1++), *(it2--));
279         return;
280     };
281 
282     if (paux != nullptr)
283     {
284         assert(naux != 0);
285         global_buf.first = paux;
286         global_buf.last = paux + naux;
287         owner = false;
288     }
289     else
290     {
291         value_t *ptr = std::get_temporary_buffer<value_t>(nelem).first;
292         if (ptr == nullptr) throw std::bad_alloc();
293         owner = true;
294         global_buf = range_buf(ptr, ptr + nelem);
295     };
296     //------------------------------------------------------------------------
297     //                    PROCESS
298     //------------------------------------------------------------------------
299     try
300     {
301         initial_configuration();
302     } catch (std::bad_alloc &)
303     {
304         error = true;
305     };
306     if (not error)
307     {
308         first_merge();
309         construct = true;
310         final_merge();
311     };
312     if (error)
313     {
314         destroy_all();
315         throw std::bad_alloc();
316     };
317 }
318 ;
319 //
320 //-----------------------------------------------------------------------------
321 //  function : destroy_all
322 /// @brief destructor of the class. The utility is to destroy the temporary
323 ///        buffer used in the sorting process
324 //-----------------------------------------------------------------------------
325 template<class Iter_t, typename Compare>
destroy_all(void)326 void sample_sort<Iter_t, Compare>::destroy_all(void)
327 {
328     if (construct)
329     {
330         destroy(global_buf);
331         construct = false;
332     }
333     if (global_buf.first != nullptr and owner)
334         std::return_temporary_buffer(global_buf.first);
335 }
336 //
337 //-----------------------------------------------------------------------------
338 //  function : initial_configuration
339 /// @brief Create the internal data structures, and obtain the inital set of
340 ///        ranges to merge
341 //-----------------------------------------------------------------------------
342 template<class Iter_t, typename Compare>
initial_configuration(void)343 void sample_sort<Iter_t, Compare>::initial_configuration(void)
344 {
345     std::vector<range_it> vmem_thread;
346     std::vector<range_buf> vbuf_thread;
347     size_t nelem = global_range.size();
348 
349     //------------------------------------------------------------------------
350     size_t cupo = nelem / nthread;
351     Iter_t it_first = global_range.first;
352     value_t *buf_first = global_buf.first;
353     vmem_thread.reserve(nthread + 1);
354     vbuf_thread.reserve(nthread + 1);
355 
356     for (uint32_t i = 0; i < (nthread - 1); ++i, it_first += cupo, buf_first +=
357                     cupo)
358     {
359         vmem_thread.emplace_back(it_first, it_first + cupo);
360         vbuf_thread.emplace_back(buf_first, buf_first + cupo);
361     };
362 
363     vmem_thread.emplace_back(it_first, global_range.last);
364     vbuf_thread.emplace_back(buf_first, global_buf.last);
365 
366     //------------------------------------------------------------------------
367     // Sorting of the ranges
368     //------------------------------------------------------------------------
369     std::vector<std::future<void>> vfuture(nthread);
370 
371     for (uint32_t i = 0; i < nthread; ++i)
372     {
373         auto func = [=]()
374         {
375             bss::spinsort<Iter_t, Compare> (vmem_thread[i].first,
376                             vmem_thread[i].last, comp,
377                             vbuf_thread[i]);
378         };
379         vfuture[i] = std::async(std::launch::async, func);
380     };
381 
382     for (uint32_t i = 0; i < nthread; ++i)
383         vfuture[i].get();
384 
385     //------------------------------------------------------------------------
386     // Obtain the vector of milestones
387     //------------------------------------------------------------------------
388     std::vector<Iter_t> vsample;
389     vsample.reserve(nthread * (ninterval - 1));
390 
391     for (uint32_t i = 0; i < nthread; ++i)
392     {
393         size_t distance = vmem_thread[i].size() / ninterval;
394         for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance)
395         {
396             vsample.push_back(vmem_thread[i].first + pos);
397         };
398     };
399     typedef less_ptr_no_null<Iter_t, Compare> compare_ptr;
400     typedef typename std::vector<Iter_t>::iterator it_to_it;
401 
402     bss::spinsort<it_to_it, compare_ptr>(vsample.begin(), vsample.end(),
403                     compare_ptr(comp));
404 
405     //------------------------------------------------------------------------
406     // Create the final milestone vector
407     //------------------------------------------------------------------------
408     std::vector<Iter_t> vmilestone;
409     vmilestone.reserve(ninterval);
410 
411     for (uint32_t pos = nthread >> 1; pos < vsample.size(); pos += nthread)
412     {
413         vmilestone.push_back(vsample[pos]);
414     };
415 
416     //------------------------------------------------------------------------
417     // Creation of the first vector of ranges
418     //------------------------------------------------------------------------
419     std::vector<std::vector<range<Iter_t>>>vv_range_first (nthread);
420 
421     for (uint32_t i = 0; i < nthread; ++i)
422     {
423         Iter_t itaux = vmem_thread[i].first;
424 
425         for (uint32_t k = 0; k < (ninterval - 1); ++k)
426         {
427             Iter_t it2 = std::upper_bound(itaux, vmem_thread[i].last,
428                             *vmilestone[k], comp);
429 
430             vv_range_first[i].emplace_back(itaux, it2);
431             itaux = it2;
432         };
433         vv_range_first[i].emplace_back(itaux, vmem_thread[i].last);
434     };
435 
436     //------------------------------------------------------------------------
437     // Copy in buffer and  creation of the final matrix of ranges
438     //------------------------------------------------------------------------
439     vv_range_it.resize(ninterval);
440     vv_range_buf.resize(ninterval);
441     vrange_it_ini.reserve(ninterval);
442     vrange_buf_ini.reserve(ninterval);
443 
444     for (uint32_t i = 0; i < ninterval; ++i)
445     {
446         vv_range_it[i].reserve(nthread);
447         vv_range_buf[i].reserve(nthread);
448     };
449 
450     Iter_t it = global_range.first;
451     value_t *it_buf = global_buf.first;
452 
453     for (uint32_t k = 0; k < ninterval; ++k)
454     {
455         size_t nelem_interval = 0;
456 
457         for (uint32_t i = 0; i < nthread; ++i)
458         {
459             size_t nelem_range = vv_range_first[i][k].size();
460             if (nelem_range != 0)
461             {
462                 vv_range_it[k].push_back(vv_range_first[i][k]);
463             };
464             nelem_interval += nelem_range;
465         };
466 
467         vrange_it_ini.emplace_back(it, it + nelem_interval);
468         vrange_buf_ini.emplace_back(it_buf, it_buf + nelem_interval);
469 
470         it += nelem_interval;
471         it_buf += nelem_interval;
472     };
473 }
474 ;
475 //
476 //****************************************************************************
477 }
478 ;
479 //    End namespace sample_detail
480 //****************************************************************************
481 //
482 namespace bscu = boost::sort::common::util;
483 //
484 //############################################################################
485 //                                                                          ##
486 //                                                                          ##
487 //                       S A M P L E _ S O R T                              ##
488 //                                                                          ##
489 //                                                                          ##
490 //############################################################################
491 //
492 //-----------------------------------------------------------------------------
493 //  function : sample_sort
494 /// @brief parallel sample sort  algorithm (stable sort)
495 ///
496 /// @param first : iterator to the first element of the range to sort
497 /// @param last : iterator after the last element to the range to sort
498 //-----------------------------------------------------------------------------
499 template<class Iter_t>
sample_sort(Iter_t first,Iter_t last)500 void sample_sort(Iter_t first, Iter_t last)
501 {
502     typedef compare_iter<Iter_t> Compare;
503     sample_detail::sample_sort<Iter_t, Compare>(first, last);
504 };
505 //
506 //-----------------------------------------------------------------------------
507 //  function : sample_sort
508 /// @brief parallel sample sort  algorithm (stable sort)
509 ///
510 /// @param first : iterator to the first element of the range to sort
511 /// @param last : iterator after the last element to the range to sort
512 /// @param nthread : Number of threads to use in the process. When this value
513 ///                  is lower than 2, the sorting is done with 1 thread
514 //-----------------------------------------------------------------------------
515 template<class Iter_t>
sample_sort(Iter_t first,Iter_t last,uint32_t nthread)516 void sample_sort(Iter_t first, Iter_t last, uint32_t nthread)
517 {
518     typedef compare_iter<Iter_t> Compare;
519     sample_detail::sample_sort<Iter_t, Compare>(first, last, nthread);
520 };
521 //
522 //-----------------------------------------------------------------------------
523 //  function : sample_sort
524 /// @brief parallel sample sort  algorithm (stable sort)
525 ///
526 /// @param first : iterator to the first element of the range to sort
527 /// @param last : iterator after the last element to the range to sort
528 /// @param comp : object for to compare two elements pointed by Iter_t
529 ///               iterators
530 //-----------------------------------------------------------------------------
531 template<class Iter_t, class Compare, bscu::enable_if_not_integral<Compare> * =
532                 nullptr>
sample_sort(Iter_t first,Iter_t last,Compare comp)533 void sample_sort(Iter_t first, Iter_t last, Compare comp)
534 {
535     sample_detail::sample_sort<Iter_t, Compare>(first, last, comp);
536 };
537 //
538 //-----------------------------------------------------------------------------
539 //  function : sample_sort
540 /// @brief parallel sample sort  algorithm (stable sort)
541 ///
542 /// @param first : iterator to the first element of the range to sort
543 /// @param last : iterator after the last element to the range to sort
544 /// @param comp : object for to compare two elements pointed by Iter_t
545 ///               iterators
546 /// @param nthread : Number of threads to use in the process. When this value
547 ///                  is lower than 2, the sorting is done with 1 thread
548 //-----------------------------------------------------------------------------
549 template<class Iter_t, class Compare>
sample_sort(Iter_t first,Iter_t last,Compare comp,uint32_t nthread)550 void sample_sort(Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
551 {
552     sample_detail::sample_sort<Iter_t, Compare>(first, last, comp, nthread);
553 };
554 //
555 //****************************************************************************
556 };//    End namespace sort
557 };//    End namespace boost
558 //****************************************************************************
559 //
560 #endif
561