• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11 
12 //#define BOOST_MOVE_ADAPTIVE_SORT_STATS
13 //#define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 2
14 
15 #include <algorithm> //std::inplace_merge
16 #include <cstdio>    //std::printf
17 #include <iostream>  //std::cout
18 #include <boost/container/vector.hpp>  //boost::container::vector
19 
20 #include <boost/config.hpp>
21 
22 #include <boost/move/unique_ptr.hpp>
23 #include <boost/timer/timer.hpp>
24 
25 #include "order_type.hpp"
26 #include "random_shuffle.hpp"
27 
28 using boost::timer::cpu_timer;
29 using boost::timer::cpu_times;
30 using boost::timer::nanosecond_type;
31 
print_stats(const char * str,boost::ulong_long_type element_count)32 void print_stats(const char *str, boost::ulong_long_type element_count)
33 {
34    std::printf("%sCmp:%8.04f Cpy:%9.04f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
35 }
36 
37 #include <boost/move/algo/adaptive_merge.hpp>
38 #include <boost/move/algo/detail/merge.hpp>
39 #include <boost/move/core.hpp>
40 
41 template<class T, class Compare>
generate_elements(boost::container::vector<T> & elements,std::size_t L,std::size_t NK,Compare comp)42 std::size_t generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK, Compare comp)
43 {
44    elements.resize(L);
45    boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]);
46 
47    std::srand(0);
48    for (std::size_t i = 0; i < (NK ? NK : L); ++i) {
49       key_reps[i] = 0;
50    }
51    for (std::size_t i = 0; i < L; ++i) {
52       std::size_t  key = NK ? (i % NK) : i;
53       elements[i].key = key;
54    }
55    ::random_shuffle(elements.data(), elements.data() + L);
56    ::random_shuffle(elements.data(), elements.data() + L);
57 
58    for (std::size_t i = 0; i < L; ++i) {
59       elements[i].val = key_reps[elements[i].key]++;
60    }
61    std::size_t split_count = L / 2;
62    std::stable_sort(elements.data(), elements.data() + split_count, comp);
63    std::stable_sort(elements.data() + split_count, elements.data() + L, comp);
64    return split_count;
65 }
66 
67 template<class T, class Compare>
adaptive_merge_buffered(T * elements,T * mid,T * last,Compare comp,std::size_t BufLen)68 void adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen)
69 {
70    boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
71    boost::movelib::adaptive_merge(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen);
72 }
73 
74 template<class T, class Compare>
std_like_adaptive_merge_buffered(T * elements,T * mid,T * last,Compare comp,std::size_t BufLen)75 void std_like_adaptive_merge_buffered(T *elements, T *mid, T *last, Compare comp, std::size_t BufLen)
76 {
77    boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
78    boost::movelib::merge_adaptive_ONlogN(elements, mid, last, comp, reinterpret_cast<T*>(mem.get()), BufLen);
79 }
80 
81 enum AlgoType
82 {
83    StdMerge,
84    AdaptMerge,
85    SqrtHAdaptMerge,
86    SqrtAdaptMerge,
87    Sqrt2AdaptMerge,
88    QuartAdaptMerge,
89    StdInplaceMerge,
90    StdSqrtHAdaptMerge,
91    StdSqrtAdaptMerge,
92    StdSqrt2AdaptMerge,
93    StdQuartAdaptMerge,
94    MaxMerge
95 };
96 
97 const char *AlgoNames [] = { "StdMerge           "
98                            , "AdaptMerge         "
99                            , "SqrtHAdaptMerge    "
100                            , "SqrtAdaptMerge     "
101                            , "Sqrt2AdaptMerge    "
102                            , "QuartAdaptMerge    "
103                            , "StdInplaceMerge    "
104                            , "StdSqrtHAdaptMerge "
105                            , "StdSqrtAdaptMerge  "
106                            , "StdSqrt2AdaptMerge "
107                            , "StdQuartAdaptMerge "
108                            };
109 
110 BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxMerge);
111 
112 template<class T>
measure_algo(T * elements,std::size_t element_count,std::size_t split_pos,std::size_t alg,nanosecond_type & prev_clock)113 bool measure_algo(T *elements, std::size_t element_count, std::size_t split_pos, std::size_t alg, nanosecond_type &prev_clock)
114 {
115    std::printf("%s ", AlgoNames[alg]);
116    order_perf_type::num_compare=0;
117    order_perf_type::num_copy=0;
118    order_perf_type::num_elements = element_count;
119    cpu_timer timer;
120    timer.resume();
121    switch(alg)
122    {
123       case StdMerge:
124          std::inplace_merge(elements, elements+split_pos, elements+element_count, order_type_less());
125       break;
126       case AdaptMerge:
127          boost::movelib::adaptive_merge(elements, elements+split_pos, elements+element_count, order_type_less());
128       break;
129       case SqrtHAdaptMerge:
130          adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
131                             , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
132       break;
133       case SqrtAdaptMerge:
134          adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
135                             , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
136       break;
137       case Sqrt2AdaptMerge:
138          adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
139                             , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
140       break;
141       case QuartAdaptMerge:
142          adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
143                             , (element_count)/4+1);
144       break;
145       case StdInplaceMerge:
146          boost::movelib::merge_bufferless_ONlogN(elements, elements+split_pos, elements+element_count, order_type_less());
147       break;
148       case StdSqrtHAdaptMerge:
149          std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
150                             , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
151       break;
152       case StdSqrtAdaptMerge:
153          std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
154                             , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
155       break;
156       case StdSqrt2AdaptMerge:
157          std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
158                             , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
159       break;
160       case StdQuartAdaptMerge:
161          std_like_adaptive_merge_buffered( elements, elements+split_pos, elements+element_count, order_type_less()
162                             , (element_count)/4+1);
163       break;
164    }
165    timer.stop();
166 
167    if(order_perf_type::num_elements == element_count){
168       std::printf(" Tmp Ok ");
169    } else{
170       std::printf(" Tmp KO ");
171    }
172    nanosecond_type new_clock = timer.elapsed().wall;
173 
174    //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy;   //for old compilers without ll size argument
175    std::printf("Cmp:%8.04f Cpy:%9.04f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
176 
177    double time = double(new_clock);
178 
179    const char *units = "ns";
180    if(time >= 1000000000.0){
181       time /= 1000000000.0;
182       units = " s";
183    }
184    else if(time >= 1000000.0){
185       time /= 1000000.0;
186       units = "ms";
187    }
188    else if(time >= 1000.0){
189       time /= 1000.0;
190       units = "us";
191    }
192 
193    std::printf(" %6.02f%s (%6.02f)\n"
194               , time
195               , units
196               , prev_clock ? double(new_clock)/double(prev_clock): 1.0);
197    prev_clock = new_clock;
198    bool res = is_order_type_ordered(elements, element_count, true);
199    return res;
200 }
201 
202 template<class T>
measure_all(std::size_t L,std::size_t NK)203 bool measure_all(std::size_t L, std::size_t NK)
204 {
205    boost::container::vector<T> original_elements, elements;
206    std::size_t split_pos = generate_elements(original_elements, L, NK, order_type_less());
207    std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK);
208 
209    nanosecond_type prev_clock = 0;
210    nanosecond_type back_clock;
211    bool res = true;
212 
213    elements = original_elements;
214    res = res && measure_algo(elements.data(), L, split_pos, StdMerge, prev_clock);
215    back_clock = prev_clock;
216    //
217 
218    prev_clock = back_clock;
219    elements = original_elements;
220    res = res && measure_algo(elements.data(), L, split_pos, QuartAdaptMerge, prev_clock);
221    //
222    prev_clock = back_clock;
223    elements = original_elements;
224    res = res && measure_algo(elements.data(), L, split_pos, StdQuartAdaptMerge, prev_clock);
225    //
226    prev_clock = back_clock;
227    elements = original_elements;
228    res = res && measure_algo(elements.data(), L, split_pos, Sqrt2AdaptMerge, prev_clock);
229    //
230    prev_clock = back_clock;
231    elements = original_elements;
232    res = res && measure_algo(elements.data(), L, split_pos, StdSqrt2AdaptMerge, prev_clock);
233    //
234    prev_clock = back_clock;
235    elements = original_elements;
236    res = res && measure_algo(elements.data(), L, split_pos, SqrtAdaptMerge, prev_clock);
237    //
238    prev_clock = back_clock;
239    elements = original_elements;
240    res = res && measure_algo(elements.data(), L, split_pos, StdSqrtAdaptMerge, prev_clock);
241    //
242    prev_clock = back_clock;
243    elements = original_elements;
244    res = res && measure_algo(elements.data(), L, split_pos, SqrtHAdaptMerge, prev_clock);
245    //
246    prev_clock = back_clock;
247    elements = original_elements;
248    res = res && measure_algo(elements.data(), L, split_pos, StdSqrtHAdaptMerge, prev_clock);
249    //
250    prev_clock = back_clock;
251    elements = original_elements;
252    res = res && measure_algo(elements.data(), L, split_pos, AdaptMerge, prev_clock);
253    //
254    prev_clock = back_clock;
255    elements = original_elements;
256    res = res && measure_algo(elements.data(), L, split_pos,StdInplaceMerge, prev_clock);
257    //
258 
259    if(!res)
260       throw int(0);
261    return res;
262 }
263 
264 //Undef it to run the long test
265 #define BENCH_MERGE_SHORT
266 #define BENCH_SORT_UNIQUE_VALUES
267 
main()268 int main()
269 {
270    try{
271    #ifndef BENCH_SORT_UNIQUE_VALUES
272    measure_all<order_perf_type>(101,1);
273    measure_all<order_perf_type>(101,5);
274    measure_all<order_perf_type>(101,7);
275    measure_all<order_perf_type>(101,31);
276    #endif
277    measure_all<order_perf_type>(101,0);
278 
279    //
280    #ifndef BENCH_SORT_UNIQUE_VALUES
281    measure_all<order_perf_type>(1101,1);
282    measure_all<order_perf_type>(1001,7);
283    measure_all<order_perf_type>(1001,31);
284    measure_all<order_perf_type>(1001,127);
285    measure_all<order_perf_type>(1001,511);
286    #endif
287    measure_all<order_perf_type>(1001,0);
288 
289    //
290    #ifndef BENCH_SORT_UNIQUE_VALUES
291    measure_all<order_perf_type>(10001,65);
292    measure_all<order_perf_type>(10001,255);
293    measure_all<order_perf_type>(10001,1023);
294    measure_all<order_perf_type>(10001,4095);
295    #endif
296    measure_all<order_perf_type>(10001,0);
297 
298    //
299    #if defined(NDEBUG)
300    #ifndef BENCH_SORT_UNIQUE_VALUES
301    measure_all<order_perf_type>(100001,511);
302    measure_all<order_perf_type>(100001,2047);
303    measure_all<order_perf_type>(100001,8191);
304    measure_all<order_perf_type>(100001,32767);
305    #endif
306    measure_all<order_perf_type>(100001,0);
307 
308    //
309    #if !defined(BENCH_MERGE_SHORT)
310    #ifndef BENCH_SORT_UNIQUE_VALUES
311    measure_all<order_perf_type>(1000001, 8192);
312    measure_all<order_perf_type>(1000001, 32768);
313    measure_all<order_perf_type>(1000001, 131072);
314    measure_all<order_perf_type>(1000001, 524288);
315    #endif
316    measure_all<order_perf_type>(1000001,0);
317 
318    #ifndef BENCH_SORT_UNIQUE_VALUES
319    measure_all<order_perf_type>(10000001, 65536);
320    measure_all<order_perf_type>(10000001, 262144);
321    measure_all<order_perf_type>(10000001, 1048576);
322    measure_all<order_perf_type>(10000001, 4194304);
323    #endif
324    measure_all<order_perf_type>(10000001,0);
325    #endif   //#ifndef BENCH_MERGE_SHORT
326    #endif   //#ifdef NDEBUG
327    }
328    catch(...)
329    {
330       return 1;
331    }
332 
333    return 0;
334 }
335 
336