1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11
12 #include <cstdlib> //std::srand
13 #include <algorithm> //std::stable_sort, std::make|sort_heap, std::random_shuffle
14 #include <cstdio> //std::printf
15 #include <iostream> //std::cout
16 #include <boost/container/vector.hpp> //boost::container::vector
17
18 #include <boost/config.hpp>
19 #include <boost/move/unique_ptr.hpp>
20 #include <boost/timer/timer.hpp>
21
22 using boost::timer::cpu_timer;
23 using boost::timer::cpu_times;
24 using boost::timer::nanosecond_type;
25
26 #include "order_type.hpp"
27 #include "random_shuffle.hpp"
28
29 //#define BOOST_MOVE_ADAPTIVE_SORT_STATS
30 //#define BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
print_stats(const char * str,boost::ulong_long_type element_count)31 void print_stats(const char *str, boost::ulong_long_type element_count)
32 {
33 std::printf("%sCmp:%7.03f Cpy:%8.03f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
34 }
35
36
37 #include <boost/move/algo/adaptive_sort.hpp>
38 #include <boost/move/algo/detail/merge_sort.hpp>
39 #include <boost/move/algo/detail/pdqsort.hpp>
40 #include <boost/move/algo/detail/heap_sort.hpp>
41 #include <boost/move/core.hpp>
42
43 template<class T>
generate_elements(boost::container::vector<T> & elements,std::size_t L,std::size_t NK)44 void generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK)
45 {
46 elements.resize(L);
47 boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]);
48
49 std::srand(0);
50 for (std::size_t i = 0; i < (NK ? NK : L); ++i) {
51 key_reps[i] = 0;
52 }
53 for (std::size_t i = 0; i < L; ++i) {
54 std::size_t key = NK ? (i % NK) : i;
55 elements[i].key = key;
56 }
57 ::random_shuffle(elements.data(), elements.data() + L);
58 ::random_shuffle(elements.data(), elements.data() + L);
59
60 for (std::size_t i = 0; i < L; ++i) {
61 elements[i].val = key_reps[elements[i].key]++;
62 }
63 }
64
65 template<class T, class Compare>
adaptive_sort_buffered(T * elements,std::size_t element_count,Compare comp,std::size_t BufLen)66 void adaptive_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen)
67 {
68 boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
69 boost::movelib::adaptive_sort(elements, elements + element_count, comp, reinterpret_cast<T*>(mem.get()), BufLen);
70 }
71
72 template<class T, class Compare>
std_like_adaptive_stable_sort_buffered(T * elements,std::size_t element_count,Compare comp,std::size_t BufLen)73 void std_like_adaptive_stable_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen)
74 {
75 boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
76 boost::movelib::stable_sort_adaptive_ONlogN2(elements, elements + element_count, comp, reinterpret_cast<T*>(mem.get()), BufLen);
77 }
78
79 template<class T, class Compare>
merge_sort_buffered(T * elements,std::size_t element_count,Compare comp)80 void merge_sort_buffered(T *elements, std::size_t element_count, Compare comp)
81 {
82 boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*((element_count+1)/2)]);
83 boost::movelib::merge_sort(elements, elements + element_count, comp, reinterpret_cast<T*>(mem.get()));
84 }
85
86 enum AlgoType
87 {
88 MergeSort,
89 StableSort,
90 PdQsort,
91 StdSort,
92 AdaptiveSort,
93 SqrtHAdaptiveSort,
94 SqrtAdaptiveSort,
95 Sqrt2AdaptiveSort,
96 QuartAdaptiveSort,
97 InplaceStableSort,
98 StdSqrtHAdpSort,
99 StdSqrtAdpSort,
100 StdSqrt2AdpSort,
101 StdQuartAdpSort,
102 SlowStableSort,
103 HeapSort,
104 MaxSort
105 };
106
107 const char *AlgoNames [] = { "MergeSort "
108 , "StableSort "
109 , "PdQsort "
110 , "StdSort "
111 , "AdaptSort "
112 , "SqrtHAdaptSort "
113 , "SqrtAdaptSort "
114 , "Sqrt2AdaptSort "
115 , "QuartAdaptSort "
116 , "InplStableSort "
117 , "StdSqrtHAdpSort"
118 , "StdSqrtAdpSort "
119 , "StdSqrt2AdpSort"
120 , "StdQuartAdpSort"
121 , "SlowSort "
122 , "HeapSort "
123 };
124
125 BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxSort);
126
127 template<class T>
measure_algo(T * elements,std::size_t element_count,std::size_t alg,nanosecond_type & prev_clock)128 bool measure_algo(T *elements, std::size_t element_count, std::size_t alg, nanosecond_type &prev_clock)
129 {
130 std::printf("%s ", AlgoNames[alg]);
131 order_perf_type::num_compare=0;
132 order_perf_type::num_copy=0;
133 order_perf_type::num_elements = element_count;
134 cpu_timer timer;
135 timer.resume();
136 switch(alg)
137 {
138 case MergeSort:
139 merge_sort_buffered(elements, element_count, order_type_less());
140 break;
141 case StableSort:
142 std::stable_sort(elements,elements+element_count,order_type_less());
143 break;
144 case PdQsort:
145 boost::movelib::pdqsort(elements,elements+element_count,order_type_less());
146 break;
147 case StdSort:
148 std::sort(elements,elements+element_count,order_type_less());
149 break;
150 case AdaptiveSort:
151 boost::movelib::adaptive_sort(elements, elements+element_count, order_type_less());
152 break;
153 case SqrtHAdaptiveSort:
154 adaptive_sort_buffered( elements, element_count, order_type_less()
155 , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
156 break;
157 case SqrtAdaptiveSort:
158 adaptive_sort_buffered( elements, element_count, order_type_less()
159 , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
160 break;
161 case Sqrt2AdaptiveSort:
162 adaptive_sort_buffered( elements, element_count, order_type_less()
163 , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
164 break;
165 case QuartAdaptiveSort:
166 adaptive_sort_buffered( elements, element_count, order_type_less()
167 , (element_count-1)/4+1);
168 break;
169 case InplaceStableSort:
170 boost::movelib::inplace_stable_sort(elements, elements+element_count, order_type_less());
171 break;
172 case StdSqrtHAdpSort:
173 std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
174 , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
175 break;
176 case StdSqrtAdpSort:
177 std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
178 , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
179 break;
180 case StdSqrt2AdpSort:
181 std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
182 , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
183 break;
184 case StdQuartAdpSort:
185 std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
186 , (element_count-1)/4+1);
187 break;
188 case SlowStableSort:
189 boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less());
190 break;
191 case HeapSort:
192 boost::movelib::heap_sort(elements, elements+element_count, order_type_less());
193 boost::movelib::heap_sort((order_move_type*)0, (order_move_type*)0, order_type_less());
194
195 break;
196 }
197 timer.stop();
198
199 if(order_perf_type::num_elements == element_count){
200 std::printf(" Tmp Ok ");
201 } else{
202 std::printf(" Tmp KO ");
203 }
204 nanosecond_type new_clock = timer.elapsed().wall;
205
206 //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy; //for old compilers without ll size argument
207 std::printf("Cmp:%7.03f Cpy:%8.03f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
208
209 double time = double(new_clock);
210
211 const char *units = "ns";
212 if(time >= 1000000000.0){
213 time /= 1000000000.0;
214 units = " s";
215 }
216 else if(time >= 1000000.0){
217 time /= 1000000.0;
218 units = "ms";
219 }
220 else if(time >= 1000.0){
221 time /= 1000.0;
222 units = "us";
223 }
224
225 std::printf(" %6.02f%s (%6.02f)\n"
226 , time
227 , units
228 , prev_clock ? double(new_clock)/double(prev_clock): 1.0);
229 prev_clock = new_clock;
230 bool res = is_order_type_ordered(elements, element_count, alg != HeapSort && alg != PdQsort && alg != StdSort);
231 return res;
232 }
233
234 template<class T>
measure_all(std::size_t L,std::size_t NK)235 bool measure_all(std::size_t L, std::size_t NK)
236 {
237 boost::container::vector<T> original_elements, elements;
238 generate_elements(original_elements, L, NK);
239 std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK);
240
241 nanosecond_type prev_clock = 0;
242 nanosecond_type back_clock;
243 bool res = true;
244 elements = original_elements;
245 res = res && measure_algo(elements.data(), L,MergeSort, prev_clock);
246 back_clock = prev_clock;
247 //
248 prev_clock = back_clock;
249 elements = original_elements;
250 res = res && measure_algo(elements.data(), L,StableSort, prev_clock);
251 //
252 prev_clock = back_clock;
253 elements = original_elements;
254 res = res && measure_algo(elements.data(), L,PdQsort, prev_clock);
255 //
256 prev_clock = back_clock;
257 elements = original_elements;
258 res = res && measure_algo(elements.data(), L,StdSort, prev_clock);
259 //
260 prev_clock = back_clock;
261 elements = original_elements;
262 res = res && measure_algo(elements.data(), L,HeapSort, prev_clock);
263 //
264 prev_clock = back_clock;
265 elements = original_elements;
266 res = res && measure_algo(elements.data(), L,QuartAdaptiveSort, prev_clock);
267 //
268 prev_clock = back_clock;
269 elements = original_elements;
270 res = res && measure_algo(elements.data(), L, StdQuartAdpSort, prev_clock);
271 //
272 prev_clock = back_clock;
273 elements = original_elements;
274 res = res && measure_algo(elements.data(), L,Sqrt2AdaptiveSort, prev_clock);
275 //
276 prev_clock = back_clock;
277 elements = original_elements;
278 res = res && measure_algo(elements.data(), L, StdSqrt2AdpSort, prev_clock);
279 //
280 prev_clock = back_clock;
281 elements = original_elements;
282 res = res && measure_algo(elements.data(), L,SqrtAdaptiveSort, prev_clock);
283 //
284 prev_clock = back_clock;
285 elements = original_elements;
286 res = res && measure_algo(elements.data(), L, StdSqrtAdpSort, prev_clock);
287 //
288 prev_clock = back_clock;
289 elements = original_elements;
290 res = res && measure_algo(elements.data(), L,SqrtHAdaptiveSort, prev_clock);
291 //
292 prev_clock = back_clock;
293 elements = original_elements;
294 res = res && measure_algo(elements.data(), L, StdSqrtHAdpSort, prev_clock);
295 //
296 prev_clock = back_clock;
297 elements = original_elements;
298 res = res && measure_algo(elements.data(), L,AdaptiveSort, prev_clock);
299 //
300 prev_clock = back_clock;
301 elements = original_elements;
302 res = res && measure_algo(elements.data(), L,InplaceStableSort, prev_clock);
303 //
304 //prev_clock = back_clock;
305 //elements = original_elements;
306 //res = res && measure_algo(elements.data(), L,SlowStableSort, prev_clock);
307 //
308 if(!res)
309 throw int(0);
310 return res;
311 }
312
313 //Undef it to run the long test
314 #define BENCH_SORT_SHORT
315 #define BENCH_SORT_UNIQUE_VALUES
316
main()317 int main()
318 {
319 #ifndef BENCH_SORT_UNIQUE_VALUES
320 measure_all<order_perf_type>(101,1);
321 measure_all<order_perf_type>(101,7);
322 measure_all<order_perf_type>(101,31);
323 #endif
324 measure_all<order_perf_type>(101,0);
325
326 //
327 #ifndef BENCH_SORT_UNIQUE_VALUES
328 measure_all<order_perf_type>(1101,1);
329 measure_all<order_perf_type>(1001,7);
330 measure_all<order_perf_type>(1001,31);
331 measure_all<order_perf_type>(1001,127);
332 measure_all<order_perf_type>(1001,511);
333 #endif
334 measure_all<order_perf_type>(1001,0);
335 //
336 #ifndef BENCH_SORT_UNIQUE_VALUES
337 measure_all<order_perf_type>(10001,65);
338 measure_all<order_perf_type>(10001,255);
339 measure_all<order_perf_type>(10001,1023);
340 measure_all<order_perf_type>(10001,4095);
341 #endif
342 measure_all<order_perf_type>(10001,0);
343
344 //
345 #ifdef NDEBUG
346 #ifndef BENCH_SORT_UNIQUE_VALUES
347 measure_all<order_perf_type>(100001,511);
348 measure_all<order_perf_type>(100001,2047);
349 measure_all<order_perf_type>(100001,8191);
350 measure_all<order_perf_type>(100001,32767);
351 #endif
352 measure_all<order_perf_type>(100001,0);
353
354 //
355 #ifndef BENCH_SORT_SHORT
356 #ifndef BENCH_SORT_UNIQUE_VALUES
357 measure_all<order_perf_type>(1000001, 8192);
358 measure_all<order_perf_type>(1000001, 32768);
359 measure_all<order_perf_type>(1000001, 131072);
360 measure_all<order_perf_type>(1000001, 524288);
361 #endif
362 measure_all<order_perf_type>(1000001,0);
363
364 #ifndef BENCH_SORT_UNIQUE_VALUES
365 measure_all<order_perf_type>(10000001, 65536);
366 measure_all<order_perf_type>(10000001, 262144);
367 measure_all<order_perf_type>(10000001, 1048576);
368 measure_all<order_perf_type>(10000001, 4194304);
369 #endif
370 measure_all<order_perf_type>(1000001,0);
371 #endif //#ifndef BENCH_SORT_SHORT
372 #endif //NDEBUG
373
374 //measure_all<order_perf_type>(100000001,0);
375
376 return 0;
377 }
378