1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #include <algorithm>
12 #include <iostream>
13 #include <vector>
14
15 #include <boost/program_options.hpp>
16
17 #include <boost/compute/system.hpp>
18 #include <boost/compute/algorithm/sort.hpp>
19 #include <boost/compute/algorithm/is_sorted.hpp>
20 #include <boost/compute/container/vector.hpp>
21
22 #include "perf.hpp"
23
24 namespace po = boost::program_options;
25 namespace compute = boost::compute;
26
27 template<class T>
perf_sort(const std::vector<T> & data,const size_t trials,compute::command_queue & queue)28 double perf_sort(const std::vector<T>& data,
29 const size_t trials,
30 compute::command_queue& queue)
31 {
32 compute::vector<T> vec(data.size(), queue.get_context());
33
34 perf_timer t;
35 for(size_t trial = 0; trial < trials; trial++){
36 compute::copy(data.begin(), data.end(), vec.begin(), queue);
37 t.start();
38 compute::sort(vec.begin(), vec.end(), queue);
39 queue.finish();
40 t.stop();
41
42 if(!compute::is_sorted(vec.begin(), vec.end(), queue)){
43 std::cerr << "ERROR: is_sorted() returned false" << std::endl;
44 }
45 }
46 return t.min_time();
47 }
48
49 template<class T>
tune_sort(const std::vector<T> & data,const size_t trials,compute::command_queue & queue)50 void tune_sort(const std::vector<T>& data,
51 const size_t trials,
52 compute::command_queue& queue)
53 {
54 boost::shared_ptr<compute::detail::parameter_cache>
55 params = compute::detail::parameter_cache::get_global_cache(queue.get_device());
56
57 const std::string cache_key =
58 std::string("__boost_radix_sort_") + compute::type_name<T>();
59
60 const compute::uint_ tpbs[] = { 32, 64, 128, 256, 512, 1024 };
61
62 double min_time = (std::numeric_limits<double>::max)();
63 compute::uint_ best_tpb = 0;
64
65 for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){
66 params->set(cache_key, "tpb", tpbs[i]);
67
68 try {
69 const double t = perf_sort(data, trials, queue);
70 if(t < min_time){
71 best_tpb = tpbs[i];
72 min_time = t;
73 }
74 }
75 catch(compute::opencl_error&){
76 // invalid work group size for this device, skip
77 }
78 }
79
80 // store optimal parameters
81 params->set(cache_key, "tpb", best_tpb);
82 }
83
main(int argc,char * argv[])84 int main(int argc, char *argv[])
85 {
86 // setup command line arguments
87 po::options_description options("options");
88 options.add_options()
89 ("help", "show usage instructions")
90 ("size", po::value<size_t>()->default_value(8192), "input size")
91 ("trials", po::value<size_t>()->default_value(3), "number of trials to run")
92 ("tune", "run tuning procedure")
93 ;
94 po::positional_options_description positional_options;
95 positional_options.add("size", 1);
96
97 // parse command line
98 po::variables_map vm;
99 po::store(
100 po::command_line_parser(argc, argv)
101 .options(options).positional(positional_options).run(),
102 vm
103 );
104 po::notify(vm);
105
106 const size_t size = vm["size"].as<size_t>();
107 const size_t trials = vm["trials"].as<size_t>();
108 std::cout << "size: " << size << std::endl;
109
110 // setup context and queue for the default device
111 compute::device device = boost::compute::system::default_device();
112 compute::context context(device);
113 compute::command_queue queue(context, device);
114 std::cout << "device: " << device.name() << std::endl;
115
116 // create vector of random numbers on the host
117 std::vector<unsigned int> data(size);
118 std::generate(data.begin(), data.end(), rand);
119
120 // run tuning proceure (if requested)
121 if(vm.count("tune")){
122 tune_sort(data, trials, queue);
123 }
124
125 // run sort benchmark
126 double t = perf_sort(data, trials, queue);
127 std::cout << "time: " << t / 1e6 << " ms" << std::endl;
128
129 return 0;
130 }
131