1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #include <iostream>
12 #include <cstdlib>
13
14 #include <boost/compute/core.hpp>
15 #include <boost/compute/algorithm/copy.hpp>
16 #include <boost/compute/algorithm/inclusive_scan.hpp>
17 #include <boost/compute/container/vector.hpp>
18 #include <boost/compute/type_traits/type_name.hpp>
19 #include <boost/compute/utility/source.hpp>
20
21 namespace compute = boost::compute;
22
23 /// warning precision is not precise due
24 /// to the float error accumulation when size is large enough
25 /// for more precision use double
26 /// or a kahan sum else results can diverge
27 /// from the CPU implementation
make_sma_program(const compute::context & context)28 compute::program make_sma_program(const compute::context& context)
29 {
30 const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
31 __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize)
32 {
33 const int gid = get_global_id(0);
34
35 float cumValues = 0.f;
36 int endIdx = gid + wSize/2;
37 int startIdx = gid -1 - wSize/2;
38
39 if(endIdx > size -1)
40 endIdx = size -1;
41
42 cumValues += scannedValues[endIdx];
43 if(startIdx < 0)
44 startIdx = -1;
45 else
46 cumValues -= scannedValues[startIdx];
47
48 output[gid] =(float)( cumValues / ( float )(endIdx - startIdx));
49 }
50 );
51
52 // create sma program
53 return compute::program::build_with_source(source,context);
54 }
55
check_results(const std::vector<float> & values,const std::vector<float> & smoothValues,unsigned int wSize)56 bool check_results(const std::vector<float>& values, const std::vector<float>& smoothValues, unsigned int wSize)
57 {
58 int size = values.size();
59 if(size != (int)smoothValues.size()) return false;
60
61 int semiWidth = wSize/2;
62
63 bool ret = true;
64 for(int idx = 0 ; idx < size ; ++idx)
65 {
66 int start = (std::max)(idx - semiWidth,0);
67 int end = (std::min)(idx + semiWidth,size-1);
68 float res = 0;
69 for(int j = start ; j <= end ; ++j)
70 {
71 res+= values[j];
72 }
73
74 res /= float(end - start +1);
75
76 if(std::abs(res-smoothValues[idx]) > 1e-3)
77 {
78 std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << std::endl;
79 ret = false;
80 }
81 }
82
83 return ret;
84 }
85
86 // generate a uniform law over [0,10]
myRand()87 float myRand()
88 {
89 static const double divisor = double(RAND_MAX)+1.;
90 return double(rand())/divisor * 10.;
91 }
92
main()93 int main()
94 {
95 unsigned int size = 1024;
96 // wSize must be odd
97 unsigned int wSize = 21;
98 // get the default device
99 compute::device device = compute::system::default_device();
100 // create a context for the device
101 compute::context context(device);
102 // get the program
103 compute::program program = make_sma_program(context);
104
105 // create vector of random numbers on the host
106 std::vector<float> host_vector(size);
107 std::vector<float> host_result(size);
108 std::generate(host_vector.begin(), host_vector.end(), myRand);
109
110 compute::vector<float> a(size,context);
111 compute::vector<float> b(size,context);
112 compute::vector<float> c(size,context);
113 compute::command_queue queue(context, device);
114
115 compute::copy(host_vector.begin(),host_vector.end(),a.begin(),queue);
116
117 // scan values
118 compute::inclusive_scan(a.begin(),a.end(),b.begin(),queue);
119 // sma kernel
120 compute::kernel kernel(program, "SMA");
121 kernel.set_arg(0,b.get_buffer());
122 kernel.set_arg(1,(int)b.size());
123 kernel.set_arg(2,c.get_buffer());
124 kernel.set_arg(3,(int)wSize);
125
126 using compute::uint_;
127 uint_ tpb = 128;
128 uint_ workSize = size;
129 queue.enqueue_1d_range_kernel(kernel,0,workSize,tpb);
130
131 compute::copy(c.begin(),c.end(),host_result.begin(),queue);
132
133 bool res = check_results(host_vector,host_result,wSize);
134 std::string status = res ? "results are equivalent" : "GPU results differs from CPU one's";
135 std::cout << status << std::endl;
136
137 return 0;
138 }
139
140