• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10 
11 #include <iostream>
12 #include <cstdlib>
13 
14 #include <boost/compute/core.hpp>
15 #include <boost/compute/algorithm/copy.hpp>
16 #include <boost/compute/algorithm/inclusive_scan.hpp>
17 #include <boost/compute/container/vector.hpp>
18 #include <boost/compute/type_traits/type_name.hpp>
19 #include <boost/compute/utility/source.hpp>
20 
21 namespace compute = boost::compute;
22 
23 /// warning precision is not precise due
24 /// to the float error accumulation when size is large enough
25 /// for more precision use double
26 /// or a kahan sum else results can diverge
27 /// from the CPU implementation
make_sma_program(const compute::context & context)28 compute::program make_sma_program(const compute::context& context)
29 {
30     const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
31         __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize)
32         {
33             const int gid = get_global_id(0);
34 
35             float cumValues = 0.f;
36             int endIdx = gid + wSize/2;
37             int startIdx = gid -1 - wSize/2;
38 
39             if(endIdx > size -1)
40                 endIdx = size -1;
41 
42             cumValues += scannedValues[endIdx];
43             if(startIdx < 0)
44                 startIdx = -1;
45             else
46                 cumValues -= scannedValues[startIdx];
47 
48             output[gid] =(float)( cumValues / ( float )(endIdx - startIdx));
49         }
50    );
51 
52     // create sma program
53     return compute::program::build_with_source(source,context);
54 }
55 
check_results(const std::vector<float> & values,const std::vector<float> & smoothValues,unsigned int wSize)56 bool check_results(const std::vector<float>& values, const std::vector<float>& smoothValues, unsigned int wSize)
57 {
58     int size = values.size();
59     if(size != (int)smoothValues.size()) return false;
60 
61     int semiWidth = wSize/2;
62 
63     bool ret = true;
64     for(int idx = 0 ; idx < size ; ++idx)
65     {
66         int start = (std::max)(idx - semiWidth,0);
67         int end = (std::min)(idx + semiWidth,size-1);
68         float res = 0;
69         for(int j = start ; j <= end ; ++j)
70         {
71             res+= values[j];
72         }
73 
74         res /= float(end - start +1);
75 
76         if(std::abs(res-smoothValues[idx]) > 1e-3)
77         {
78             std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << std::endl;
79             ret = false;
80         }
81     }
82 
83     return ret;
84 }
85 
86 // generate a uniform law over [0,10]
myRand()87 float myRand()
88 {
89     static const double divisor = double(RAND_MAX)+1.;
90     return double(rand())/divisor * 10.;
91 }
92 
main()93 int main()
94 {
95     unsigned int size = 1024;
96     // wSize must be odd
97     unsigned int wSize = 21;
98     // get the default device
99     compute::device device = compute::system::default_device();
100     // create a context for the device
101     compute::context context(device);
102     // get the program
103     compute::program program = make_sma_program(context);
104 
105     // create vector of random numbers on the host
106     std::vector<float> host_vector(size);
107     std::vector<float> host_result(size);
108     std::generate(host_vector.begin(), host_vector.end(), myRand);
109 
110     compute::vector<float> a(size,context);
111     compute::vector<float> b(size,context);
112     compute::vector<float> c(size,context);
113     compute::command_queue queue(context, device);
114 
115     compute::copy(host_vector.begin(),host_vector.end(),a.begin(),queue);
116 
117     // scan values
118     compute::inclusive_scan(a.begin(),a.end(),b.begin(),queue);
119     // sma kernel
120     compute::kernel kernel(program, "SMA");
121     kernel.set_arg(0,b.get_buffer());
122     kernel.set_arg(1,(int)b.size());
123     kernel.set_arg(2,c.get_buffer());
124     kernel.set_arg(3,(int)wSize);
125 
126     using compute::uint_;
127     uint_ tpb = 128;
128     uint_ workSize = size;
129     queue.enqueue_1d_range_kernel(kernel,0,workSize,tpb);
130 
131     compute::copy(c.begin(),c.end(),host_result.begin(),queue);
132 
133     bool res = check_results(host_vector,host_result,wSize);
134     std::string status = res ? "results are equivalent" : "GPU results differs from CPU one's";
135     std::cout << status << std::endl;
136 
137     return 0;
138 }
139 
140