• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10 
11 #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
12 #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
13 
14 #include <boost/static_assert.hpp>
15 #include <boost/preprocessor/seq/for_each.hpp>
16 
17 #include <boost/compute/system.hpp>
18 #include <boost/compute/functional.hpp>
19 #include <boost/compute/command_queue.hpp>
20 #include <boost/compute/algorithm/reduce.hpp>
21 #include <boost/compute/algorithm/detail/serial_accumulate.hpp>
22 #include <boost/compute/container/array.hpp>
23 #include <boost/compute/container/vector.hpp>
24 #include <boost/compute/type_traits/is_device_iterator.hpp>
25 #include <boost/compute/detail/iterator_range_size.hpp>
26 
27 namespace boost {
28 namespace compute {
29 namespace detail {
30 
31 // Space complexity O(1)
32 template<class InputIterator, class T, class BinaryFunction>
generic_accumulate(InputIterator first,InputIterator last,T init,BinaryFunction function,command_queue & queue)33 inline T generic_accumulate(InputIterator first,
34                             InputIterator last,
35                             T init,
36                             BinaryFunction function,
37                             command_queue &queue)
38 {
39     const context &context = queue.get_context();
40 
41     size_t size = iterator_range_size(first, last);
42     if(size == 0){
43         return init;
44     }
45 
46     // accumulate on device
47     array<T, 1> device_result(context);
48     detail::serial_accumulate(
49         first, last, device_result.begin(), init, function, queue
50     );
51 
52     // copy result to host
53     T result;
54     ::boost::compute::copy_n(device_result.begin(), 1, &result, queue);
55     return result;
56 }
57 
// Tells whether a call to accumulate() may be carried out with reduce()
// instead. That is only valid when the function can be applied in any
// order (such as addition of integers) and the initial value is the
// identity value for the operation (zero for addition, one for
// multiplication).
//
// This is the catch-all overload: for an arbitrary combination of initial
// value and function none of that is known, so the answer is always false.
template<class T, class F>
inline bool can_accumulate_with_reduce(T init, F function)
{
    // the arguments only take part in overload selection
    static_cast<void>(init);
    static_cast<void>(function);

    return false;
}
70 
71 /// \internal_
72 #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \
73     inline bool can_accumulate_with_reduce(type init, plus<type>) \
74     { \
75         return init == type(0); \
76     } \
77     inline bool can_accumulate_with_reduce(type init, multiplies<type>) \
78     { \
79         return init == type(1); \
80     }
81 
82 BOOST_PP_SEQ_FOR_EACH(
83     BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE,
84     _,
85     (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_)
86 )
87 
88 template<class T>
can_accumulate_with_reduce(T init,min<T>)89 inline bool can_accumulate_with_reduce(T init, min<T>)
90 {
91     return init == (std::numeric_limits<T>::max)();
92 }
93 
94 template<class T>
can_accumulate_with_reduce(T init,max<T>)95 inline bool can_accumulate_with_reduce(T init, max<T>)
96 {
97     return init == (std::numeric_limits<T>::min)();
98 }
99 
100 #undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE
101 
102 template<class InputIterator, class T, class BinaryFunction>
dispatch_accumulate(InputIterator first,InputIterator last,T init,BinaryFunction function,command_queue & queue)103 inline T dispatch_accumulate(InputIterator first,
104                              InputIterator last,
105                              T init,
106                              BinaryFunction function,
107                              command_queue &queue)
108 {
109     size_t size = iterator_range_size(first, last);
110     if(size == 0){
111         return init;
112     }
113 
114     if(can_accumulate_with_reduce(init, function)){
115         T result;
116         reduce(first, last, &result, function, queue);
117         return result;
118     }
119     else {
120         return generic_accumulate(first, last, init, function, queue);
121     }
122 }
123 
124 } // end detail namespace
125 
126 /// Returns the result of applying \p function to the elements in the
127 /// range [\p first, \p last) and \p init.
128 ///
129 /// If no function is specified, \c plus will be used.
130 ///
131 /// \param first first element in the input range
132 /// \param last last element in the input range
133 /// \param init initial value
134 /// \param function binary reduction function
135 /// \param queue command queue to perform the operation
136 ///
137 /// \return the accumulated result value
138 ///
139 /// In specific situations the call to \c accumulate() can be automatically
140 /// optimized to a call to the more efficient \c reduce() algorithm. This
141 /// occurs when the binary reduction function is recognized as associative
142 /// (such as the \c plus<int> function).
143 ///
144 /// Note that because floating-point addition is not associative, calling
145 /// \c accumulate() with \c plus<float> results in a less efficient serial
146 /// reduction algorithm being executed. If a slight loss in precision is
147 /// acceptable, the more efficient parallel \c reduce() algorithm should be
148 /// used instead.
149 ///
150 /// For example:
151 /// \code
152 /// // with vec = boost::compute::vector<int>
153 /// accumulate(vec.begin(), vec.end(), 0, plus<int>());   // fast
154 /// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast
155 ///
156 /// // with vec = boost::compute::vector<float>
157 /// accumulate(vec.begin(), vec.end(), 0, plus<float>());   // slow
158 /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast
159 /// \endcode
160 ///
161 /// Space complexity: \Omega(1)<br>
162 /// Space complexity when optimized to \c reduce(): \Omega(n)
163 ///
164 /// \see reduce()
165 template<class InputIterator, class T, class BinaryFunction>
accumulate(InputIterator first,InputIterator last,T init,BinaryFunction function,command_queue & queue=system::default_queue ())166 inline T accumulate(InputIterator first,
167                     InputIterator last,
168                     T init,
169                     BinaryFunction function,
170                     command_queue &queue = system::default_queue())
171 {
172     BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value);
173 
174     return detail::dispatch_accumulate(first, last, init, function, queue);
175 }
176 
177 /// \overload
178 template<class InputIterator, class T>
accumulate(InputIterator first,InputIterator last,T init,command_queue & queue=system::default_queue ())179 inline T accumulate(InputIterator first,
180                     InputIterator last,
181                     T init,
182                     command_queue &queue = system::default_queue())
183 {
184     BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value);
185     typedef typename std::iterator_traits<InputIterator>::value_type IT;
186 
187     return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue);
188 }
189 
190 } // end compute namespace
191 } // end boost namespace
192 
193 #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
194