1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
// Undefine the BOOST_COMPUTE_USE_OFFLINE_CACHE macro so that the cached
// parameters of the scan algorithm can be modified by these tests without
// any undesirable consequences (such as the modified values being saved).
// #undef on a macro that is not currently defined is a no-op, so no #ifdef
// guard is required around it.
#undef BOOST_COMPUTE_USE_OFFLINE_CACHE
17
18 #define BOOST_TEST_MODULE TestScan
19 #include <boost/test/unit_test.hpp>
20
21 #include <numeric>
22 #include <functional>
23 #include <vector>
24
25 #include <boost/compute/functional.hpp>
26 #include <boost/compute/lambda.hpp>
27 #include <boost/compute/system.hpp>
28 #include <boost/compute/command_queue.hpp>
29 #include <boost/compute/algorithm/copy.hpp>
30 #include <boost/compute/algorithm/exclusive_scan.hpp>
31 #include <boost/compute/algorithm/inclusive_scan.hpp>
32 #include <boost/compute/container/vector.hpp>
33 #include <boost/compute/iterator/counting_iterator.hpp>
34 #include <boost/compute/iterator/transform_iterator.hpp>
35
36 #include "check_macros.hpp"
37 #include "context_setup.hpp"
38
39 namespace bc = boost::compute;
40
BOOST_AUTO_TEST_CASE(inclusive_scan_int)
{
    // Inclusive prefix sums of 0..11, checked twice: first with the
    // parameters the library picks up by default, then again with the
    // "serial_scan_threshold" cache entry set to zero.
    bc::int_ host_values[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

    bc::vector<bc::int_> input(host_values, host_values + 12, queue);
    BOOST_CHECK_EQUAL(input.size(), size_t(12));

    bc::vector<bc::int_> output(12, context);
    BOOST_CHECK_EQUAL(output.size(), size_t(12));

    // ---- pass 1: default parameters ----

    // scan into a separate destination
    bc::inclusive_scan(input.begin(), input.end(), output.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, output,
        (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66)
    );

    // the source must be untouched so far; then scan it onto itself
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
    );
    bc::inclusive_scan(input.begin(), input.end(), input.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66)
    );

    // ---- pass 2: scan_on_cpu ----

    // the in-place scan overwrote the input, so reload the original values
    bc::copy(host_values, host_values + 12, input.begin(), queue);

    // A zero threshold is meant to make the parallel scan_on_cpu variant
    // run instead of the serial scan.
    const std::string cache_key("__boost_scan_cpu_4");
    const std::string threshold_name("serial_scan_threshold");
    boost::shared_ptr<bc::detail::parameter_cache> cache =
        bc::detail::parameter_cache::get_global_cache(device);

    // remember the current value so it can be written back afterwards
    // NOTE(review): presumably the last argument of get() is the fallback
    // used when no entry exists, in which case the write-back below stores
    // 0 rather than the library's own default - confirm this is intended.
    const bc::uint_ saved_threshold = cache->get(cache_key, threshold_name, 0);
    cache->set(cache_key, threshold_name, 0);

    // scan into a separate destination
    bc::inclusive_scan(input.begin(), input.end(), output.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, output,
        (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66)
    );

    // source untouched, then scan it onto itself
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
    );
    bc::inclusive_scan(input.begin(), input.end(), input.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66)
    );

    // write the remembered threshold back
    cache->set(cache_key, threshold_name, saved_threshold);
}
90
BOOST_AUTO_TEST_CASE(exclusive_scan_int)
{
    // Exclusive prefix sums of 0..11, checked twice: first with the
    // parameters the library picks up by default, then again with the
    // "serial_scan_threshold" cache entry set to zero.
    bc::int_ host_values[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };

    bc::vector<bc::int_> input(host_values, host_values + 12, queue);
    BOOST_CHECK_EQUAL(input.size(), size_t(12));

    // destination starts out zero-filled
    bc::vector<bc::int_> output(size_t(12), bc::int_(0), queue);
    BOOST_CHECK_EQUAL(output.size(), size_t(12));

    // ---- pass 1: default parameters ----

    // scan into a separate destination
    bc::exclusive_scan(input.begin(), input.end(), output.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, output,
        (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55)
    );

    // the source must be untouched so far; then scan it onto itself
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
    );
    bc::exclusive_scan(input.begin(), input.end(), input.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55)
    );

    // ---- pass 2: scan_on_cpu ----

    // the in-place scan overwrote the input, so reload the original values
    bc::copy(host_values, host_values + 12, input.begin(), queue);

    // A zero threshold is meant to make the parallel scan_on_cpu variant
    // run instead of the serial scan.
    const std::string cache_key("__boost_scan_cpu_4");
    const std::string threshold_name("serial_scan_threshold");
    boost::shared_ptr<bc::detail::parameter_cache> cache =
        bc::detail::parameter_cache::get_global_cache(device);

    // remember the current value so it can be written back afterwards
    // NOTE(review): presumably the last argument of get() is the fallback
    // used when no entry exists, in which case the write-back below stores
    // 0 rather than the library's own default - confirm this is intended.
    const bc::uint_ saved_threshold = cache->get(cache_key, threshold_name, 0);
    cache->set(cache_key, threshold_name, 0);

    // scan into a separate destination
    bc::exclusive_scan(input.begin(), input.end(), output.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, output,
        (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55)
    );

    // source untouched, then scan it onto itself
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
    );
    bc::exclusive_scan(input.begin(), input.end(), input.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 12, input,
        (0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55)
    );

    // write the remembered threshold back
    cache->set(cache_key, threshold_name, saved_threshold);
}
139
BOOST_AUTO_TEST_CASE(inclusive_scan_int2)
{
    // Inclusive scan over two-component integer vectors; each component is
    // expected to accumulate independently. Checked once with default
    // parameters and once with "serial_scan_threshold" set to zero.
    using boost::compute::int_;
    using boost::compute::uint_;
    using boost::compute::int2_;

    // The host data is declared directly as int2_ elements. Viewing a plain
    // int_ array through a reinterpret_cast'ed int2_ pointer would read the
    // ints through an unrelated class type, which breaks the strict-aliasing
    // rule and depends on the two types sharing layout and alignment.
    int2_ data[] = {
        int2_(1, 2),
        int2_(3, 4),
        int2_(5, 6),
        int2_(7, 8),
        int2_(9, 0)
    };

    boost::compute::vector<int2_> input(data, data + 5, queue);
    BOOST_CHECK_EQUAL(input.size(), size_t(5));

    boost::compute::vector<int2_> output(5, context);
    boost::compute::inclusive_scan(input.begin(), input.end(), output.begin(),
                                   queue);
    CHECK_RANGE_EQUAL(
        int2_, 5, output,
        (int2_(1, 2), int2_(4, 6), int2_(9, 12), int2_(16, 20), int2_(25, 20))
    );

    // scan_on_cpu

    // make sure parallel scan_on_cpu is used, no serial_scan
    std::string cache_key =
        "__boost_scan_cpu_8";
    boost::shared_ptr<bc::detail::parameter_cache> parameters =
        bc::detail::parameter_cache::get_global_cache(device);

    // save the current threshold so it can be written back afterwards
    // NOTE(review): presumably the last argument of get() is the fallback
    // used when no entry exists, in which case the restore below stores 0
    // rather than the library's own default - confirm this is intended.
    uint_ saved_threshold =
        parameters->get(cache_key, "serial_scan_threshold", 0);
    // force parallel scan_on_cpu
    parameters->set(cache_key, "serial_scan_threshold", 0);

    boost::compute::inclusive_scan(input.begin(), input.end(), output.begin(),
                                   queue);
    CHECK_RANGE_EQUAL(
        int2_, 5, output,
        (int2_(1, 2), int2_(4, 6), int2_(9, 12), int2_(16, 20), int2_(25, 20))
    );

    // restore
    parameters->set(cache_key, "serial_scan_threshold", saved_threshold);
}
189
BOOST_AUTO_TEST_CASE(inclusive_scan_counting_iterator)
{
    // Inclusive scan whose input is the range [1, 11) produced by counting
    // iterators rather than a device vector. Checked once with default
    // parameters and once with "serial_scan_threshold" set to zero.
    bc::counting_iterator<int> first = bc::make_counting_iterator(1);
    bc::counting_iterator<int> last = bc::make_counting_iterator(11);

    bc::vector<bc::int_> sums(10, context);

    // ---- pass 1: default parameters ----
    bc::inclusive_scan(first, last, sums.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 10, sums,
        (1, 3, 6, 10, 15, 21, 28, 36, 45, 55)
    );

    // ---- pass 2: scan_on_cpu ----

    // A zero threshold is meant to make the parallel scan_on_cpu variant
    // run instead of the serial scan.
    const std::string cache_key("__boost_scan_cpu_4");
    const std::string threshold_name("serial_scan_threshold");
    boost::shared_ptr<bc::detail::parameter_cache> cache =
        bc::detail::parameter_cache::get_global_cache(device);

    // remember the current value so it can be written back afterwards
    const bc::uint_ saved_threshold = cache->get(cache_key, threshold_name, 0);
    cache->set(cache_key, threshold_name, 0);

    bc::inclusive_scan(first, last, sums.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 10, sums,
        (1, 3, 6, 10, 15, 21, 28, 36, 45, 55)
    );

    // write the remembered threshold back
    cache->set(cache_key, threshold_name, saved_threshold);
}
223
BOOST_AUTO_TEST_CASE(exclusive_scan_counting_iterator)
{
    // Exclusive scan whose input is the range [1, 11) produced by counting
    // iterators rather than a device vector. Checked once with default
    // parameters and once with "serial_scan_threshold" set to zero.
    bc::counting_iterator<int> first = bc::make_counting_iterator(1);
    bc::counting_iterator<int> last = bc::make_counting_iterator(11);

    bc::vector<bc::int_> sums(10, context);

    // ---- pass 1: default parameters ----
    bc::exclusive_scan(first, last, sums.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 10, sums,
        (0, 1, 3, 6, 10, 15, 21, 28, 36, 45)
    );

    // ---- pass 2: scan_on_cpu ----

    // A zero threshold is meant to make the parallel scan_on_cpu variant
    // run instead of the serial scan.
    const std::string cache_key("__boost_scan_cpu_4");
    const std::string threshold_name("serial_scan_threshold");
    boost::shared_ptr<bc::detail::parameter_cache> cache =
        bc::detail::parameter_cache::get_global_cache(device);

    // remember the current value so it can be written back afterwards
    const bc::uint_ saved_threshold = cache->get(cache_key, threshold_name, 0);
    cache->set(cache_key, threshold_name, 0);

    bc::exclusive_scan(first, last, sums.begin(), queue);
    CHECK_RANGE_EQUAL(
        bc::int_, 10, sums,
        (0, 1, 3, 6, 10, 15, 21, 28, 36, 45)
    );

    // write the remembered threshold back
    cache->set(cache_key, threshold_name, saved_threshold);
}
257
BOOST_AUTO_TEST_CASE(inclusive_scan_transform_iterator)
{
    // Inclusive scan over floats, first on the raw values and then on their
    // squares supplied lazily through transform iterators.
    using ::boost::compute::_1;

    float host_values[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f };
    bc::vector<float> input(host_values, host_values + 5, queue);
    bc::vector<float> output(5, context);

    // running sums of the values themselves
    bc::inclusive_scan(input.begin(), input.end(), output.begin(), queue);
    queue.finish();

    const float plain_sums[] = { 1.0f, 3.0f, 6.0f, 10.0f, 15.0f };
    for(size_t i = 0; i < 5; ++i){
        BOOST_CHECK_CLOSE(float(output[i]), plain_sums[i], 1e-4f);
    }

    // running sums of the squared values
    bc::inclusive_scan(
        bc::make_transform_iterator(input.begin(), pown(_1, 2)),
        bc::make_transform_iterator(input.end(), pown(_1, 2)),
        output.begin(),
        queue
    );
    queue.finish();

    const float squared_sums[] = { 1.0f, 5.0f, 14.0f, 30.0f, 55.0f };
    for(size_t i = 0; i < 5; ++i){
        BOOST_CHECK_CLOSE(float(output[i]), squared_sums[i], 1e-4f);
    }
}
286
BOOST_AUTO_TEST_CASE(inclusive_scan_doctest)
{
    // The code between the two "//! [inclusive_scan_int]" markers is a
    // delimited snippet; the check after it verifies the stated output.

    //! [inclusive_scan_int]
    // copy the input values to the device
    int host_values[] = { 1, 2, 3, 4 };
    boost::compute::vector<int> input(host_values, host_values + 4, queue);

    // allocate room for the running sums
    boost::compute::vector<int> output(4, context);

    // compute the running sums
    boost::compute::inclusive_scan(input.begin(),
                                   input.end(),
                                   output.begin(),
                                   queue);

    // output = [ 1, 3, 6, 10 ]
    //! [inclusive_scan_int]

    CHECK_RANGE_EQUAL(int, 4, output, (1, 3, 6, 10));
}
307
BOOST_AUTO_TEST_CASE(exclusive_scan_doctest)
{
    // The code between the two "//! [exclusive_scan_int]" markers is a
    // delimited snippet; the check after it verifies the stated output.

    //! [exclusive_scan_int]
    // copy the input values to the device
    int host_values[] = { 1, 2, 3, 4 };
    boost::compute::vector<int> input(host_values, host_values + 4, queue);

    // allocate room for the running sums
    boost::compute::vector<int> output(4, context);

    // compute the running sums, each excluding its own element
    boost::compute::exclusive_scan(input.begin(),
                                   input.end(),
                                   output.begin(),
                                   queue);

    // output = [ 0, 1, 3, 6 ]
    //! [exclusive_scan_int]

    CHECK_RANGE_EQUAL(int, 4, output, (0, 1, 3, 6));
}
328
BOOST_AUTO_TEST_CASE(inclusive_scan_int_multiplies)
{
    // Inclusive scan with multiplication as the binary operation, into a
    // separate destination (inside the delimited snippet) and then in place.

    //! [inclusive_scan_int_multiplies]
    // copy the input values to the device
    int host_values[] = { 1, 2, 1, 2, 3 };
    boost::compute::vector<int> input(host_values, host_values + 5, queue);

    // allocate room for the running products
    boost::compute::vector<int> output(5, context);

    // inclusive scan using multiplication
    boost::compute::inclusive_scan(input.begin(),
                                   input.end(),
                                   output.begin(),
                                   boost::compute::multiplies<int>(),
                                   queue);

    // output = [1, 2, 2, 4, 12]
    //! [inclusive_scan_int_multiplies]

    BOOST_CHECK_EQUAL(input.size(), size_t(5));
    BOOST_CHECK_EQUAL(output.size(), size_t(5));

    CHECK_RANGE_EQUAL(int, 5, output, (1, 2, 2, 4, 12));

    // the source must still be intact before it is scanned onto itself
    CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3));
    bc::inclusive_scan(
        input.begin(), input.end(), input.begin(),
        bc::multiplies<int>(), queue
    );
    CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 2, 4, 12));
}
359
BOOST_AUTO_TEST_CASE(exclusive_scan_int_multiplies)
{
    // Exclusive scan with multiplication and an initial value of 10, into a
    // separate destination (inside the delimited snippet) and then in place.

    //! [exclusive_scan_int_multiplies]
    // copy the input values to the device
    int host_values[] = { 1, 2, 1, 2, 3 };
    boost::compute::vector<int> input(host_values, host_values + 5, queue);

    // allocate room for the running products
    boost::compute::vector<int> output(5, context);

    // exclusive scan using multiplication,
    // starting from an initial value of 10
    boost::compute::exclusive_scan(input.begin(),
                                   input.end(),
                                   output.begin(),
                                   int(10),
                                   boost::compute::multiplies<int>(),
                                   queue);

    // output = [10, 10, 20, 20, 40]
    //! [exclusive_scan_int_multiplies]

    BOOST_CHECK_EQUAL(input.size(), size_t(5));
    BOOST_CHECK_EQUAL(output.size(), size_t(5));

    CHECK_RANGE_EQUAL(int, 5, output, (10, 10, 20, 20, 40));

    // the source must still be intact before it is scanned onto itself
    CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3));
    bc::exclusive_scan(
        input.begin(), input.end(), input.begin(),
        int(10), bc::multiplies<int>(), queue
    );
    CHECK_RANGE_EQUAL(int, 5, input, (10, 10, 20, 20, 40));
}
391
BOOST_AUTO_TEST_CASE(inclusive_scan_int_multiplies_long_vector)
{
    // In-place inclusive multiplicative scan over 1000 copies of 2, compared
    // element by element against a reference computed on the host.
    size_t size = 1000;
    bc::vector<int> device_vector(size, int(2), queue);
    BOOST_CHECK_EQUAL(device_vector.size(), size);
    bc::inclusive_scan(device_vector.begin(), device_vector.end(),
                       device_vector.begin(), bc::multiplies<int>(), queue);

    std::vector<int> host_vector(size, 2);
    BOOST_CHECK_EQUAL(host_vector.size(), size);
    bc::copy(device_vector.begin(), device_vector.end(),
             host_vector.begin(), queue);

    // Host reference. The running product of 1000 twos leaves the range of
    // int long before the end of the vector, and signed overflow is undefined
    // behaviour in C++, so the products are formed in unsigned arithmetic
    // (which wraps modulo 2^N) and converted back to int afterwards.
    // NOTE(review): this assumes the device-side int multiplication wraps
    // the same way, which the comparison below already relied on - confirm.
    std::vector<int> test(size, 2);
    BOOST_CHECK_EQUAL(test.size(), size);
    for(size_t i = 1; i < size; ++i){
        test[i] = static_cast<int>(
            static_cast<unsigned int>(test[i - 1]) *
            static_cast<unsigned int>(test[i])
        );
    }

    BOOST_CHECK_EQUAL_COLLECTIONS(host_vector.begin(), host_vector.end(),
                                  test.begin(), test.end());
}
413
BOOST_AUTO_TEST_CASE(exclusive_scan_int_multiplies_long_vector)
{
    // In-place exclusive multiplicative scan (initial value 10) over 1000
    // copies of 2, compared element by element against a reference computed
    // on the host.
    size_t size = 1000;
    bc::vector<int> device_vector(size, int(2), queue);
    BOOST_CHECK_EQUAL(device_vector.size(), size);
    bc::exclusive_scan(device_vector.begin(), device_vector.end(),
                       device_vector.begin(), int(10), bc::multiplies<int>(),
                       queue);

    std::vector<int> host_vector(size, 2);
    BOOST_CHECK_EQUAL(host_vector.size(), size);
    bc::copy(device_vector.begin(), device_vector.end(),
             host_vector.begin(), queue);

    // Host reference: seeding element 0 with the initial value and taking
    // running products of the remaining twos reproduces the exclusive scan.
    // The products leave the range of int long before the end of the vector,
    // and signed overflow is undefined behaviour in C++, so they are formed
    // in unsigned arithmetic (which wraps modulo 2^N) and converted back.
    // NOTE(review): this assumes the device-side int multiplication wraps
    // the same way, which the comparison below already relied on - confirm.
    std::vector<int> test(size, 2);
    BOOST_CHECK_EQUAL(test.size(), size);
    test[0] = 10;
    for(size_t i = 1; i < size; ++i){
        test[i] = static_cast<int>(
            static_cast<unsigned int>(test[i - 1]) *
            static_cast<unsigned int>(test[i])
        );
    }

    BOOST_CHECK_EQUAL_COLLECTIONS(host_vector.begin(), host_vector.end(),
                                  test.begin(), test.end());
}
437
BOOST_AUTO_TEST_CASE(inclusive_scan_int_custom_function)
{
    // Inclusive scan driven by a user-defined binary function that returns
    // twice the product of its arguments.
    BOOST_COMPUTE_FUNCTION(int, multi, (int x, int y),
    {
        return x * y * 2;
    });

    int host_values[] = { 1, 2, 1, 2, 3 };

    bc::vector<int> input(host_values, host_values + 5, queue);
    BOOST_CHECK_EQUAL(input.size(), size_t(5));

    bc::vector<int> output(5, context);
    BOOST_CHECK_EQUAL(output.size(), size_t(5));

    // scan into a separate destination
    bc::inclusive_scan(
        input.begin(), input.end(), output.begin(), multi, queue
    );
    CHECK_RANGE_EQUAL(int, 5, output, (1, 4, 8, 32, 192));

    // the source must still be intact before it is scanned onto itself
    CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3));
    bc::inclusive_scan(
        input.begin(), input.end(), input.begin(), multi, queue
    );
    CHECK_RANGE_EQUAL(int, 5, input, (1, 4, 8, 32, 192));
}
463
BOOST_AUTO_TEST_CASE(exclusive_scan_int_custom_function)
{
    // Exclusive scan (initial value 1) driven by a user-defined binary
    // function that returns twice the product of its arguments.
    BOOST_COMPUTE_FUNCTION(int, multi, (int x, int y),
    {
        return x * y * 2;
    });

    int host_values[] = { 1, 2, 1, 2, 3 };

    bc::vector<int> input(host_values, host_values + 5, queue);
    BOOST_CHECK_EQUAL(input.size(), size_t(5));

    bc::vector<int> output(5, context);
    BOOST_CHECK_EQUAL(output.size(), size_t(5));

    // scan into a separate destination
    bc::exclusive_scan(
        input.begin(), input.end(), output.begin(), int(1), multi, queue
    );
    CHECK_RANGE_EQUAL(int, 5, output, (1, 2, 8, 16, 64));

    // the source must still be intact before it is scanned onto itself
    CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 1, 2, 3));
    bc::exclusive_scan(
        input.begin(), input.end(), input.begin(), int(1), multi, queue
    );
    CHECK_RANGE_EQUAL(int, 5, input, (1, 2, 8, 16, 64));
}
489
// NOTE(review): no matching *_TEST_SUITE opener is visible in this file;
// presumably the suite is opened by one of the included test headers - confirm.
BOOST_AUTO_TEST_SUITE_END()
491