1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #define BOOST_TEST_MODULE TestCommandQueue
12 #include <boost/test/unit_test.hpp>
13
14 #include <iostream>
15
16 #include <boost/compute/kernel.hpp>
17 #include <boost/compute/system.hpp>
18 #include <boost/compute/program.hpp>
19 #include <boost/compute/command_queue.hpp>
20 #include <boost/compute/algorithm/fill.hpp>
21 #include <boost/compute/container/vector.hpp>
22 #include <boost/compute/utility/dim.hpp>
23 #include <boost/compute/utility/source.hpp>
24 #include <boost/compute/detail/diagnostic.hpp>
25
26 #include "check_macros.hpp"
27 #include "context_setup.hpp"
28
29 namespace bc = boost::compute;
30 namespace compute = boost::compute;
31
BOOST_AUTO_TEST_CASE(get_context)32 BOOST_AUTO_TEST_CASE(get_context)
33 {
34 BOOST_VERIFY(queue.get_context() == context);
35 BOOST_VERIFY(queue.get_info<CL_QUEUE_CONTEXT>() == context.get());
36 }
37
BOOST_AUTO_TEST_CASE(get_device)38 BOOST_AUTO_TEST_CASE(get_device)
39 {
40 BOOST_VERIFY(queue.get_info<CL_QUEUE_DEVICE>() == device.get());
41 }
42
BOOST_AUTO_TEST_CASE(equality_operator)43 BOOST_AUTO_TEST_CASE(equality_operator)
44 {
45 compute::command_queue queue1(context, device);
46 BOOST_CHECK(queue1 == queue1);
47
48 compute::command_queue queue2 = queue1;
49 BOOST_CHECK(queue1 == queue2);
50
51 compute::command_queue queue3(context, device);
52 BOOST_CHECK(queue1 != queue3);
53 }
54
BOOST_AUTO_TEST_CASE(event_profiling)55 BOOST_AUTO_TEST_CASE(event_profiling)
56 {
57 bc::command_queue queue(context, device, bc::command_queue::enable_profiling);
58
59 int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
60 bc::buffer buffer(context, sizeof(data));
61
62 bc::event event =
63 queue.enqueue_write_buffer_async(buffer,
64 0,
65 sizeof(data),
66 static_cast<const void *>(data));
67 queue.finish();
68
69 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
70 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
71 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
72 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
73 }
74
BOOST_AUTO_TEST_CASE(kernel_profiling)75 BOOST_AUTO_TEST_CASE(kernel_profiling)
76 {
77 // create queue with profiling enabled
78 boost::compute::command_queue queue(
79 context, device, boost::compute::command_queue::enable_profiling
80 );
81
82 // input data
83 int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
84 boost::compute::buffer buffer(context, sizeof(data));
85
86 // copy input data to device
87 queue.enqueue_write_buffer(buffer, 0, sizeof(data), data);
88
89 // setup kernel
90 const char source[] =
91 "__kernel void iscal(__global int *buffer, int alpha)\n"
92 "{\n"
93 " buffer[get_global_id(0)] *= alpha;\n"
94 "}\n";
95
96 boost::compute::program program =
97 boost::compute::program::create_with_source(source, context);
98 program.build();
99
100 boost::compute::kernel kernel(program, "iscal");
101 kernel.set_arg(0, buffer);
102 kernel.set_arg(1, 2);
103
104 // execute kernel
105 size_t global_work_offset = 0;
106 size_t global_work_size = 8;
107
108 boost::compute::event event =
109 queue.enqueue_nd_range_kernel(kernel,
110 size_t(1),
111 &global_work_offset,
112 &global_work_size,
113 0);
114
115 // wait until kernel is finished
116 event.wait();
117
118 // check profiling information
119 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
120 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
121 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
122 event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
123
124 // read results back to host
125 queue.enqueue_read_buffer(buffer, 0, sizeof(data), data);
126
127 // check results
128 BOOST_CHECK_EQUAL(data[0], 2);
129 BOOST_CHECK_EQUAL(data[1], 4);
130 BOOST_CHECK_EQUAL(data[2], 6);
131 BOOST_CHECK_EQUAL(data[3], 8);
132 BOOST_CHECK_EQUAL(data[4], 10);
133 BOOST_CHECK_EQUAL(data[5], 12);
134 BOOST_CHECK_EQUAL(data[6], 14);
135 BOOST_CHECK_EQUAL(data[7], 16);
136 }
137
BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue)138 BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue)
139 {
140 // create cl_command_queue
141 cl_command_queue cl_queue;
142 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
143 if (device.check_version(2, 0)){ // runtime check
144 cl_queue =
145 clCreateCommandQueueWithProperties(context, device.id(), 0, 0);
146 } else
147 #endif // BOOST_COMPUTE_CL_VERSION_2_0
148 {
149 // Suppress deprecated declarations warning
150 BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
151 cl_queue =
152 clCreateCommandQueue(context, device.id(), 0, 0);
153 BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
154 }
155 BOOST_VERIFY(cl_queue);
156
157 // create boost::compute::command_queue
158 boost::compute::command_queue queue(cl_queue);
159
160 // check queue
161 BOOST_CHECK(queue.get_context() == context);
162 BOOST_CHECK(cl_command_queue(queue) == cl_queue);
163
164 // cleanup cl_command_queue
165 clReleaseCommandQueue(cl_queue);
166 }
167
168 #ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_AUTO_TEST_CASE(write_buffer_rect)169 BOOST_AUTO_TEST_CASE(write_buffer_rect)
170 {
171 REQUIRES_OPENCL_VERSION(1, 1);
172
173 // skip this test on AMD GPUs due to a buggy implementation
174 // of the clEnqueueWriteBufferRect() function
175 if(device.vendor() == "Advanced Micro Devices, Inc." &&
176 device.type() & boost::compute::device::gpu){
177 std::cerr << "skipping write_buffer_rect test on AMD GPU" << std::endl;
178 return;
179 }
180
181 int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
182 boost::compute::buffer buffer(context, 8 * sizeof(int));
183
184 // copy every other value to the buffer
185 size_t buffer_origin[] = { 0, 0, 0 };
186 size_t host_origin[] = { 0, 0, 0 };
187 size_t region[] = { sizeof(int), sizeof(int), 1 };
188
189 queue.enqueue_write_buffer_rect(
190 buffer,
191 buffer_origin,
192 host_origin,
193 region,
194 sizeof(int),
195 0,
196 2 * sizeof(int),
197 0,
198 data
199 );
200
201 // check output values
202 int output[4];
203 queue.enqueue_read_buffer(buffer, 0, 4 * sizeof(int), output);
204 BOOST_CHECK_EQUAL(output[0], 1);
205 BOOST_CHECK_EQUAL(output[1], 3);
206 BOOST_CHECK_EQUAL(output[2], 5);
207 BOOST_CHECK_EQUAL(output[3], 7);
208 }
209 #endif // BOOST_COMPUTE_CL_VERSION_1_1
210
211 static bool nullary_kernel_executed = false;
212
nullary_kernel()213 static void nullary_kernel()
214 {
215 nullary_kernel_executed = true;
216 }
217
BOOST_AUTO_TEST_CASE(native_kernel)218 BOOST_AUTO_TEST_CASE(native_kernel)
219 {
220 cl_device_exec_capabilities exec_capabilities =
221 device.get_info<CL_DEVICE_EXECUTION_CAPABILITIES>();
222 if(!(exec_capabilities & CL_EXEC_NATIVE_KERNEL)){
223 std::cerr << "skipping native_kernel test: "
224 << "device does not support CL_EXEC_NATIVE_KERNEL"
225 << std::endl;
226 return;
227 }
228
229 compute::vector<int> vector(1000, context);
230 compute::fill(vector.begin(), vector.end(), 42, queue);
231 BOOST_CHECK_EQUAL(nullary_kernel_executed, false);
232 queue.enqueue_native_kernel(&nullary_kernel);
233 queue.finish();
234 BOOST_CHECK_EQUAL(nullary_kernel_executed, true);
235 }
236
BOOST_AUTO_TEST_CASE(copy_with_wait_list)237 BOOST_AUTO_TEST_CASE(copy_with_wait_list)
238 {
239 int data1[] = { 1, 3, 5, 7 };
240 int data2[] = { 2, 4, 6, 8 };
241
242 compute::buffer buf1(context, 4 * sizeof(int));
243 compute::buffer buf2(context, 4 * sizeof(int));
244
245 compute::event write_event1 =
246 queue.enqueue_write_buffer_async(buf1, 0, buf1.size(), data1);
247
248 compute::event write_event2 =
249 queue.enqueue_write_buffer_async(buf2, 0, buf2.size(), data2);
250
251 compute::event read_event1 =
252 queue.enqueue_read_buffer_async(buf1, 0, buf1.size(), data2, write_event1);
253
254 compute::event read_event2 =
255 queue.enqueue_read_buffer_async(buf2, 0, buf2.size(), data1, write_event2);
256
257 read_event1.wait();
258 read_event2.wait();
259
260 CHECK_HOST_RANGE_EQUAL(int, 4, data1, (2, 4, 6, 8));
261 CHECK_HOST_RANGE_EQUAL(int, 4, data2, (1, 3, 5, 7));
262 }
263
264 #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)265 BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)
266 {
267 using boost::compute::dim;
268 using boost::compute::uint_;
269
270 const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
271 __kernel void foo(__global int *output1, __global int *output2)
272 {
273 output1[get_global_id(0)] = get_local_id(0);
274 output2[get_global_id(1)] = get_local_id(1);
275 }
276 );
277
278 compute::kernel kernel =
279 compute::kernel::create_with_source(source, "foo", context);
280
281 compute::vector<uint_> output1(4, context);
282 compute::vector<uint_> output2(4, context);
283
284 kernel.set_arg(0, output1);
285 kernel.set_arg(1, output2);
286
287 queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(1, 1));
288
289 CHECK_RANGE_EQUAL(int, 4, output1, (0, 0, 0, 0));
290 CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
291
292 // Maximum number of work-items that can be specified in each
293 // dimension of the work-group to clEnqueueNDRangeKernel.
294 std::vector<size_t> max_work_item_sizes =
295 device.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
296
297 if(max_work_item_sizes[0] < size_t(2)) {
298 return;
299 }
300
301 queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 1));
302
303 CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
304 CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));
305
306 if(max_work_item_sizes[1] < size_t(2)) {
307 return;
308 }
309
310 queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 2));
311
312 CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
313 CHECK_RANGE_EQUAL(int, 4, output2, (0, 1, 0, 1));
314 }
315 #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
316
317 #ifdef BOOST_COMPUTE_CL_VERSION_2_1
BOOST_AUTO_TEST_CASE(get_default_device_queue)318 BOOST_AUTO_TEST_CASE(get_default_device_queue)
319 {
320 REQUIRES_OPENCL_VERSION(2, 1);
321
322 boost::compute::command_queue default_device_queue(
323 context, device,
324 boost::compute::command_queue::on_device |
325 boost::compute::command_queue::on_device_default |
326 boost::compute::command_queue::enable_out_of_order_execution
327 );
328 BOOST_CHECK_NO_THROW(queue.get_info<CL_QUEUE_DEVICE_DEFAULT>());
329 BOOST_CHECK_EQUAL(
330 queue.get_default_device_queue(),
331 default_device_queue
332 );
333 }
334
BOOST_AUTO_TEST_CASE(set_as_default_device_queue)335 BOOST_AUTO_TEST_CASE(set_as_default_device_queue)
336 {
337 REQUIRES_OPENCL_VERSION(2, 1);
338
339 boost::compute::command_queue new_default_device_queue(
340 context, device,
341 boost::compute::command_queue::on_device |
342 boost::compute::command_queue::enable_out_of_order_execution
343 );
344 new_default_device_queue.set_as_default_device_queue();
345 BOOST_CHECK_EQUAL(
346 queue.get_default_device_queue(),
347 new_default_device_queue
348 );
349 }
350 #endif
351
352 BOOST_AUTO_TEST_SUITE_END()
353