1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10
11 #define BOOST_TEST_MODULE TestKernel
12 #include <boost/test/unit_test.hpp>
13
14 #include <boost/compute/buffer.hpp>
15 #include <boost/compute/kernel.hpp>
16 #include <boost/compute/types.hpp>
17 #include <boost/compute/system.hpp>
18 #include <boost/compute/utility/source.hpp>
19
20 #include "context_setup.hpp"
21 #include "check_macros.hpp"
22
23 namespace compute = boost::compute;
24
BOOST_AUTO_TEST_CASE(name)25 BOOST_AUTO_TEST_CASE(name)
26 {
27 compute::kernel foo = compute::kernel::create_with_source(
28 "__kernel void foo(int x) { }", "foo", context
29 );
30 BOOST_CHECK_EQUAL(foo.name(), "foo");
31
32 compute::kernel bar = compute::kernel::create_with_source(
33 "__kernel void bar(float x) { }", "bar", context
34 );
35 BOOST_CHECK_EQUAL(bar.name(), "bar");
36 }
37
BOOST_AUTO_TEST_CASE(arity)38 BOOST_AUTO_TEST_CASE(arity)
39 {
40 compute::kernel foo = compute::kernel::create_with_source(
41 "__kernel void foo(int x) { }", "foo", context
42 );
43 BOOST_CHECK_EQUAL(foo.arity(), size_t(1));
44
45 compute::kernel bar = compute::kernel::create_with_source(
46 "__kernel void bar(float x, float y) { }", "bar", context
47 );
48 BOOST_CHECK_EQUAL(bar.arity(), size_t(2));
49
50 compute::kernel baz = compute::kernel::create_with_source(
51 "__kernel void baz(char x, char y, char z) { }", "baz", context
52 );
53 BOOST_CHECK_EQUAL(baz.arity(), size_t(3));
54 }
55
BOOST_AUTO_TEST_CASE(set_buffer_arg)56 BOOST_AUTO_TEST_CASE(set_buffer_arg)
57 {
58 const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
59 __kernel void foo(__global int *x, __global int *y)
60 {
61 x[get_global_id(0)] = -y[get_global_id(0)];
62 }
63 );
64
65 compute::kernel foo =
66 compute::kernel::create_with_source(source, "foo", context);
67
68 compute::buffer x(context, 16);
69 compute::buffer y(context, 16);
70
71 foo.set_arg(0, x);
72 foo.set_arg(1, y.get());
73 }
74
BOOST_AUTO_TEST_CASE(get_work_group_info)75 BOOST_AUTO_TEST_CASE(get_work_group_info)
76 {
77 const char source[] =
78 "__kernel void sum(__global const float *input,\n"
79 " __global float *output)\n"
80 "{\n"
81 " __local float scratch[16];\n"
82 " const uint gid = get_global_id(0);\n"
83 " const uint lid = get_local_id(0);\n"
84 " if(lid < 16)\n"
85 " scratch[lid] = input[gid];\n"
86 "}\n";
87
88 compute::program program =
89 compute::program::create_with_source(source, context);
90
91 program.build();
92
93 compute::kernel kernel = program.create_kernel("sum");
94
95 using compute::ulong_;
96
97 // get local memory size
98 kernel.get_work_group_info<ulong_>(device, CL_KERNEL_LOCAL_MEM_SIZE);
99
100 // check work group size
101 size_t work_group_size =
102 kernel.get_work_group_info<size_t>(device, CL_KERNEL_WORK_GROUP_SIZE);
103 BOOST_CHECK(work_group_size >= 1);
104 }
105
106 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
BOOST_AUTO_TEST_CASE(kernel_set_args)107 BOOST_AUTO_TEST_CASE(kernel_set_args)
108 {
109 compute::kernel k = compute::kernel::create_with_source(
110 "__kernel void test(int x, float y, char z) { }", "test", context
111 );
112
113 k.set_args(4, 2.4f, 'a');
114 }
115 #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
116
117 // Originally failed to compile on macOS (several types are resolved differently)
BOOST_AUTO_TEST_CASE(kernel_set_args_mac)118 BOOST_AUTO_TEST_CASE(kernel_set_args_mac)
119 {
120 compute::kernel k = compute::kernel::create_with_source(
121 "__kernel void test(unsigned int a, unsigned long b) { }", "test", context
122 );
123
124 compute::uint_ a;
125 compute::ulong_ b;
126
127 k.set_arg(0, a);
128 k.set_arg(1, b);
129 }
130
131
132 #ifdef BOOST_COMPUTE_CL_VERSION_1_2
BOOST_AUTO_TEST_CASE(get_arg_info)133 BOOST_AUTO_TEST_CASE(get_arg_info)
134 {
135 REQUIRES_OPENCL_VERSION(1, 2);
136
137 const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
138 __kernel void sum_kernel(__global const int *input,
139 const uint size,
140 __global int *result)
141 {
142 int sum = 0;
143 for(uint i = 0; i < size; i++){
144 sum += input[i];
145 }
146 *result = sum;
147 }
148 );
149
150 compute::program program =
151 compute::program::create_with_source(source, context);
152
153 program.build("-cl-kernel-arg-info");
154
155 compute::kernel kernel = program.create_kernel("sum_kernel");
156
157 BOOST_CHECK_EQUAL(kernel.get_info<CL_KERNEL_NUM_ARGS>(), compute::uint_(3));
158
159 BOOST_CHECK_EQUAL(kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_TYPE_NAME), "int*");
160 BOOST_CHECK_EQUAL(kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME), "input");
161 BOOST_CHECK_EQUAL(kernel.get_arg_info<std::string>(1, CL_KERNEL_ARG_TYPE_NAME), "uint");
162 BOOST_CHECK_EQUAL(kernel.get_arg_info<std::string>(1, CL_KERNEL_ARG_NAME), "size");
163 BOOST_CHECK_EQUAL(kernel.get_arg_info<std::string>(2, CL_KERNEL_ARG_TYPE_NAME), "int*");
164 BOOST_CHECK_EQUAL(kernel.get_arg_info<std::string>(2, CL_KERNEL_ARG_NAME), "result");
165
166 BOOST_CHECK_EQUAL(kernel.get_arg_info<CL_KERNEL_ARG_TYPE_NAME>(0), "int*");
167 BOOST_CHECK_EQUAL(kernel.get_arg_info<CL_KERNEL_ARG_NAME>(0), "input");
168 BOOST_CHECK_EQUAL(kernel.get_arg_info<CL_KERNEL_ARG_TYPE_NAME>(1), "uint");
169 BOOST_CHECK_EQUAL(kernel.get_arg_info<CL_KERNEL_ARG_NAME>(1), "size");
170 BOOST_CHECK_EQUAL(kernel.get_arg_info<CL_KERNEL_ARG_TYPE_NAME>(2), "int*");
171 BOOST_CHECK_EQUAL(kernel.get_arg_info<CL_KERNEL_ARG_NAME>(2), "result");
172 }
173 #endif // BOOST_COMPUTE_CL_VERSION_1_2
174
175 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
176 #ifndef CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR
177 #define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
178 #endif
179 #ifndef CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR
180 #define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
181 #endif
BOOST_AUTO_TEST_CASE(get_sub_group_info_ext)182 BOOST_AUTO_TEST_CASE(get_sub_group_info_ext)
183 {
184 compute::kernel k = compute::kernel::create_with_source(
185 "__kernel void test(float x) { }", "test", context
186 );
187
188 // get_sub_group_info(const device&, cl_kernel_sub_group_info, const std::vector<size_t>)
189 std::vector<size_t> local_work_size(2, size_t(64));
190 boost::optional<size_t> count = k.get_sub_group_info<size_t>(
191 device,
192 CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
193 local_work_size
194 );
195
196 #ifdef BOOST_COMPUTE_CL_VERSION_2_1
197 if(device.check_version(2, 1))
198 {
199 BOOST_CHECK(count);
200 }
201 else
202 #endif // BOOST_COMPUTE_CL_VERSION_2_1
203 if(device.check_version(2, 0) && device.supports_extension("cl_khr_subgroups"))
204 {
205 // for device with cl_khr_subgroups it should return some value
206 BOOST_CHECK(count);
207 }
208 else
209 {
210 // for device without cl_khr_subgroups ext it should return null optional
211 BOOST_CHECK(count == boost::none);
212 }
213
214 // get_sub_group_info(const device&, cl_kernel_sub_group_info, const size_t, const void *)
215 count = k.get_sub_group_info<size_t>(
216 device,
217 CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
218 2 * sizeof(size_t),
219 &local_work_size[0]
220 );
221
222 #ifdef BOOST_COMPUTE_CL_VERSION_2_1
223 if(device.check_version(2, 1))
224 {
225 BOOST_CHECK(count);
226 }
227 else
228 #endif // BOOST_COMPUTE_CL_VERSION_2_1
229 if(device.check_version(2, 0) && device.supports_extension("cl_khr_subgroups"))
230 {
231 // for device with cl_khr_subgroups it should return some value
232 BOOST_CHECK(count);
233 }
234 else
235 {
236 // for device without cl_khr_subgroups ext it should return null optional
237 BOOST_CHECK(count == boost::none);
238 }
239 }
240 #endif // BOOST_COMPUTE_CL_VERSION_2_0
241
242 #ifdef BOOST_COMPUTE_CL_VERSION_2_1
BOOST_AUTO_TEST_CASE(get_sub_group_info_core)243 BOOST_AUTO_TEST_CASE(get_sub_group_info_core)
244 {
245 compute::kernel k = compute::kernel::create_with_source(
246 "__kernel void test(float x) { }", "test", context
247 );
248
249 // get_sub_group_info(const device&, cl_kernel_sub_group_info, const size_t)
250 boost::optional<std::vector<size_t>> local_size =
251 k.get_sub_group_info<std::vector<size_t> >(
252 device,
253 CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
254 size_t(1)
255 );
256
257 if(device.check_version(2, 1))
258 {
259 // for 2.1 devices it should return some value
260 BOOST_CHECK(local_size);
261 BOOST_CHECK(local_size.value().size() == 3);
262 }
263 else
264 {
265 // for 1.x and 2.0 devices it should return null optional,
266 // because CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT is not
267 // supported by cl_khr_subgroups (2.0 ext)
268 BOOST_CHECK(local_size == boost::none);
269 }
270
271 // get_sub_group_info(const device&, cl_kernel_sub_group_info, const size_t)
272 boost::optional<size_t> local_size_simple =
273 k.get_sub_group_info<size_t>(
274 device,
275 CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
276 size_t(1)
277 );
278
279 if(device.check_version(2, 1))
280 {
281 // for 2.1 devices it should return some value
282 BOOST_CHECK(local_size_simple);
283 }
284 else
285 {
286 // for 1.x and 2.0 devices it should return null optional,
287 // because CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT is not
288 // supported by cl_khr_subgroups (2.0 ext)
289 BOOST_CHECK(local_size_simple == boost::none);
290 }
291
292 // get_sub_group_info(const device&, cl_kernel_sub_group_info)
293 boost::optional<size_t> max =
294 k.get_sub_group_info<size_t>(
295 device,
296 CL_KERNEL_MAX_NUM_SUB_GROUPS
297 );
298
299 if(device.check_version(2, 1))
300 {
301 // for 2.1 devices it should return some value
302 BOOST_CHECK(max);
303 }
304 else
305 {
306 // for 1.x and 2.0 devices it should return null optional,
307 // because CL_KERNEL_MAX_NUM_SUB_GROUPS is not
308 // supported by cl_khr_subgroups (2.0 ext)
309 BOOST_CHECK(max == boost::none);
310 }
311 }
312 #endif // BOOST_COMPUTE_CL_VERSION_2_1
313
314 #ifdef BOOST_COMPUTE_CL_VERSION_2_1
BOOST_AUTO_TEST_CASE(clone_kernel)315 BOOST_AUTO_TEST_CASE(clone_kernel)
316 {
317 REQUIRES_OPENCL_PLATFORM_VERSION(2, 1);
318
319 compute::kernel k1 = compute::kernel::create_with_source(
320 "__kernel void test(__global int * x) { x[get_global_id(0)] = get_global_id(0); }",
321 "test", context
322 );
323
324 compute::buffer x(context, 5 * sizeof(compute::int_));
325 k1.set_arg(0, x);
326
327 // Clone k1 kernel
328 compute::kernel k2 = k1.clone();
329 // After clone k2 0th argument (__global float * x) should be set,
330 // so we should be able to enqueue k2 kernel without problems
331 queue.enqueue_1d_range_kernel(k2, 0, x.size() / sizeof(compute::int_), 0).wait();
332 }
333 #endif // BOOST_COMPUTE_CL_VERSION_2_1
334
335 BOOST_AUTO_TEST_SUITE_END()
336