//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
10 
11 #ifndef BOOST_COMPUTE_KERNEL_HPP
12 #define BOOST_COMPUTE_KERNEL_HPP
13 
#include <string>
#include <vector>

#include <boost/assert.hpp>
#include <boost/utility/enable_if.hpp>
#include <boost/optional.hpp>

#include <boost/compute/cl_ext.hpp> // cl_khr_subgroups

#include <boost/compute/config.hpp>
#include <boost/compute/exception.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/platform.hpp>
#include <boost/compute/type_traits/is_fundamental.hpp>
#include <boost/compute/detail/diagnostic.hpp>
#include <boost/compute/detail/get_object_info.hpp>
#include <boost/compute/detail/assert_cl_success.hpp>
31 namespace boost {
32 namespace compute {
namespace detail {

// Forward declaration of the functor used by kernel::set_arg(index, value)
// to bind a value of type T to a kernel argument. The primary template and
// its specializations are defined after the kernel class.
template<class T> struct set_kernel_arg;

} // end detail namespace
38 
39 /// \class kernel
40 /// \brief A compute kernel.
41 ///
42 /// \see command_queue, program
43 class kernel
44 {
45 public:
46     /// Creates a null kernel object.
kernel()47     kernel()
48         : m_kernel(0)
49     {
50     }
51 
52     /// Creates a new kernel object for \p kernel. If \p retain is
53     /// \c true, the reference count for \p kernel will be incremented.
kernel(cl_kernel kernel,bool retain=true)54     explicit kernel(cl_kernel kernel, bool retain = true)
55         : m_kernel(kernel)
56     {
57         if(m_kernel && retain){
58             clRetainKernel(m_kernel);
59         }
60     }
61 
62     /// Creates a new kernel object with \p name from \p program.
kernel(const program & program,const std::string & name)63     kernel(const program &program, const std::string &name)
64     {
65         cl_int error = 0;
66         m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
67 
68         if(!m_kernel){
69             BOOST_THROW_EXCEPTION(opencl_error(error));
70         }
71     }
72 
73     /// Creates a new kernel object as a copy of \p other.
kernel(const kernel & other)74     kernel(const kernel &other)
75         : m_kernel(other.m_kernel)
76     {
77         if(m_kernel){
78             clRetainKernel(m_kernel);
79         }
80     }
81 
82     /// Copies the kernel object from \p other to \c *this.
operator =(const kernel & other)83     kernel& operator=(const kernel &other)
84     {
85         if(this != &other){
86             if(m_kernel){
87                 clReleaseKernel(m_kernel);
88             }
89 
90             m_kernel = other.m_kernel;
91 
92             if(m_kernel){
93                 clRetainKernel(m_kernel);
94             }
95         }
96 
97         return *this;
98     }
99 
100     #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
101     /// Move-constructs a new kernel object from \p other.
kernel(kernel && other)102     kernel(kernel&& other) BOOST_NOEXCEPT
103         : m_kernel(other.m_kernel)
104     {
105         other.m_kernel = 0;
106     }
107 
108     /// Move-assigns the kernel from \p other to \c *this.
operator =(kernel && other)109     kernel& operator=(kernel&& other) BOOST_NOEXCEPT
110     {
111         if(m_kernel){
112             clReleaseKernel(m_kernel);
113         }
114 
115         m_kernel = other.m_kernel;
116         other.m_kernel = 0;
117 
118         return *this;
119     }
120     #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
121 
122     /// Destroys the kernel object.
~kernel()123     ~kernel()
124     {
125         if(m_kernel){
126             BOOST_COMPUTE_ASSERT_CL_SUCCESS(
127                 clReleaseKernel(m_kernel)
128             );
129         }
130     }
131 
132     #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
133     /// Creates a new kernel object based on a shallow copy of
134     /// the undelying OpenCL kernel object.
135     ///
136     /// \opencl_version_warning{2,1}
137     ///
138     /// \see_opencl21_ref{clCloneKernel}
clone()139     kernel clone()
140     {
141         cl_int ret = 0;
142         cl_kernel k = clCloneKernel(m_kernel, &ret);
143         return kernel(k, false);
144     }
145     #endif // BOOST_COMPUTE_CL_VERSION_2_1
146 
147     /// Returns a reference to the underlying OpenCL kernel object.
get() const148     cl_kernel& get() const
149     {
150         return const_cast<cl_kernel &>(m_kernel);
151     }
152 
153     /// Returns the function name for the kernel.
name() const154     std::string name() const
155     {
156         return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
157     }
158 
159     /// Returns the number of arguments for the kernel.
arity() const160     size_t arity() const
161     {
162         return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
163     }
164 
165     /// Returns the program for the kernel.
get_program() const166     program get_program() const
167     {
168         return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
169     }
170 
171     /// Returns the context for the kernel.
get_context() const172     context get_context() const
173     {
174         return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
175     }
176 
177     /// Returns information about the kernel.
178     ///
179     /// \see_opencl_ref{clGetKernelInfo}
180     template<class T>
get_info(cl_kernel_info info) const181     T get_info(cl_kernel_info info) const
182     {
183         return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
184     }
185 
186     /// \overload
187     template<int Enum>
188     typename detail::get_object_info_type<kernel, Enum>::type
189     get_info() const;
190 
191     #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
192     /// Returns information about the argument at \p index.
193     ///
194     /// For example, to get the name of the first argument:
195     /// \code
196     /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
197     /// \endcode
198     ///
199     /// Note, this function requires that the program be compiled with the
200     /// \c "-cl-kernel-arg-info" flag. For example:
201     /// \code
202     /// program.build("-cl-kernel-arg-info");
203     /// \endcode
204     ///
205     /// \opencl_version_warning{1,2}
206     ///
207     /// \see_opencl_ref{clGetKernelArgInfo}
208     template<class T>
get_arg_info(size_t index,cl_kernel_arg_info info) const209     T get_arg_info(size_t index, cl_kernel_arg_info info) const
210     {
211         return detail::get_object_info<T>(
212             clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
213         );
214     }
215 
216     /// \overload
217     template<int Enum>
218     typename detail::get_object_info_type<kernel, Enum>::type
219     get_arg_info(size_t index) const;
220     #endif // BOOST_COMPUTE_CL_VERSION_1_2
221 
222     /// Returns work-group information for the kernel with \p device.
223     ///
224     /// \see_opencl_ref{clGetKernelWorkGroupInfo}
225     template<class T>
get_work_group_info(const device & device,cl_kernel_work_group_info info) const226     T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
227     {
228         return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
229     }
230 
231     #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
232     /// Returns sub-group information for the kernel with \p device. Returns a null
233     /// optional if \p device is not 2.1 device, or is not 2.0 device with support
234     /// for cl_khr_subgroups extension.
235     ///
236     /// \opencl_version_warning{2,1}
237     /// \see_opencl21_ref{clGetKernelSubGroupInfo}
238     /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
239     template<class T>
get_sub_group_info(const device & device,cl_kernel_sub_group_info info,const size_t input_size,const void * input) const240     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
241                                           const size_t input_size, const void * input) const
242     {
243         if(device.check_version(2, 1))
244         {
245             return detail::get_object_info<T>(
246                 clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
247             );
248         }
249         else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
250         {
251             return boost::optional<T>();
252         }
253         // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
254         // are supported in cl_khr_subgroups extension for 2.0 devices.
255         else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
256         {
257             return boost::optional<T>();
258         }
259 
260         BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
261         clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
262             reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
263                 reinterpret_cast<size_t>(
264                     device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
265                 )
266             );
267         BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
268 
269         return detail::get_object_info<T>(
270             clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
271         );
272     }
273 
274     /// \overload
275     template<class T>
get_sub_group_info(const device & device,cl_kernel_sub_group_info info) const276     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
277     {
278         return get_sub_group_info<T>(device, info, 0, 0);
279     }
280 
281     /// \overload
282     template<class T>
get_sub_group_info(const device & device,cl_kernel_sub_group_info info,const size_t input) const283     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
284                                           const size_t input) const
285     {
286         return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
287     }
288     #endif // BOOST_COMPUTE_CL_VERSION_2_1
289 
290     #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
291     /// Returns sub-group information for the kernel with \p device. Returns a null
292     /// optional if cl_khr_subgroups extension is not supported by \p device.
293     ///
294     /// \opencl_version_warning{2,0}
295     /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
296     template<class T>
get_sub_group_info(const device & device,cl_kernel_sub_group_info info,const size_t input_size,const void * input) const297     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
298                                           const size_t input_size, const void * input) const
299     {
300         if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
301         {
302             return boost::optional<T>();
303         }
304 
305         BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
306         clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
307             reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
308                 reinterpret_cast<size_t>(
309                     device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
310                 )
311             );
312         BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
313 
314         return detail::get_object_info<T>(
315             clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
316         );
317     }
318     #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
319 
320     #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
321     /// \overload
322     template<class T>
get_sub_group_info(const device & device,cl_kernel_sub_group_info info,const std::vector<size_t> input) const323     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
324                                           const std::vector<size_t> input) const
325     {
326         BOOST_ASSERT(input.size() > 0);
327         return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
328     }
329     #endif // BOOST_COMPUTE_CL_VERSION_2_0
330 
331     /// Sets the argument at \p index to \p value with \p size.
332     ///
333     /// \see_opencl_ref{clSetKernelArg}
set_arg(size_t index,size_t size,const void * value)334     void set_arg(size_t index, size_t size, const void *value)
335     {
336         BOOST_ASSERT(index < arity());
337 
338         cl_int ret = clSetKernelArg(m_kernel,
339                                     static_cast<cl_uint>(index),
340                                     size,
341                                     value);
342         if(ret != CL_SUCCESS){
343             BOOST_THROW_EXCEPTION(opencl_error(ret));
344         }
345     }
346 
347     /// Sets the argument at \p index to \p value.
348     ///
349     /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
350     /// calling set_arg(index, sizeof(type), &value).
351     ///
352     /// Additionally, this method is specialized for device memory objects
353     /// such as buffer and image2d. This allows for them to be passed directly
354     /// without having to extract their underlying cl_mem object.
355     ///
356     /// This method is also specialized for device container types such as
357     /// vector<T> and array<T, N>. This allows for them to be passed directly
358     /// as kernel arguments without having to extract their underlying buffer.
359     ///
360     /// For setting local memory arguments (e.g. "__local float *buf"), the
361     /// local_buffer<T> class may be used:
362     /// \code
363     /// // set argument to a local buffer with storage for 32 float's
364     /// kernel.set_arg(0, local_buffer<float>(32));
365     /// \endcode
366     template<class T>
set_arg(size_t index,const T & value)367     void set_arg(size_t index, const T &value)
368     {
369         // if you get a compilation error pointing here it means you
370         // attempted to set a kernel argument from an invalid type.
371         detail::set_kernel_arg<T>()(*this, index, value);
372     }
373 
374     /// \internal_
set_arg(size_t index,const cl_mem mem)375     void set_arg(size_t index, const cl_mem mem)
376     {
377         set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
378     }
379 
380     /// \internal_
set_arg(size_t index,const cl_sampler sampler)381     void set_arg(size_t index, const cl_sampler sampler)
382     {
383         set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
384     }
385 
386     /// \internal_
set_arg_svm_ptr(size_t index,void * ptr)387     void set_arg_svm_ptr(size_t index, void* ptr)
388     {
389         #ifdef BOOST_COMPUTE_CL_VERSION_2_0
390         cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
391         if(ret != CL_SUCCESS){
392             BOOST_THROW_EXCEPTION(opencl_error(ret));
393         }
394         #else
395         (void) index;
396         (void) ptr;
397         BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
398         #endif
399     }
400 
401     #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
402     /// Sets the arguments for the kernel to \p args.
403     template<class... T>
set_args(T &&...args)404     void set_args(T&&... args)
405     {
406         BOOST_ASSERT(sizeof...(T) <= arity());
407 
408         _set_args<0>(args...);
409     }
410     #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
411 
412     #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
413     /// Sets additional execution information for the kernel.
414     ///
415     /// \opencl_version_warning{2,0}
416     ///
417     /// \see_opencl2_ref{clSetKernelExecInfo}
set_exec_info(cl_kernel_exec_info info,size_t size,const void * value)418     void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
419     {
420         cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
421         if(ret != CL_SUCCESS){
422             BOOST_THROW_EXCEPTION(opencl_error(ret));
423         }
424     }
425     #endif // BOOST_COMPUTE_CL_VERSION_2_0
426 
427     /// Returns \c true if the kernel is the same at \p other.
operator ==(const kernel & other) const428     bool operator==(const kernel &other) const
429     {
430         return m_kernel == other.m_kernel;
431     }
432 
433     /// Returns \c true if the kernel is different from \p other.
operator !=(const kernel & other) const434     bool operator!=(const kernel &other) const
435     {
436         return m_kernel != other.m_kernel;
437     }
438 
439     /// \internal_
operator cl_kernel() const440     operator cl_kernel() const
441     {
442         return m_kernel;
443     }
444 
445     /// \internal_
create_with_source(const std::string & source,const std::string & name,const context & context)446     static kernel create_with_source(const std::string &source,
447                                      const std::string &name,
448                                      const context &context)
449     {
450         return program::build_with_source(source, context).create_kernel(name);
451     }
452 
453 private:
454     #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
455     /// \internal_
456     template<size_t N>
_set_args()457     void _set_args()
458     {
459     }
460 
461     /// \internal_
462     template<size_t N, class T, class... Args>
_set_args(T && arg,Args &&...rest)463     void _set_args(T&& arg, Args&&... rest)
464     {
465         set_arg(N, arg);
466         _set_args<N+1>(rest...);
467     }
468     #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
469 
470 private:
471     cl_kernel m_kernel;
472 };
473 
create_kernel(const std::string & name) const474 inline kernel program::create_kernel(const std::string &name) const
475 {
476     return kernel(*this, name);
477 }
478 
479 /// \internal_ define get_info() specializations for kernel
480 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
481     ((std::string, CL_KERNEL_FUNCTION_NAME))
482     ((cl_uint, CL_KERNEL_NUM_ARGS))
483     ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
484     ((cl_context, CL_KERNEL_CONTEXT))
485     ((cl_program, CL_KERNEL_PROGRAM))
486 )
487 
488 #ifdef BOOST_COMPUTE_CL_VERSION_1_2
489 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
490     ((std::string, CL_KERNEL_ATTRIBUTES))
491 )
492 #endif // BOOST_COMPUTE_CL_VERSION_1_2
493 
/// \internal_ define get_arg_info() specializations for kernel
#ifdef BOOST_COMPUTE_CL_VERSION_1_2
// For each (result_type, value) pair this registers the result type in
// detail::get_object_info_type<kernel, value> and specializes
// kernel::get_arg_info<value>(index) to forward to the typed
// get_arg_info<result_type>(index, value) overload.
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(result_type, value) \
    namespace detail { \
        template<> struct get_object_info_type<kernel, value> { typedef result_type type; }; \
    } \
    template<> inline result_type kernel::get_arg_info<value>(size_t index) const { \
        return get_arg_info<result_type>(index, value); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME)
#endif // BOOST_COMPUTE_CL_VERSION_1_2
510 
511 namespace detail {
512 
513 // set_kernel_arg implementation for built-in types
514 template<class T>
515 struct set_kernel_arg
516 {
517     typename boost::enable_if<is_fundamental<T> >::type
operator ()boost::compute::detail::set_kernel_arg518     operator()(kernel &kernel_, size_t index, const T &value)
519     {
520         kernel_.set_arg(index, sizeof(T), &value);
521     }
522 };
523 
524 // set_kernel_arg specialization for char (different from built-in cl_char)
525 template<>
526 struct set_kernel_arg<char>
527 {
operator ()boost::compute::detail::set_kernel_arg528     void operator()(kernel &kernel_, size_t index, const char c)
529     {
530         kernel_.set_arg(index, sizeof(char), &c);
531     }
532 };
533 
534 } // end detail namespace
535 } // end namespace compute
536 } // end namespace boost
537 
538 #endif // BOOST_COMPUTE_KERNEL_HPP
539