• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //---------------------------------------------------------------------------//
2 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3 //
4 // Distributed under the Boost Software License, Version 1.0
5 // See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt
7 //
8 // See http://boostorg.github.com/compute for more information.
9 //---------------------------------------------------------------------------//
10 
11 #ifndef BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
12 #define BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
13 
14 #include <set>
15 #include <string>
16 #include <vector>
17 #include <iomanip>
18 #include <sstream>
19 #include <utility>
20 
21 #include <boost/tuple/tuple.hpp>
22 #include <boost/type_traits.hpp>
23 #include <boost/lexical_cast.hpp>
24 #include <boost/static_assert.hpp>
25 #include <boost/algorithm/string/find.hpp>
26 #include <boost/preprocessor/repetition.hpp>
27 
28 #include <boost/compute/kernel.hpp>
29 #include <boost/compute/closure.hpp>
30 #include <boost/compute/function.hpp>
31 #include <boost/compute/functional.hpp>
32 #include <boost/compute/type_traits.hpp>
33 #include <boost/compute/command_queue.hpp>
34 #include <boost/compute/image/image2d.hpp>
35 #include <boost/compute/image/image_sampler.hpp>
36 #include <boost/compute/memory_object.hpp>
37 #include <boost/compute/memory/svm_ptr.hpp>
38 #include <boost/compute/detail/device_ptr.hpp>
39 #include <boost/compute/detail/sha1.hpp>
40 #include <boost/compute/utility/program_cache.hpp>
41 
42 namespace boost {
43 namespace compute {
44 namespace detail {
45 
46 template<class T>
47 class meta_kernel_variable
48 {
49 public:
50     typedef T result_type;
51 
meta_kernel_variable(const std::string & name)52     meta_kernel_variable(const std::string &name)
53         : m_name(name)
54     {
55     }
56 
meta_kernel_variable(const meta_kernel_variable & other)57     meta_kernel_variable(const meta_kernel_variable &other)
58         : m_name(other.m_name)
59     {
60     }
61 
operator =(const meta_kernel_variable & other)62     meta_kernel_variable& operator=(const meta_kernel_variable &other)
63     {
64         if(this != &other){
65             m_name = other.m_name;
66         }
67 
68         return *this;
69     }
70 
~meta_kernel_variable()71     ~meta_kernel_variable()
72     {
73     }
74 
name() const75     std::string name() const
76     {
77         return m_name;
78     }
79 
80 private:
81     std::string m_name;
82 };
83 
84 template<class T>
85 class meta_kernel_literal
86 {
87 public:
88     typedef T result_type;
89 
meta_kernel_literal(const T & value)90     meta_kernel_literal(const T &value)
91         : m_value(value)
92     {
93     }
94 
meta_kernel_literal(const meta_kernel_literal & other)95     meta_kernel_literal(const meta_kernel_literal &other)
96         : m_value(other.m_value)
97     {
98     }
99 
operator =(const meta_kernel_literal & other)100     meta_kernel_literal& operator=(const meta_kernel_literal &other)
101     {
102         if(this != &other){
103             m_value = other.m_value;
104         }
105 
106         return *this;
107     }
108 
~meta_kernel_literal()109     ~meta_kernel_literal()
110     {
111     }
112 
value() const113     const T& value() const
114     {
115         return m_value;
116     }
117 
118 private:
119     T m_value;
120 };
121 
122 struct meta_kernel_stored_arg
123 {
meta_kernel_stored_argboost::compute::detail::meta_kernel_stored_arg124     meta_kernel_stored_arg()
125         : m_size(0),
126           m_value(0)
127     {
128     }
129 
meta_kernel_stored_argboost::compute::detail::meta_kernel_stored_arg130     meta_kernel_stored_arg(const meta_kernel_stored_arg &other)
131         : m_size(0),
132           m_value(0)
133     {
134         set_value(other.m_size, other.m_value);
135     }
136 
operator =boost::compute::detail::meta_kernel_stored_arg137     meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other)
138     {
139         if(this != &other){
140             set_value(other.m_size, other.m_value);
141         }
142 
143         return *this;
144     }
145 
146     template<class T>
meta_kernel_stored_argboost::compute::detail::meta_kernel_stored_arg147     meta_kernel_stored_arg(const T &value)
148         : m_size(0),
149           m_value(0)
150     {
151         set_value(value);
152     }
153 
~meta_kernel_stored_argboost::compute::detail::meta_kernel_stored_arg154     ~meta_kernel_stored_arg()
155     {
156         if(m_value){
157             std::free(m_value);
158         }
159     }
160 
set_valueboost::compute::detail::meta_kernel_stored_arg161     void set_value(size_t size, const void *value)
162     {
163         if(m_value){
164             std::free(m_value);
165         }
166 
167         m_size = size;
168 
169         if(value){
170             m_value = std::malloc(size);
171             std::memcpy(m_value, value, size);
172         }
173         else {
174             m_value = 0;
175         }
176     }
177 
178     template<class T>
set_valueboost::compute::detail::meta_kernel_stored_arg179     void set_value(const T &value)
180     {
181         set_value(sizeof(T), boost::addressof(value));
182     }
183 
184     size_t m_size;
185     void *m_value;
186 };
187 
188 struct meta_kernel_buffer_info
189 {
meta_kernel_buffer_infoboost::compute::detail::meta_kernel_buffer_info190     meta_kernel_buffer_info(const buffer &buffer,
191                             const std::string &id,
192                             memory_object::address_space addr_space,
193                             size_t i)
194       : m_mem(buffer.get()),
195         identifier(id),
196         address_space(addr_space),
197         index(i)
198     {
199     }
200 
201     cl_mem m_mem;
202     std::string identifier;
203     memory_object::address_space address_space;
204     size_t index;
205 };
206 
207 struct meta_kernel_svm_info
208 {
209     template <class T>
meta_kernel_svm_infoboost::compute::detail::meta_kernel_svm_info210     meta_kernel_svm_info(const svm_ptr<T> ptr,
211                          const std::string &id,
212                          memory_object::address_space addr_space,
213                          size_t i)
214       : ptr(ptr.get()),
215         identifier(id),
216         address_space(addr_space),
217         index(i)
218     {
219 
220     }
221 
222     void* ptr;
223     std::string identifier;
224     memory_object::address_space address_space;
225     size_t index;
226 };
227 
228 
229 class meta_kernel;
230 
231 template<class Type>
232 struct inject_type_impl
233 {
operator ()boost::compute::detail::inject_type_impl234     void operator()(meta_kernel &)
235     {
236         // default implementation does nothing
237     }
238 };
239 
240 #define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \
241     meta_kernel& operator<<(const type &x) \
242     { \
243         m_source << x; \
244         return *this; \
245     }
246 
247 #define BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \
248     meta_kernel& operator<<(const type &x) \
249     { \
250         m_source << "(" << type_name<type>() << ")"; \
251         m_source << "("; \
252         for(size_t i = 0; i < vector_size<type>::value; i++){ \
253             *this << lit(x[i]); \
254             \
255             if(i != vector_size<type>::value - 1){ \
256                 m_source << ","; \
257             } \
258         } \
259         m_source << ")"; \
260         return *this; \
261     }
262 
263 #define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \
264     BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \
265     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \
266     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \
267     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \
268     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _))
269 
270 class meta_kernel
271 {
272 public:
273     template<class T>
274     class argument
275     {
276     public:
argument(const std::string & name,size_t index)277         argument(const std::string &name, size_t index)
278             : m_name(name),
279               m_index(index)
280         {
281         }
282 
name() const283         const std::string &name() const
284         {
285             return m_name;
286         }
287 
index() const288         size_t index() const
289         {
290             return m_index;
291         }
292 
293     private:
294         std::string m_name;
295         size_t m_index;
296     };
297 
meta_kernel(const std::string & name)298     explicit meta_kernel(const std::string &name)
299         : m_name(name)
300     {
301     }
302 
meta_kernel(const meta_kernel & other)303     meta_kernel(const meta_kernel &other)
304     {
305         m_source.str(other.m_source.str());
306         m_options = other.m_options;
307     }
308 
operator =(const meta_kernel & other)309     meta_kernel& operator=(const meta_kernel &other)
310     {
311         if(this != &other){
312             m_source.str(other.m_source.str());
313             m_options = other.m_options;
314         }
315 
316         return *this;
317     }
318 
~meta_kernel()319     ~meta_kernel()
320     {
321     }
322 
name() const323     std::string name() const
324     {
325         return m_name;
326     }
327 
source() const328     std::string source() const
329     {
330         std::stringstream stream;
331 
332         // add pragmas
333         if(!m_pragmas.empty()){
334             stream << m_pragmas << "\n";
335         }
336 
337         // add macros
338         stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n";
339         stream << "#define boost_pair_get(x, n) (n == 0 ? x.first ## x.second)\n";
340         stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n";
341         stream << "#define boost_tuple_get(x, n) (x.v ## n)\n";
342 
343         // add type declaration source
344         stream << m_type_declaration_source.str() << "\n";
345 
346         // add external function source
347         stream << m_external_function_source.str() << "\n";
348 
349         // add kernel source
350         stream << "__kernel void " << m_name
351                << "(" << boost::join(m_args, ", ") << ")\n"
352                << "{\n" << m_source.str() << "\n}\n";
353 
354         return stream.str();
355     }
356 
compile(const context & context,const std::string & options=std::string ())357     kernel compile(const context &context, const std::string &options = std::string())
358     {
359         // generate the program source
360         std::string source = this->source();
361 
362         // generate cache key
363         std::string cache_key = "__boost_meta_kernel_" +
364             static_cast<std::string>(detail::sha1(source));
365 
366         // load program cache
367         boost::shared_ptr<program_cache> cache =
368             program_cache::get_global_cache(context);
369 
370         std::string compile_options = m_options + options;
371 
372         // load (or build) program from cache
373         ::boost::compute::program program =
374             cache->get_or_build(cache_key, compile_options, source, context);
375 
376         // create kernel
377         ::boost::compute::kernel kernel = program.create_kernel(name());
378 
379         // bind stored args
380         for(size_t i = 0; i < m_stored_args.size(); i++){
381             const detail::meta_kernel_stored_arg &arg = m_stored_args[i];
382 
383             if(arg.m_size != 0){
384                 kernel.set_arg(i, arg.m_size, arg.m_value);
385             }
386         }
387 
388         // bind buffer args
389         for(size_t i = 0; i < m_stored_buffers.size(); i++){
390             const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i];
391 
392             kernel.set_arg(bi.index, bi.m_mem);
393         }
394 
395         // bind svm args
396         for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){
397             const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i];
398 
399             kernel.set_arg_svm_ptr(spi.index, spi.ptr);
400         }
401 
402         return kernel;
403     }
404 
405     template<class T>
add_arg(const std::string & name)406     size_t add_arg(const std::string &name)
407     {
408         std::stringstream stream;
409         stream << type<T>() << " " << name;
410 
411         // add argument to list
412         m_args.push_back(stream.str());
413 
414         // return index
415         return m_args.size() - 1;
416     }
417 
418     template<class T>
add_arg(memory_object::address_space address_space,const std::string & name)419     size_t add_arg(memory_object::address_space address_space,
420                    const std::string &name)
421     {
422         return add_arg_with_qualifiers<T>(address_space_prefix(address_space), name);
423     }
424 
425     template<class T>
set_arg(size_t index,const T & value)426     void set_arg(size_t index, const T &value)
427     {
428         if(index >= m_stored_args.size()){
429             m_stored_args.resize(index + 1);
430         }
431 
432         m_stored_args[index] = detail::meta_kernel_stored_arg(value);
433     }
434 
set_arg(size_t index,const memory_object & mem)435     void set_arg(size_t index, const memory_object &mem)
436     {
437         set_arg<cl_mem>(index, mem.get());
438     }
439 
set_arg(size_t index,const image_sampler & sampler)440     void set_arg(size_t index, const image_sampler &sampler)
441     {
442         set_arg<cl_sampler>(index, cl_sampler(sampler));
443     }
444 
445     template<class T>
add_set_arg(const std::string & name,const T & value)446     size_t add_set_arg(const std::string &name, const T &value)
447     {
448         size_t index = add_arg<T>(name);
449         set_arg<T>(index, value);
450         return index;
451     }
452 
add_extension_pragma(const std::string & extension,const std::string & value="enable")453     void add_extension_pragma(const std::string &extension,
454                               const std::string &value = "enable")
455     {
456         m_pragmas += "#pragma OPENCL EXTENSION " + extension + " : " + value + "\n";
457     }
458 
add_extension_pragma(const std::string & extension,const std::string & value) const459     void add_extension_pragma(const std::string &extension,
460                               const std::string &value) const
461     {
462         return const_cast<meta_kernel *>(this)->add_extension_pragma(extension, value);
463     }
464 
465     template<class T>
type() const466     std::string type() const
467     {
468         std::stringstream stream;
469 
470         // const qualifier
471         if(boost::is_const<T>::value){
472             stream << "const ";
473         }
474 
475         // volatile qualifier
476         if(boost::is_volatile<T>::value){
477             stream << "volatile ";
478         }
479 
480         // type
481         typedef
482             typename boost::remove_cv<
483                 typename boost::remove_pointer<T>::type
484             >::type Type;
485         stream << type_name<Type>();
486 
487         // pointer
488         if(boost::is_pointer<T>::value){
489             stream << "*";
490         }
491 
492         // inject type pragmas and/or definitions
493         inject_type<Type>();
494 
495         return stream.str();
496     }
497 
498     template<class T>
decl(const std::string & name) const499     std::string decl(const std::string &name) const
500     {
501         return type<T>() + " " + name;
502     }
503 
504     template<class T, class Expr>
decl(const std::string & name,const Expr & init) const505     std::string decl(const std::string &name, const Expr &init) const
506     {
507         meta_kernel tmp((std::string()));
508         tmp << tmp.decl<T>(name) << " = " << init;
509         return tmp.m_source.str();
510     }
511 
512     template<class T>
var(const std::string & name) const513     detail::meta_kernel_variable<T> var(const std::string &name) const
514     {
515         type<T>();
516 
517         return make_var<T>(name);
518     }
519 
520     template<class T>
lit(const T & value) const521     detail::meta_kernel_literal<T> lit(const T &value) const
522     {
523         type<T>();
524 
525         return detail::meta_kernel_literal<T>(value);
526     }
527 
528     template<class T>
expr(const std::string & expr) const529     detail::meta_kernel_variable<T> expr(const std::string &expr) const
530     {
531         type<T>();
532 
533         return detail::meta_kernel_variable<T>(expr);
534     }
535 
536     // define stream operators for scalar and vector types
537     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char)
538     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar)
539     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short)
540     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort)
541     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int)
542     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint)
543     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long)
544     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong)
545     BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double)
546 
547     // define stream operators for float scalar and vector types
548     meta_kernel& operator<<(const float &x)
549     {
550         m_source << std::showpoint << x << 'f';
551         return *this;
552     }
553 
554     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_)
555     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_)
556     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_)
557     BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_)
558 
559     // define stream operators for variable types
560     template<class T>
561     meta_kernel& operator<<(const meta_kernel_variable<T> &variable)
562     {
563         return *this << variable.name();
564     }
565 
566     // define stream operators for literal types
567     template<class T>
operator <<(const meta_kernel_literal<T> & literal)568     meta_kernel& operator<<(const meta_kernel_literal<T> &literal)
569     {
570         return *this << literal.value();
571     }
572 
operator <<(const meta_kernel_literal<bool> & literal)573     meta_kernel& operator<<(const meta_kernel_literal<bool> &literal)
574     {
575         return *this << (literal.value() ? "true" : "false");
576     }
577 
operator <<(const meta_kernel_literal<char> & literal)578     meta_kernel& operator<<(const meta_kernel_literal<char> &literal)
579     {
580         const char c = literal.value();
581 
582         switch(c){
583         // control characters
584         case '\0':
585             return *this << "'\\0'";
586         case '\a':
587             return *this << "'\\a'";
588         case '\b':
589             return *this << "'\\b'";
590         case '\t':
591             return *this << "'\\t'";
592         case '\n':
593             return *this << "'\\n'";
594         case '\v':
595             return *this << "'\\v'";
596         case '\f':
597             return *this << "'\\f'";
598         case '\r':
599             return *this << "'\\r'";
600 
601         // characters which need escaping
602         case '\"':
603         case '\'':
604         case '\?':
605         case '\\':
606             return *this << "'\\" << c << "'";
607 
608         // all other characters
609         default:
610             return *this << "'" << c << "'";
611         }
612     }
613 
operator <<(const meta_kernel_literal<signed char> & literal)614     meta_kernel& operator<<(const meta_kernel_literal<signed char> &literal)
615     {
616         return *this << lit<char>(literal.value());
617     }
618 
operator <<(const meta_kernel_literal<unsigned char> & literal)619     meta_kernel& operator<<(const meta_kernel_literal<unsigned char> &literal)
620     {
621         return *this << uint_(literal.value());
622     }
623 
624     // define stream operators for strings
operator <<(char ch)625     meta_kernel& operator<<(char ch)
626     {
627         m_source << ch;
628         return *this;
629     }
630 
operator <<(const char * string)631     meta_kernel& operator<<(const char *string)
632     {
633         m_source << string;
634         return *this;
635     }
636 
operator <<(const std::string & string)637     meta_kernel& operator<<(const std::string &string)
638     {
639         m_source << string;
640         return *this;
641     }
642 
643     template<class T>
make_var(const std::string & name)644     static detail::meta_kernel_variable<T> make_var(const std::string &name)
645     {
646         return detail::meta_kernel_variable<T>(name);
647     }
648 
649     template<class T>
make_lit(const T & value)650     static detail::meta_kernel_literal<T> make_lit(const T &value)
651     {
652         return detail::meta_kernel_literal<T>(value);
653     }
654 
655     template<class T>
make_expr(const std::string & expr)656     static detail::meta_kernel_variable<T> make_expr(const std::string &expr)
657     {
658         return detail::meta_kernel_variable<T>(expr);
659     }
660 
exec(command_queue & queue)661     event exec(command_queue &queue)
662     {
663         return exec_1d(queue, 0, 1);
664     }
665 
exec_1d(command_queue & queue,size_t global_work_offset,size_t global_work_size,const wait_list & events=wait_list ())666     event exec_1d(command_queue &queue,
667                   size_t global_work_offset,
668                   size_t global_work_size,
669                   const wait_list &events = wait_list())
670     {
671         const context &context = queue.get_context();
672 
673         ::boost::compute::kernel kernel = compile(context);
674 
675         return queue.enqueue_1d_range_kernel(
676                    kernel,
677                    global_work_offset,
678                    global_work_size,
679                    0,
680                    events
681                );
682     }
683 
exec_1d(command_queue & queue,size_t global_work_offset,size_t global_work_size,size_t local_work_size,const wait_list & events=wait_list ())684     event exec_1d(command_queue &queue,
685                  size_t global_work_offset,
686                  size_t global_work_size,
687                  size_t local_work_size,
688                  const wait_list &events = wait_list())
689     {
690         const context &context = queue.get_context();
691 
692         ::boost::compute::kernel kernel = compile(context);
693 
694         return queue.enqueue_1d_range_kernel(
695                    kernel,
696                    global_work_offset,
697                    global_work_size,
698                    local_work_size,
699                    events
700                );
701     }
702 
703     template<class T>
get_buffer_identifier(const buffer & buffer,const memory_object::address_space address_space=memory_object::global_memory)704     std::string get_buffer_identifier(const buffer &buffer,
705                                       const memory_object::address_space address_space =
706                                           memory_object::global_memory)
707     {
708         // check if we've already seen buffer
709         for(size_t i = 0; i < m_stored_buffers.size(); i++){
710             const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i];
711 
712             if(bi.m_mem == buffer.get() &&
713                bi.address_space == address_space){
714                 return bi.identifier;
715             }
716         }
717 
718         // create a new binding
719         std::string identifier =
720             "_buf" + lexical_cast<std::string>(m_stored_buffers.size());
721         size_t index = add_arg<T *>(address_space, identifier);
722 
723         // store new buffer info
724         m_stored_buffers.push_back(
725             detail::meta_kernel_buffer_info(buffer, identifier, address_space, index));
726 
727         return identifier;
728     }
729 
730     template<class T>
get_svm_identifier(const svm_ptr<T> & svm_ptr,const memory_object::address_space address_space=memory_object::global_memory)731     std::string get_svm_identifier(const svm_ptr<T> &svm_ptr,
732                                    const memory_object::address_space address_space =
733                                        memory_object::global_memory)
734     {
735         BOOST_ASSERT(
736             (address_space == memory_object::global_memory)
737                 || (address_space == memory_object::constant_memory)
738         );
739 
740         // check if we've already seen this pointer
741         for(size_t i = 0; i < m_stored_svm_ptrs.size(); i++){
742             const detail::meta_kernel_svm_info &spi = m_stored_svm_ptrs[i];
743 
744             if(spi.ptr == svm_ptr.get() &&
745                spi.address_space == address_space){
746                 return spi.identifier;
747             }
748         }
749 
750         // create a new binding
751         std::string identifier =
752             "_svm_ptr" + lexical_cast<std::string>(m_stored_svm_ptrs.size());
753         size_t index = add_arg<T *>(address_space, identifier);
754 
755         if(m_stored_svm_ptrs.empty()) {
756             m_options += std::string(" -cl-std=CL2.0");
757         }
758 
759         // store new svm pointer info
760         m_stored_svm_ptrs.push_back(
761             detail::meta_kernel_svm_info(
762                 svm_ptr, identifier, address_space, index
763             )
764         );
765 
766         return identifier;
767     }
768 
get_image_identifier(const char * qualifiers,const image2d & image)769     std::string get_image_identifier(const char *qualifiers, const image2d &image)
770     {
771         size_t index = add_arg_with_qualifiers<image2d>(qualifiers, "image");
772 
773         set_arg(index, image);
774 
775         return "image";
776     }
777 
get_sampler_identifier(bool normalized_coords,cl_addressing_mode addressing_mode,cl_filter_mode filter_mode)778     std::string get_sampler_identifier(bool normalized_coords,
779                                        cl_addressing_mode addressing_mode,
780                                        cl_filter_mode filter_mode)
781     {
782         (void) normalized_coords;
783         (void) addressing_mode;
784         (void) filter_mode;
785 
786         m_pragmas += "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n"
787                      "                          CLK_ADDRESS_NONE |\n"
788                      "                          CLK_FILTER_NEAREST;\n";
789 
790         return "sampler";
791     }
792 
793     template<class Expr>
expr_to_string(const Expr & expr)794     static std::string expr_to_string(const Expr &expr)
795     {
796         meta_kernel tmp((std::string()));
797         tmp << expr;
798         return tmp.m_source.str();
799     }
800 
801     template<class Predicate>
if_(Predicate pred) const802     detail::invoked_function<bool, boost::tuple<Predicate> > if_(Predicate pred) const
803     {
804         return detail::invoked_function<bool, boost::tuple<Predicate> >(
805             "if", std::string(), boost::make_tuple(pred)
806         );
807     }
808 
809     template<class Predicate>
else_if_(Predicate pred) const810     detail::invoked_function<bool, boost::tuple<Predicate> > else_if_(Predicate pred) const
811     {
812         return detail::invoked_function<bool, boost::tuple<Predicate> >(
813             "else if", std::string(), boost::make_tuple(pred)
814         );
815     }
816 
get_global_id(size_t dim) const817     detail::meta_kernel_variable<cl_uint> get_global_id(size_t dim) const
818     {
819         return expr<cl_uint>("get_global_id(" + lexical_cast<std::string>(dim) + ")");
820     }
821 
add_function(const std::string & name,const std::string & source)822     void add_function(const std::string &name, const std::string &source)
823     {
824         if(m_external_function_names.count(name)){
825             return;
826         }
827 
828         m_external_function_names.insert(name);
829         m_external_function_source << source << "\n";
830     }
831 
add_function(const std::string & name,const std::string & source,const std::map<std::string,std::string> & definitions)832     void add_function(const std::string &name,
833                       const std::string &source,
834                       const std::map<std::string, std::string> &definitions)
835     {
836         typedef std::map<std::string, std::string>::const_iterator iter;
837 
838         std::stringstream s;
839 
840         // add #define's
841         for(iter i = definitions.begin(); i != definitions.end(); i++){
842             s << "#define " << i->first;
843             if(!i->second.empty()){
844                 s << " " << i->second;
845             }
846             s << "\n";
847         }
848 
849         s << source << "\n";
850 
851         // add #undef's
852         for(iter i = definitions.begin(); i != definitions.end(); i++){
853             s << "#undef " << i->first << "\n";
854         }
855 
856         add_function(name, s.str());
857     }
858 
859     template<class Type>
add_type_declaration(const std::string & declaration)860     void add_type_declaration(const std::string &declaration)
861     {
862         const char *name = type_name<Type>();
863 
864         // check if the type has already been declared
865         std::string source = m_type_declaration_source.str();
866         if(source.find(name) != std::string::npos){
867             return;
868         }
869 
870         m_type_declaration_source << declaration;
871     }
872 
873     template<class Type>
inject_type() const874     void inject_type() const
875     {
876         inject_type_impl<Type>()(const_cast<meta_kernel &>(*this));
877     }
878 
879     // the insert_function_call() method inserts a call to a function with
880     // the given name tuple of argument values.
881     template<class ArgTuple>
insert_function_call(const std::string & name,const ArgTuple & args)882     void insert_function_call(const std::string &name, const ArgTuple &args)
883     {
884         *this << name << '(';
885         insert_function_call_args(args);
886         *this << ')';
887     }
888 
889     // the insert_function_call_args() method takes a tuple of argument values
890     // and inserts them into the source string with a comma in-between each.
891     // this is useful for creating function calls given a tuple of values.
insert_function_call_args(const boost::tuple<> &)892     void insert_function_call_args(const boost::tuple<>&)
893     {
894     }
895 
896     #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \
897         inject_type<BOOST_PP_CAT(T, n)>();
898 
899     #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \
900         << boost::get<BOOST_PP_DEC(n)>(args) << ", "
901 
902     #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \
903         template<BOOST_PP_ENUM_PARAMS(n, class T)> \
904         void insert_function_call_args( \
905             const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &args \
906         ) \
907         { \
908             BOOST_PP_REPEAT_FROM_TO( \
909                 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \
910             ) \
911             *this \
912                 BOOST_PP_REPEAT_FROM_TO( \
913                     1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \
914                 ) \
915                 << boost::get<BOOST_PP_DEC(n)>(args); \
916         }
917 
918     BOOST_PP_REPEAT_FROM_TO(
919         1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~
920     )
921 
922     #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE
923     #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG
924     #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS
925 
address_space_prefix(const memory_object::address_space value)926     static const char* address_space_prefix(const memory_object::address_space value)
927     {
928         switch(value){
929             case memory_object::global_memory: return "__global";
930             case memory_object::local_memory: return "__local";
931             case memory_object::private_memory: return "__private";
932             case memory_object::constant_memory: return "__constant";
933         };
934 
935         return 0; // unreachable
936     }
937 
938 private:
939     template<class T>
add_arg_with_qualifiers(const char * qualifiers,const std::string & name)940     size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name)
941     {
942         size_t index = add_arg<T>(name);
943 
944         // update argument type declaration with qualifiers
945         std::stringstream s;
946         s << qualifiers << " " << m_args[index];
947         m_args[index] = s.str();
948 
949         return index;
950     }
951 
952 private:
953     std::string m_name;
954     std::stringstream m_source;
955     std::stringstream m_external_function_source;
956     std::stringstream m_type_declaration_source;
957     std::set<std::string> m_external_function_names;
958     std::vector<std::string> m_args;
959     std::string m_pragmas;
960     std::string m_options;
961     std::vector<detail::meta_kernel_stored_arg> m_stored_args;
962     std::vector<detail::meta_kernel_buffer_info> m_stored_buffers;
963     std::vector<detail::meta_kernel_svm_info> m_stored_svm_ptrs;
964 };
965 
966 template<class ResultType, class ArgTuple>
967 inline meta_kernel&
operator <<(meta_kernel & kernel,const invoked_function<ResultType,ArgTuple> & expr)968 operator<<(meta_kernel &kernel, const invoked_function<ResultType, ArgTuple> &expr)
969 {
970     if(!expr.source().empty()){
971         kernel.add_function(expr.name(), expr.source(), expr.definitions());
972     }
973 
974     kernel.insert_function_call(expr.name(), expr.args());
975 
976     return kernel;
977 }
978 
979 template<class ResultType, class ArgTuple, class CaptureTuple>
980 inline meta_kernel&
operator <<(meta_kernel & kernel,const invoked_closure<ResultType,ArgTuple,CaptureTuple> & expr)981 operator<<(meta_kernel &kernel,
982            const invoked_closure<ResultType, ArgTuple, CaptureTuple> &expr)
983 {
984     if(!expr.source().empty()){
985         kernel.add_function(expr.name(), expr.source(), expr.definitions());
986     }
987 
988     kernel << expr.name() << '(';
989     kernel.insert_function_call_args(expr.args());
990     kernel << ", ";
991     kernel.insert_function_call_args(expr.capture());
992     kernel << ')';
993 
994     return kernel;
995 }
996 
997 template<class Arg1, class Arg2, class Result>
operator <<(meta_kernel & kernel,const invoked_binary_operator<Arg1,Arg2,Result> & expr)998 inline meta_kernel& operator<<(meta_kernel &kernel,
999                                const invoked_binary_operator<Arg1,
1000                                                              Arg2,
1001                                                              Result> &expr)
1002 {
1003     return kernel << "((" << expr.arg1() << ")"
1004                   << expr.op()
1005                   << "(" << expr.arg2() << "))";
1006 }
1007 
1008 template<class T, class IndexExpr>
operator <<(meta_kernel & kernel,const detail::device_ptr_index_expr<T,IndexExpr> & expr)1009 inline meta_kernel& operator<<(meta_kernel &kernel,
1010                                const detail::device_ptr_index_expr<T, IndexExpr> &expr)
1011 {
1012     if(expr.m_index == 0){
1013         return kernel <<
1014                    kernel.get_buffer_identifier<T>(expr.m_buffer) <<
1015                    '[' << expr.m_expr << ']';
1016     }
1017     else {
1018         return kernel <<
1019                    kernel.get_buffer_identifier<T>(expr.m_buffer) <<
1020                    '[' << expr.m_index << "+(" << expr.m_expr << ")]";
1021     }
1022 }
1023 
1024 template<class T1, class T2, class IndexExpr>
operator <<(meta_kernel & kernel,const detail::device_ptr_index_expr<std::pair<T1,T2>,IndexExpr> & expr)1025 inline meta_kernel& operator<<(meta_kernel &kernel,
1026                                const detail::device_ptr_index_expr<std::pair<T1, T2>, IndexExpr> &expr)
1027 {
1028     typedef std::pair<T1, T2> T;
1029 
1030     if(expr.m_index == 0){
1031         return kernel <<
1032                    kernel.get_buffer_identifier<T>(expr.m_buffer) <<
1033                    '[' << expr.m_expr << ']';
1034     }
1035     else {
1036         return kernel <<
1037                    kernel.get_buffer_identifier<T>(expr.m_buffer) <<
1038                    '[' << expr.m_index << "+(" << expr.m_expr << ")]";
1039     }
1040 }
1041 
1042 // SVM requires OpenCL 2.0
1043 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
1044 template<class T, class IndexExpr>
operator <<(meta_kernel & kernel,const svm_ptr_index_expr<T,IndexExpr> & expr)1045 inline meta_kernel& operator<<(meta_kernel &kernel,
1046                                const svm_ptr_index_expr<T, IndexExpr> &expr)
1047 {
1048     return kernel <<
1049         kernel.get_svm_identifier<T>(expr.m_svm_ptr) <<
1050         '[' << expr.m_expr << ']';
1051 }
1052 #endif
1053 
1054 template<class Predicate, class Arg>
operator <<(meta_kernel & kernel,const invoked_unary_negate_function<Predicate,Arg> & expr)1055 inline meta_kernel& operator<<(meta_kernel &kernel,
1056                                const invoked_unary_negate_function<Predicate,
1057                                                                    Arg> &expr)
1058 {
1059     return kernel << "!(" << expr.pred()(expr.expr()) << ')';
1060 }
1061 
1062 template<class Predicate, class Arg1, class Arg2>
operator <<(meta_kernel & kernel,const invoked_binary_negate_function<Predicate,Arg1,Arg2> & expr)1063 inline meta_kernel& operator<<(meta_kernel &kernel,
1064                                const invoked_binary_negate_function<Predicate,
1065                                                                     Arg1,
1066                                                                     Arg2> &expr)
1067 {
1068     return kernel << "!(" << expr.pred()(expr.expr1(), expr.expr2()) << ')';
1069 }
1070 
1071 // get<N>() for vector types
1072 template<size_t N, class Arg, class T>
operator <<(meta_kernel & kernel,const invoked_get<N,Arg,T> & expr)1073 inline meta_kernel& operator<<(meta_kernel &kernel,
1074                                const invoked_get<N, Arg, T> &expr)
1075 {
1076     BOOST_STATIC_ASSERT(N < 16);
1077 
1078     if(N < 10){
1079         return kernel << expr.m_arg << ".s" << int_(N);
1080     }
1081     else if(N < 16){
1082 #ifdef _MSC_VER
1083 #  pragma warning(push)
1084 #  pragma warning(disable: 4307)
1085 #endif
1086         return kernel << expr.m_arg << ".s" << char('a' + (N - 10));
1087 #ifdef _MSC_VER
1088 #  pragma warning(pop)
1089 #endif
1090     }
1091 
1092     return kernel;
1093 }
1094 
1095 template<class T, class Arg>
operator <<(meta_kernel & kernel,const invoked_field<T,Arg> & expr)1096 inline meta_kernel& operator<<(meta_kernel &kernel,
1097                                const invoked_field<T, Arg> &expr)
1098 {
1099     return kernel << expr.m_arg << "." << expr.m_field;
1100 }
1101 
1102 template<class T, class Arg>
operator <<(meta_kernel & k,const invoked_as<T,Arg> & expr)1103 inline meta_kernel& operator<<(meta_kernel &k,
1104                                const invoked_as<T, Arg> &expr)
1105 {
1106     return k << "as_" << type_name<T>() << "(" << expr.m_arg << ")";
1107 }
1108 
1109 template<class T, class Arg>
operator <<(meta_kernel & k,const invoked_convert<T,Arg> & expr)1110 inline meta_kernel& operator<<(meta_kernel &k,
1111                                const invoked_convert<T, Arg> &expr)
1112 {
1113     return k << "convert_" << type_name<T>() << "(" << expr.m_arg << ")";
1114 }
1115 
1116 template<class T, class Arg>
operator <<(meta_kernel & k,const invoked_identity<T,Arg> & expr)1117 inline meta_kernel& operator<<(meta_kernel &k,
1118                                const invoked_identity<T, Arg> &expr)
1119 {
1120     return k << expr.m_arg;
1121 }
1122 
1123 template<>
1124 struct inject_type_impl<double_>
1125 {
operator ()boost::compute::detail::inject_type_impl1126     void operator()(meta_kernel &kernel)
1127     {
1128         kernel.add_extension_pragma("cl_khr_fp64", "enable");
1129     }
1130 };
1131 
1132 template<class Scalar, size_t N>
1133 struct inject_type_impl<vector_type<Scalar, N> >
1134 {
operator ()boost::compute::detail::inject_type_impl1135     void operator()(meta_kernel &kernel)
1136     {
1137         kernel.inject_type<Scalar>();
1138     }
1139 };
1140 
1141 } // end detail namespace
1142 } // end compute namespace
1143 } // end boost namespace
1144 
1145 #endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP
1146