1 // Boost.uBLAS
2 //
3 // Copyright (c) 2018 Fady Essam
4 // Copyright (c) 2018 Stefan Seefeld
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt or
8 // copy at http://www.boost.org/LICENSE_1_0.txt)
9
10 #ifndef boost_numeric_ublas_opencl_misc_hpp_
11 #define boost_numeric_ublas_opencl_misc_hpp_
12
13 #include <boost/numeric/ublas/opencl/library.hpp>
14 #include <boost/numeric/ublas/opencl/vector.hpp>
15 #include <boost/numeric/ublas/opencl/matrix.hpp>
16
17 namespace boost { namespace numeric { namespace ublas { namespace opencl {
18
19 template <typename T>
20 typename std::enable_if<is_numeric<T>::value, T>::type
a_sum(ublas::vector<T,opencl::storage> const & v,compute::command_queue & queue)21 a_sum(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue)
22 {
23 compute::vector<T> scratch_buffer(v.size(), queue.get_context());
24 compute::vector<T> result_buffer(1, queue.get_context());
25 cl_event event;
26 if (std::is_same<T, float>::value)
27 clblasSasum(v.size(),
28 result_buffer.begin().get_buffer().get(), //result buffer
29 0, //offset in result buffer
30 v.begin().get_buffer().get(), //input buffer
31 0, //offset in input buffer
32 1, //increment in input buffer
33 scratch_buffer.begin().get_buffer().get(),
34 1, //number of command queues
35 &(queue.get()), //queue
36 0, // number of events waiting list
37 NULL, //event waiting list
38 &event); //event
39 else if (std::is_same<T, double>::value)
40 clblasDasum(v.size(),
41 result_buffer.begin().get_buffer().get(), //result buffer
42 0, //offset in result buffer
43 v.begin().get_buffer().get(), //input buffer
44 0, //offset in input buffer
45 1, //increment in input buffer
46 scratch_buffer.begin().get_buffer().get(),
47 1, //number of command queues
48 &(queue.get()), //queue
49 0, // number of events waiting list
50 NULL, //event waiting list
51 &event); //event
52 else if (std::is_same<T, std::complex<float>>::value)
53 clblasScasum(v.size(),
54 result_buffer.begin().get_buffer().get(), //result buffer
55 0, //offset in result buffer
56 v.begin().get_buffer().get(), //input buffer
57 0, //offset in input buffer
58 1, //increment in input buffer
59 scratch_buffer.begin().get_buffer().get(),
60 1, //number of command queues
61 &(queue.get()), //queue
62 0, // number of events waiting list
63 NULL, //event waiting list
64 &event); //event
65 else if (std::is_same<T, std::complex<double>>::value)
66 clblasDzasum(v.size(),
67 result_buffer.begin().get_buffer().get(), //result buffer
68 0, //offset in result buffer
69 v.begin().get_buffer().get(), //input buffer
70 0, //offset in input buffer
71 1, //increment in input buffer
72 scratch_buffer.begin().get_buffer().get(),
73 1, //number of command queues
74 &(queue.get()), //queue
75 0, // number of events waiting list
76 NULL, //event waiting list
77 &event); //event
78 clWaitForEvents(1, &event);
79 return result_buffer[0];
80 }
81
82 template <typename T, typename A>
83 typename std::enable_if<is_numeric<T>::value, T>::type
a_sum(ublas::vector<T,A> const & v,compute::command_queue & queue)84 a_sum(ublas::vector<T, A> const &v, compute::command_queue& queue)
85 {
86 ublas::vector<T, opencl::storage> vdev(v, queue);
87 return a_sum(vdev, queue);
88 }
89
90 template <typename T>
91 typename std::enable_if<std::is_same<T, float>::value |
92 std::is_same<T, double>::value,
93 T>::type
norm_1(ublas::vector<T,opencl::storage> const & v,compute::command_queue & queue)94 norm_1(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue)
95 {
96 return a_sum(v, queue);
97 }
98
99 template <typename T, typename A>
100 typename std::enable_if<std::is_same<T, float>::value |
101 std::is_same<T, double>::value,
102 T>::type
norm_1(ublas::vector<T,A> const & v,compute::command_queue & queue)103 norm_1(ublas::vector<T, A> const &v, compute::command_queue& queue)
104 {
105 ublas::vector<T, opencl::storage> vdev(v, queue);
106 return norm_1(vdev, queue);
107 }
108
109 template <typename T>
110 typename std::enable_if<is_numeric<T>::value, T>::type
norm_2(ublas::vector<T,opencl::storage> const & v,compute::command_queue & queue)111 norm_2(ublas::vector<T, opencl::storage> const &v, compute::command_queue& queue)
112 {
113 compute::vector<T> scratch_buffer(2*v.size(), queue.get_context());
114 compute::vector<T> result_buffer(1, queue.get_context());
115 cl_event event;
116 if (std::is_same<T, float>::value)
117 clblasSnrm2(v.size(),
118 result_buffer.begin().get_buffer().get(), //result buffer
119 0, //offset in result buffer
120 v.begin().get_buffer().get(), //input buffer
121 0, //offset in input buffer
122 1, //increment in input buffer
123 scratch_buffer.begin().get_buffer().get(),
124 1, //number of command queues
125 &(queue.get()), //queue
126 0, // number of events waiting list
127 NULL, //event waiting list
128 &event); //event
129 else if (std::is_same<T, double>::value)
130 clblasDnrm2(v.size(),
131 result_buffer.begin().get_buffer().get(), //result buffer
132 0, //offset in result buffer
133 v.begin().get_buffer().get(), //input buffer
134 0, //offset in input buffer
135 1, //increment in input buffer
136 scratch_buffer.begin().get_buffer().get(),
137 1, //number of command queues
138 &(queue.get()), //queue
139 0, // number of events waiting list
140 NULL, //event waiting list
141 &event); //event
142 else if (std::is_same<T, std::complex<float>>::value)
143 clblasScnrm2(v.size(),
144 result_buffer.begin().get_buffer().get(), //result buffer
145 0, //offset in result buffer
146 v.begin().get_buffer().get(), //input buffer
147 0, //offset in input buffer
148 1, //increment in input buffer
149 scratch_buffer.begin().get_buffer().get(),
150 1, //number of command queues
151 &(queue.get()), //queue
152 0, // number of events waiting list
153 NULL, //event waiting list
154 &event); //event
155 else if (std::is_same<T, std::complex<double>>::value)
156 clblasDznrm2(v.size(),
157 result_buffer.begin().get_buffer().get(), //result buffer
158 0, //offset in result buffer
159 v.begin().get_buffer().get(), //input buffer
160 0, //offset in input buffer
161 1, //increment in input buffer
162 scratch_buffer.begin().get_buffer().get(),
163 1, //number of command queues
164 &(queue.get()), //queue
165 0, // number of events waiting list
166 NULL, //event waiting list
167 &event); //event
168 clWaitForEvents(1, &event);
169 return result_buffer[0];
170 }
171
172 template <typename T, typename A>
173 typename std::enable_if<is_numeric<T>::value, T>::type
norm_2(ublas::vector<T,A> const & v,compute::command_queue & queue)174 norm_2(ublas::vector<T, A> const &v, compute::command_queue& queue)
175 {
176 ublas::vector<T, opencl::storage> vdev(v, queue);
177 return norm_2(vdev, queue);
178 }
179
180 }}}}
181
182 #endif
183