// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
28 // Functions used to compare the TensorMap implementation on the device with
29 // the equivalent on the host
30 namespace cl {
31 namespace sycl {
abs(T x)32 template <typename T> T abs(T x) { return cl::sycl::fabs(x); }
square(T x)33 template <typename T> T square(T x) { return x * x; }
cube(T x)34 template <typename T> T cube(T x) { return x * x * x; }
inverse(T x)35 template <typename T> T inverse(T x) { return T(1) / x; }
cwiseMax(T x,T y)36 template <typename T> T cwiseMax(T x, T y) { return cl::sycl::max(x, y); }
cwiseMin(T x,T y)37 template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
38 }
39 }
40 
// Assignment policy: plain overwrite, lhs = rhs.
struct EqualAssignement {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) {
    lhs = rhs;
  }
};

// Assignment policy: accumulate, lhs += rhs.
struct PlusEqualAssignement {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) {
    lhs += rhs;
  }
};

51 template <typename DataType, int DataLayout,
52           typename Assignement, typename Operator>
test_unary_builtins_for_scalar(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)53 void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
54                                     const array<int64_t, 3>& tensor_range) {
55   Operator op;
56   Assignement asgn;
57   {
58     /* Assignement(out, Operator(in)) */
59     Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
60     Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
61     in = in.random() + DataType(0.01);
62     out = out.random() + DataType(0.01);
63     Tensor<DataType, 3, DataLayout, int64_t> reference(out);
64     DataType *gpu_data = static_cast<DataType *>(
65         sycl_device.allocate(in.size() * sizeof(DataType)));
66     DataType *gpu_data_out = static_cast<DataType *>(
67         sycl_device.allocate(out.size() * sizeof(DataType)));
68     TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
69     TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
70     sycl_device.memcpyHostToDevice(gpu_data, in.data(),
71                                    (in.size()) * sizeof(DataType));
72     sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
73                                    (out.size()) * sizeof(DataType));
74     auto device_expr = gpu_out.device(sycl_device);
75     asgn(device_expr, op(gpu));
76     sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
77                                    (out.size()) * sizeof(DataType));
78     for (int64_t i = 0; i < out.size(); ++i) {
79       DataType ver = reference(i);
80       asgn(ver, op(in(i)));
81       VERIFY_IS_APPROX(out(i), ver);
82     }
83     sycl_device.deallocate(gpu_data);
84     sycl_device.deallocate(gpu_data_out);
85   }
86   {
87     /* Assignement(out, Operator(out)) */
88     Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
89     out = out.random() + DataType(0.01);
90     Tensor<DataType, 3, DataLayout, int64_t> reference(out);
91     DataType *gpu_data_out = static_cast<DataType *>(
92         sycl_device.allocate(out.size() * sizeof(DataType)));
93     TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
94     sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
95                                    (out.size()) * sizeof(DataType));
96     auto device_expr = gpu_out.device(sycl_device);
97     asgn(device_expr, op(gpu_out));
98     sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
99                                    (out.size()) * sizeof(DataType));
100     for (int64_t i = 0; i < out.size(); ++i) {
101       DataType ver = reference(i);
102       asgn(ver, op(reference(i)));
103       VERIFY_IS_APPROX(out(i), ver);
104     }
105     sycl_device.deallocate(gpu_data_out);
106   }
107 }
108 
109 #define DECLARE_UNARY_STRUCT(FUNC)                                 \
110   struct op_##FUNC {                                               \
111     template <typename T>                                          \
112     auto operator()(const T& x) -> decltype(cl::sycl::FUNC(x)) {   \
113       return cl::sycl::FUNC(x);                                    \
114     }                                                              \
115     template <typename T>                                          \
116     auto operator()(const TensorMap<T>& x) -> decltype(x.FUNC()) { \
117       return x.FUNC();                                             \
118     }                                                              \
119   };
120 
121 DECLARE_UNARY_STRUCT(abs)
DECLARE_UNARY_STRUCT(sqrt)122 DECLARE_UNARY_STRUCT(sqrt)
123 DECLARE_UNARY_STRUCT(rsqrt)
124 DECLARE_UNARY_STRUCT(square)
125 DECLARE_UNARY_STRUCT(cube)
126 DECLARE_UNARY_STRUCT(inverse)
127 DECLARE_UNARY_STRUCT(tanh)
128 DECLARE_UNARY_STRUCT(exp)
129 DECLARE_UNARY_STRUCT(expm1)
130 DECLARE_UNARY_STRUCT(log)
131 DECLARE_UNARY_STRUCT(ceil)
132 DECLARE_UNARY_STRUCT(floor)
133 DECLARE_UNARY_STRUCT(round)
134 DECLARE_UNARY_STRUCT(log1p)
135 DECLARE_UNARY_STRUCT(sign)
136 DECLARE_UNARY_STRUCT(isnan)
137 DECLARE_UNARY_STRUCT(isfinite)
138 DECLARE_UNARY_STRUCT(isinf)
139 
140 template <typename DataType, int DataLayout, typename Assignement>
141 void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
142                                          const array<int64_t, 3>& tensor_range) {
143 #define RUN_UNARY_TEST(FUNC) \
144   test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
145                                  op_##FUNC>(sycl_device, tensor_range)
146   RUN_UNARY_TEST(abs);
147   RUN_UNARY_TEST(sqrt);
148   RUN_UNARY_TEST(rsqrt);
149   RUN_UNARY_TEST(square);
150   RUN_UNARY_TEST(cube);
151   RUN_UNARY_TEST(inverse);
152   RUN_UNARY_TEST(tanh);
153   RUN_UNARY_TEST(exp);
154   RUN_UNARY_TEST(expm1);
155   RUN_UNARY_TEST(log);
156   RUN_UNARY_TEST(ceil);
157   RUN_UNARY_TEST(floor);
158   RUN_UNARY_TEST(round);
159   RUN_UNARY_TEST(log1p);
160   RUN_UNARY_TEST(sign);
161 }
162 
163 template <typename DataType, int DataLayout, typename Operator>
test_unary_builtins_return_bool(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)164 void test_unary_builtins_return_bool(const Eigen::SyclDevice& sycl_device,
165                                      const array<int64_t, 3>& tensor_range) {
166   /* out = op(in) */
167   Operator op;
168   Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
169   Tensor<bool, 3, DataLayout, int64_t> out(tensor_range);
170   in = in.random() + DataType(0.01);
171   DataType *gpu_data = static_cast<DataType *>(
172       sycl_device.allocate(in.size() * sizeof(DataType)));
173   bool *gpu_data_out =
174       static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool)));
175   TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
176   TensorMap<Tensor<bool, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
177   sycl_device.memcpyHostToDevice(gpu_data, in.data(),
178                                  (in.size()) * sizeof(DataType));
179   gpu_out.device(sycl_device) = op(gpu);
180   sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
181                                  (out.size()) * sizeof(bool));
182   for (int64_t i = 0; i < out.size(); ++i) {
183     VERIFY_IS_EQUAL(out(i), op(in(i)));
184   }
185   sycl_device.deallocate(gpu_data);
186   sycl_device.deallocate(gpu_data_out);
187 }
188 
189 template <typename DataType, int DataLayout>
test_unary_builtins(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)190 void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
191                          const array<int64_t, 3>& tensor_range) {
192   test_unary_builtins_for_assignement<DataType, DataLayout,
193                                       PlusEqualAssignement>(sycl_device, tensor_range);
194   test_unary_builtins_for_assignement<DataType, DataLayout,
195                                       EqualAssignement>(sycl_device, tensor_range);
196   test_unary_builtins_return_bool<DataType, DataLayout,
197                                   op_isnan>(sycl_device, tensor_range);
198   test_unary_builtins_return_bool<DataType, DataLayout,
199                                   op_isfinite>(sycl_device, tensor_range);
200   test_unary_builtins_return_bool<DataType, DataLayout,
201                                   op_isinf>(sycl_device, tensor_range);
202 }
203 
204 template <typename DataType>
test_builtin_unary_sycl(const Eigen::SyclDevice & sycl_device)205 static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
206   int64_t sizeDim1 = 10;
207   int64_t sizeDim2 = 10;
208   int64_t sizeDim3 = 10;
209   array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
210 
211   test_unary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
212   test_unary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
213 }
214 
215 template <typename DataType, int DataLayout, typename Operator>
test_binary_builtins_func(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)216 void test_binary_builtins_func(const Eigen::SyclDevice& sycl_device,
217                                const array<int64_t, 3>& tensor_range) {
218   /* out = op(in_1, in_2) */
219   Operator op;
220   Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
221   Tensor<DataType, 3, DataLayout, int64_t> in_2(tensor_range);
222   Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
223   in_1 = in_1.random() + DataType(0.01);
224   in_2 = in_2.random() + DataType(0.01);
225   Tensor<DataType, 3, DataLayout, int64_t> reference(out);
226   DataType *gpu_data_1 = static_cast<DataType *>(
227       sycl_device.allocate(in_1.size() * sizeof(DataType)));
228   DataType *gpu_data_2 = static_cast<DataType *>(
229       sycl_device.allocate(in_2.size() * sizeof(DataType)));
230   DataType *gpu_data_out = static_cast<DataType *>(
231       sycl_device.allocate(out.size() * sizeof(DataType)));
232   TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
233   TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_2(gpu_data_2, tensor_range);
234   TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
235   sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
236                                  (in_1.size()) * sizeof(DataType));
237   sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),
238                                  (in_2.size()) * sizeof(DataType));
239   gpu_out.device(sycl_device) = op(gpu_1, gpu_2);
240   sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
241                                  (out.size()) * sizeof(DataType));
242   for (int64_t i = 0; i < out.size(); ++i) {
243     VERIFY_IS_APPROX(out(i), op(in_1(i), in_2(i)));
244   }
245   sycl_device.deallocate(gpu_data_1);
246   sycl_device.deallocate(gpu_data_2);
247   sycl_device.deallocate(gpu_data_out);
248 }
249 
250 template <typename DataType, int DataLayout, typename Operator>
test_binary_builtins_fixed_arg2(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)251 void test_binary_builtins_fixed_arg2(const Eigen::SyclDevice& sycl_device,
252                                      const array<int64_t, 3>& tensor_range) {
253   /* out = op(in_1, 2) */
254   Operator op;
255   const DataType arg2(2);
256   Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
257   Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
258   in_1 = in_1.random();
259   Tensor<DataType, 3, DataLayout, int64_t> reference(out);
260   DataType *gpu_data_1 = static_cast<DataType *>(
261       sycl_device.allocate(in_1.size() * sizeof(DataType)));
262   DataType *gpu_data_out = static_cast<DataType *>(
263       sycl_device.allocate(out.size() * sizeof(DataType)));
264   TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
265   TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
266   sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
267                                  (in_1.size()) * sizeof(DataType));
268   gpu_out.device(sycl_device) = op(gpu_1, arg2);
269   sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
270                                  (out.size()) * sizeof(DataType));
271   for (int64_t i = 0; i < out.size(); ++i) {
272     VERIFY_IS_APPROX(out(i), op(in_1(i), arg2));
273   }
274   sycl_device.deallocate(gpu_data_1);
275   sycl_device.deallocate(gpu_data_out);
276 }
277 
278 #define DECLARE_BINARY_STRUCT(FUNC)                                                          \
279   struct op_##FUNC {                                                                         \
280     template <typename T1, typename T2>                                                      \
281     auto operator()(const T1& x, const T2& y) -> decltype(cl::sycl::FUNC(x, y)) {            \
282       return cl::sycl::FUNC(x, y);                                                           \
283     }                                                                                        \
284     template <typename T1, typename T2>                                                      \
285     auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) -> decltype(x.FUNC(y)) { \
286       return x.FUNC(y);                                                                      \
287     }                                                                                        \
288   };
289 
290 DECLARE_BINARY_STRUCT(cwiseMax)
DECLARE_BINARY_STRUCT(cwiseMin)291 DECLARE_BINARY_STRUCT(cwiseMin)
292 
// Declares a binary functor op_NAME that simply applies the given infix
// OPERATOR; works for scalars and for Eigen expression operands alike.
#define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR)                          \
  struct op_##NAME {                                                      \
    template <typename T1, typename T2>                                   \
    auto operator()(const T1& x, const T2& y) -> decltype(x OPERATOR y) { \
      return x OPERATOR y;                                                \
    }                                                                     \
  };

DECLARE_BINARY_STRUCT_OP(plus, +)
DECLARE_BINARY_STRUCT_OP(minus, -)
DECLARE_BINARY_STRUCT_OP(times, *)
DECLARE_BINARY_STRUCT_OP(divide, /)
DECLARE_BINARY_STRUCT_OP(modulo, %)

307 template <typename DataType, int DataLayout>
308 void test_binary_builtins(const Eigen::SyclDevice& sycl_device,
309                           const array<int64_t, 3>& tensor_range) {
310   test_binary_builtins_func<DataType, DataLayout,
311                             op_cwiseMax>(sycl_device, tensor_range);
312   test_binary_builtins_func<DataType, DataLayout,
313                             op_cwiseMin>(sycl_device, tensor_range);
314   test_binary_builtins_func<DataType, DataLayout,
315                             op_plus>(sycl_device, tensor_range);
316   test_binary_builtins_func<DataType, DataLayout,
317                             op_minus>(sycl_device, tensor_range);
318   test_binary_builtins_func<DataType, DataLayout,
319                             op_times>(sycl_device, tensor_range);
320   test_binary_builtins_func<DataType, DataLayout,
321                             op_divide>(sycl_device, tensor_range);
322 }
323 
324 template <typename DataType>
test_floating_builtin_binary_sycl(const Eigen::SyclDevice & sycl_device)325 static void test_floating_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
326   int64_t sizeDim1 = 10;
327   int64_t sizeDim2 = 10;
328   int64_t sizeDim3 = 10;
329   array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
330   test_binary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
331   test_binary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
332 }
333 
334 template <typename DataType>
test_integer_builtin_binary_sycl(const Eigen::SyclDevice & sycl_device)335 static void test_integer_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
336   int64_t sizeDim1 = 10;
337   int64_t sizeDim2 = 10;
338   int64_t sizeDim3 = 10;
339   array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
340   test_binary_builtins_fixed_arg2<DataType, RowMajor,
341                                   op_modulo>(sycl_device, tensor_range);
342   test_binary_builtins_fixed_arg2<DataType, ColMajor,
343                                   op_modulo>(sycl_device, tensor_range);
344 }
345 
// Test driver: runs the unary and binary built-in checks on every SYCL
// device the runtime reports as supported.
EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    QueueInterface queueInterface(device);
    Eigen::SyclDevice sycl_device(&queueInterface);
    CALL_SUBTEST_1(test_builtin_unary_sycl<float>(sycl_device));
    CALL_SUBTEST_2(test_floating_builtin_binary_sycl<float>(sycl_device));
    CALL_SUBTEST_3(test_integer_builtin_binary_sycl<int>(sycl_device));
  }
}