// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;

// Functions used to compare the TensorMap implementation on the device with
// the equivalent on the host
30 namespace cl {
31 namespace sycl {
abs(T x)32 template <typename T> T abs(T x) { return cl::sycl::fabs(x); }
square(T x)33 template <typename T> T square(T x) { return x * x; }
cube(T x)34 template <typename T> T cube(T x) { return x * x * x; }
inverse(T x)35 template <typename T> T inverse(T x) { return T(1) / x; }
cwiseMax(T x,T y)36 template <typename T> T cwiseMax(T x, T y) { return cl::sycl::max(x, y); }
cwiseMin(T x,T y)37 template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
38 }
39 }
40
// Functor performing a plain assignment (`lhs = rhs`). Together with
// PlusEqualAssignement this lets one test body exercise both `=` and `+=`
// on device expressions.
struct EqualAssignement {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) {
    lhs = rhs;
  }
};
45
// Functor performing a compound assignment (`lhs += rhs`); the accumulating
// counterpart of EqualAssignement.
struct PlusEqualAssignement {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) {
    lhs += rhs;
  }
};
50
// Checks one unary builtin (Operator) combined with one assignment flavour
// (Assignement, i.e. `=` or `+=`) for the given scalar type and layout.
// Two aliasing scenarios are covered:
//   1. asgn(out, op(in))  -- separate input and output buffers;
//   2. asgn(out, op(out)) -- operator applied in place on the output.
// The device result is copied back to the host and compared element-wise
// against the same computation performed on host-side copies.
template <typename DataType, int DataLayout,
          typename Assignement, typename Operator>
void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
                                    const array<int64_t, 3>& tensor_range) {
  Operator op;
  Assignement asgn;
  {
    /* Assignement(out, Operator(in)) */
    Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
    Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
    // Shift values away from zero so builtins with singularities at 0
    // (inverse, log, rsqrt, ...) stay well-defined.
    in = in.random() + DataType(0.01);
    out = out.random() + DataType(0.01);
    // Host snapshot of the initial output values; required to emulate the
    // `+=` flavour on the host side below.
    Tensor<DataType, 3, DataLayout, int64_t> reference(out);
    DataType *gpu_data = static_cast<DataType *>(
        sycl_device.allocate(in.size() * sizeof(DataType)));
    DataType *gpu_data_out = static_cast<DataType *>(
        sycl_device.allocate(out.size() * sizeof(DataType)));
    TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
    TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
    sycl_device.memcpyHostToDevice(gpu_data, in.data(),
                                   (in.size()) * sizeof(DataType));
    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
                                   (out.size()) * sizeof(DataType));
    // Evaluate Assignement(out, Operator(in)) on the device.
    auto device_expr = gpu_out.device(sycl_device);
    asgn(device_expr, op(gpu));
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                   (out.size()) * sizeof(DataType));
    for (int64_t i = 0; i < out.size(); ++i) {
      // Recompute on the host from the snapshots and compare approximately.
      DataType ver = reference(i);
      asgn(ver, op(in(i)));
      VERIFY_IS_APPROX(out(i), ver);
    }
    sycl_device.deallocate(gpu_data);
    sycl_device.deallocate(gpu_data_out);
  }
  {
    /* Assignement(out, Operator(out)) */
    Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
    out = out.random() + DataType(0.01);
    // Snapshot taken before the device overwrites `out` in place.
    Tensor<DataType, 3, DataLayout, int64_t> reference(out);
    DataType *gpu_data_out = static_cast<DataType *>(
        sycl_device.allocate(out.size() * sizeof(DataType)));
    TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),
                                   (out.size()) * sizeof(DataType));
    // In-place evaluation: the operator reads and writes the same buffer.
    auto device_expr = gpu_out.device(sycl_device);
    asgn(device_expr, op(gpu_out));
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                   (out.size()) * sizeof(DataType));
    for (int64_t i = 0; i < out.size(); ++i) {
      DataType ver = reference(i);
      asgn(ver, op(reference(i)));
      VERIFY_IS_APPROX(out(i), ver);
    }
    sycl_device.deallocate(gpu_data_out);
  }
}
108
// Generates a functor `op_FUNC` with two call operators:
//  - a scalar overload forwarding to cl::sycl::FUNC(x) (host reference path,
//    also covered by the helpers injected into cl::sycl above);
//  - a TensorMap overload forwarding to the Eigen tensor expression x.FUNC()
//    (device path).
// SFINAE via the trailing decltype selects whichever overload is valid.
#define DECLARE_UNARY_STRUCT(FUNC)                                 \
  struct op_##FUNC {                                               \
    template <typename T>                                          \
    auto operator()(const T& x) -> decltype(cl::sycl::FUNC(x)) {   \
      return cl::sycl::FUNC(x);                                    \
    }                                                              \
    template <typename T>                                          \
    auto operator()(const TensorMap<T>& x) -> decltype(x.FUNC()) { \
      return x.FUNC();                                             \
    }                                                              \
  };

// One functor per unary builtin under test.
DECLARE_UNARY_STRUCT(abs)
DECLARE_UNARY_STRUCT(sqrt)
DECLARE_UNARY_STRUCT(rsqrt)
DECLARE_UNARY_STRUCT(square)
DECLARE_UNARY_STRUCT(cube)
DECLARE_UNARY_STRUCT(inverse)
DECLARE_UNARY_STRUCT(tanh)
DECLARE_UNARY_STRUCT(exp)
DECLARE_UNARY_STRUCT(expm1)
DECLARE_UNARY_STRUCT(log)
DECLARE_UNARY_STRUCT(ceil)
DECLARE_UNARY_STRUCT(floor)
DECLARE_UNARY_STRUCT(round)
DECLARE_UNARY_STRUCT(log1p)
DECLARE_UNARY_STRUCT(sign)
// The following return bool and are driven by test_unary_builtins_return_bool.
DECLARE_UNARY_STRUCT(isnan)
DECLARE_UNARY_STRUCT(isfinite)
DECLARE_UNARY_STRUCT(isinf)
139
140 template <typename DataType, int DataLayout, typename Assignement>
141 void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
142 const array<int64_t, 3>& tensor_range) {
143 #define RUN_UNARY_TEST(FUNC) \
144 test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
145 op_##FUNC>(sycl_device, tensor_range)
146 RUN_UNARY_TEST(abs);
147 RUN_UNARY_TEST(sqrt);
148 RUN_UNARY_TEST(rsqrt);
149 RUN_UNARY_TEST(square);
150 RUN_UNARY_TEST(cube);
151 RUN_UNARY_TEST(inverse);
152 RUN_UNARY_TEST(tanh);
153 RUN_UNARY_TEST(exp);
154 RUN_UNARY_TEST(expm1);
155 RUN_UNARY_TEST(log);
156 RUN_UNARY_TEST(ceil);
157 RUN_UNARY_TEST(floor);
158 RUN_UNARY_TEST(round);
159 RUN_UNARY_TEST(log1p);
160 RUN_UNARY_TEST(sign);
161 }
162
// Checks a unary predicate builtin (isnan/isfinite/isinf) whose result is a
// boolean tensor: out = op(in) is evaluated on the device and compared
// exactly (VERIFY_IS_EQUAL, not approximately) with the host-side result.
template <typename DataType, int DataLayout, typename Operator>
void test_unary_builtins_return_bool(const Eigen::SyclDevice& sycl_device,
                                     const array<int64_t, 3>& tensor_range) {
  /* out = op(in) */
  Operator op;
  Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
  Tensor<bool, 3, DataLayout, int64_t> out(tensor_range);
  // Offset keeps inputs away from exact zero, matching the other tests.
  in = in.random() + DataType(0.01);
  DataType *gpu_data = static_cast<DataType *>(
      sycl_device.allocate(in.size() * sizeof(DataType)));
  // Note the output buffer holds bool, not DataType.
  bool *gpu_data_out =
      static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool)));
  TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu(gpu_data, tensor_range);
  TensorMap<Tensor<bool, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
  sycl_device.memcpyHostToDevice(gpu_data, in.data(),
                                 (in.size()) * sizeof(DataType));
  gpu_out.device(sycl_device) = op(gpu);
  sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
                                 (out.size()) * sizeof(bool));
  for (int64_t i = 0; i < out.size(); ++i) {
    VERIFY_IS_EQUAL(out(i), op(in(i)));
  }
  sycl_device.deallocate(gpu_data);
  sycl_device.deallocate(gpu_data_out);
}
188
189 template <typename DataType, int DataLayout>
test_unary_builtins(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)190 void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
191 const array<int64_t, 3>& tensor_range) {
192 test_unary_builtins_for_assignement<DataType, DataLayout,
193 PlusEqualAssignement>(sycl_device, tensor_range);
194 test_unary_builtins_for_assignement<DataType, DataLayout,
195 EqualAssignement>(sycl_device, tensor_range);
196 test_unary_builtins_return_bool<DataType, DataLayout,
197 op_isnan>(sycl_device, tensor_range);
198 test_unary_builtins_return_bool<DataType, DataLayout,
199 op_isfinite>(sycl_device, tensor_range);
200 test_unary_builtins_return_bool<DataType, DataLayout,
201 op_isinf>(sycl_device, tensor_range);
202 }
203
204 template <typename DataType>
test_builtin_unary_sycl(const Eigen::SyclDevice & sycl_device)205 static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
206 int64_t sizeDim1 = 10;
207 int64_t sizeDim2 = 10;
208 int64_t sizeDim3 = 10;
209 array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
210
211 test_unary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
212 test_unary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
213 }
214
215 template <typename DataType, int DataLayout, typename Operator>
test_binary_builtins_func(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)216 void test_binary_builtins_func(const Eigen::SyclDevice& sycl_device,
217 const array<int64_t, 3>& tensor_range) {
218 /* out = op(in_1, in_2) */
219 Operator op;
220 Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
221 Tensor<DataType, 3, DataLayout, int64_t> in_2(tensor_range);
222 Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
223 in_1 = in_1.random() + DataType(0.01);
224 in_2 = in_2.random() + DataType(0.01);
225 Tensor<DataType, 3, DataLayout, int64_t> reference(out);
226 DataType *gpu_data_1 = static_cast<DataType *>(
227 sycl_device.allocate(in_1.size() * sizeof(DataType)));
228 DataType *gpu_data_2 = static_cast<DataType *>(
229 sycl_device.allocate(in_2.size() * sizeof(DataType)));
230 DataType *gpu_data_out = static_cast<DataType *>(
231 sycl_device.allocate(out.size() * sizeof(DataType)));
232 TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
233 TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_2(gpu_data_2, tensor_range);
234 TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
235 sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
236 (in_1.size()) * sizeof(DataType));
237 sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),
238 (in_2.size()) * sizeof(DataType));
239 gpu_out.device(sycl_device) = op(gpu_1, gpu_2);
240 sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
241 (out.size()) * sizeof(DataType));
242 for (int64_t i = 0; i < out.size(); ++i) {
243 VERIFY_IS_APPROX(out(i), op(in_1(i), in_2(i)));
244 }
245 sycl_device.deallocate(gpu_data_1);
246 sycl_device.deallocate(gpu_data_2);
247 sycl_device.deallocate(gpu_data_out);
248 }
249
250 template <typename DataType, int DataLayout, typename Operator>
test_binary_builtins_fixed_arg2(const Eigen::SyclDevice & sycl_device,const array<int64_t,3> & tensor_range)251 void test_binary_builtins_fixed_arg2(const Eigen::SyclDevice& sycl_device,
252 const array<int64_t, 3>& tensor_range) {
253 /* out = op(in_1, 2) */
254 Operator op;
255 const DataType arg2(2);
256 Tensor<DataType, 3, DataLayout, int64_t> in_1(tensor_range);
257 Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
258 in_1 = in_1.random();
259 Tensor<DataType, 3, DataLayout, int64_t> reference(out);
260 DataType *gpu_data_1 = static_cast<DataType *>(
261 sycl_device.allocate(in_1.size() * sizeof(DataType)));
262 DataType *gpu_data_out = static_cast<DataType *>(
263 sycl_device.allocate(out.size() * sizeof(DataType)));
264 TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_1(gpu_data_1, tensor_range);
265 TensorMap<Tensor<DataType, 3, DataLayout, int64_t>> gpu_out(gpu_data_out, tensor_range);
266 sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),
267 (in_1.size()) * sizeof(DataType));
268 gpu_out.device(sycl_device) = op(gpu_1, arg2);
269 sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,
270 (out.size()) * sizeof(DataType));
271 for (int64_t i = 0; i < out.size(); ++i) {
272 VERIFY_IS_APPROX(out(i), op(in_1(i), arg2));
273 }
274 sycl_device.deallocate(gpu_data_1);
275 sycl_device.deallocate(gpu_data_out);
276 }
277
// Generates a functor `op_FUNC` with two call operators:
//  - a scalar overload forwarding to cl::sycl::FUNC(x, y) (host reference);
//  - a TensorMap overload forwarding to the Eigen expression x.FUNC(y)
//    (device path).
// SFINAE via the trailing decltype selects whichever overload is valid.
#define DECLARE_BINARY_STRUCT(FUNC)                                                          \
  struct op_##FUNC {                                                                         \
    template <typename T1, typename T2>                                                      \
    auto operator()(const T1& x, const T2& y) -> decltype(cl::sycl::FUNC(x, y)) {            \
      return cl::sycl::FUNC(x, y);                                                           \
    }                                                                                        \
    template <typename T1, typename T2>                                                      \
    auto operator()(const TensorMap<T1>& x, const TensorMap<T2>& y) -> decltype(x.FUNC(y)) { \
      return x.FUNC(y);                                                                      \
    }                                                                                        \
  };

DECLARE_BINARY_STRUCT(cwiseMax)
DECLARE_BINARY_STRUCT(cwiseMin)
292
// Generates a functor `op_NAME` applying the infix OPERATOR. A single
// template covers both scalars and tensor expressions, since Eigen
// overloads the arithmetic operators on TensorMap.
#define DECLARE_BINARY_STRUCT_OP(NAME, OPERATOR)                          \
  struct op_##NAME {                                                      \
    template <typename T1, typename T2>                                   \
    auto operator()(const T1& x, const T2& y) -> decltype(x OPERATOR y) { \
      return x OPERATOR y;                                                \
    }                                                                     \
  };

DECLARE_BINARY_STRUCT_OP(plus, +)
DECLARE_BINARY_STRUCT_OP(minus, -)
DECLARE_BINARY_STRUCT_OP(times, *)
DECLARE_BINARY_STRUCT_OP(divide, /)
// modulo is integer-only; driven by test_integer_builtin_binary_sycl.
DECLARE_BINARY_STRUCT_OP(modulo, %)
306
307 template <typename DataType, int DataLayout>
308 void test_binary_builtins(const Eigen::SyclDevice& sycl_device,
309 const array<int64_t, 3>& tensor_range) {
310 test_binary_builtins_func<DataType, DataLayout,
311 op_cwiseMax>(sycl_device, tensor_range);
312 test_binary_builtins_func<DataType, DataLayout,
313 op_cwiseMin>(sycl_device, tensor_range);
314 test_binary_builtins_func<DataType, DataLayout,
315 op_plus>(sycl_device, tensor_range);
316 test_binary_builtins_func<DataType, DataLayout,
317 op_minus>(sycl_device, tensor_range);
318 test_binary_builtins_func<DataType, DataLayout,
319 op_times>(sycl_device, tensor_range);
320 test_binary_builtins_func<DataType, DataLayout,
321 op_divide>(sycl_device, tensor_range);
322 }
323
324 template <typename DataType>
test_floating_builtin_binary_sycl(const Eigen::SyclDevice & sycl_device)325 static void test_floating_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
326 int64_t sizeDim1 = 10;
327 int64_t sizeDim2 = 10;
328 int64_t sizeDim3 = 10;
329 array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
330 test_binary_builtins<DataType, RowMajor>(sycl_device, tensor_range);
331 test_binary_builtins<DataType, ColMajor>(sycl_device, tensor_range);
332 }
333
334 template <typename DataType>
test_integer_builtin_binary_sycl(const Eigen::SyclDevice & sycl_device)335 static void test_integer_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
336 int64_t sizeDim1 = 10;
337 int64_t sizeDim2 = 10;
338 int64_t sizeDim3 = 10;
339 array<int64_t, 3> tensor_range = {{sizeDim1, sizeDim2, sizeDim3}};
340 test_binary_builtins_fixed_arg2<DataType, RowMajor,
341 op_modulo>(sycl_device, tensor_range);
342 test_binary_builtins_fixed_arg2<DataType, ColMajor,
343 op_modulo>(sycl_device, tensor_range);
344 }
345
EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl)346 EIGEN_DECLARE_TEST(cxx11_tensor_builtins_sycl) {
347 for (const auto& device :Eigen::get_sycl_supported_devices()) {
348 QueueInterface queueInterface(device);
349 Eigen::SyclDevice sycl_device(&queueInterface);
350 CALL_SUBTEST_1(test_builtin_unary_sycl<float>(sycl_device));
351 CALL_SUBTEST_2(test_floating_builtin_binary_sycl<float>(sycl_device));
352 CALL_SUBTEST_3(test_integer_builtin_binary_sycl<int>(sycl_device));
353 }
354 }
355