1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <eigen@codeplay.com>
9 //
10 // This Source Code Form is subject to the terms of the Mozilla
11 // Public License v. 2.0. If a copy of the MPL was not distributed
12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
18 #define EIGEN_USE_SYCL
19
20 #include <iostream>
21 #include <chrono>
22 #include <ctime>
23
24 #include "main.h"
25 #include <unsupported/Eigen/CXX11/Tensor>
26 #include <iomanip>
27
28 using Eigen::array;
29 using Eigen::SyclDevice;
30 using Eigen::Tensor;
31 using Eigen::TensorMap;
// Precision handed to Eigen::internal::isApprox when the large-expression
// tests compare device results with their host references.
static const float error_threshold = 1e-4f;
33
34
35 template <typename DataType, int DataLayout, typename IndexType>
test_larg_expr1D(const Eigen::SyclDevice & sycl_device)36 static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device)
37 {
38 IndexType indim0 =53;
39 IndexType indim1= 55;
40 IndexType indim2= 51;
41 IndexType outdim0=50;
42 IndexType outdim1=55;
43 IndexType outdim2=51;
44 Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
45 Eigen::array<IndexType, 1> kernel_dims = {{4}};
46 Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
47
48 Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
49 Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
50 Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
51 Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
52
53 Eigen::array<IndexType, 1> dims3{{0}};
54
55 input.setRandom();
56 kernel.setRandom();
57 result.setZero();
58 result_host.setZero();
59
60 std::size_t input_bytes = input.size() * sizeof(DataType);
61 std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
62 std::size_t result_bytes = result.size() * sizeof(DataType);
63
64 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
65 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
66 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
67
68 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
69 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
70 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
71 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
72 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
73
74 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
75 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
76
77 result_host=input.convolve(kernel, dims3);
78
79 for(IndexType i=0; i< outdim0; i++ ){
80 for(IndexType j=0; j< outdim1; j++ ){
81 for(IndexType k=0; k< outdim2; k++ ){
82 if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
83 std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
84 assert(false);
85 }
86 }
87 }
88 }
89 sycl_device.deallocate(d_input);
90 sycl_device.deallocate(d_kernel);
91 sycl_device.deallocate(d_result);
92
93 }
94
95
96 template <typename DataType, int DataLayout, typename IndexType>
test_larg_expr2D(const Eigen::SyclDevice & sycl_device)97 static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device)
98 {
99 IndexType indim0 =53;
100 IndexType indim1= 55;
101 IndexType indim2= 51;
102 IndexType outdim0=50;
103 IndexType outdim1=51;
104 IndexType outdim2=51;
105 Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
106 Eigen::array<IndexType, 2> kernel_dims = {{4,5}};
107 Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
108
109 Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
110 Tensor<DataType, 2, DataLayout,IndexType> kernel(kernel_dims);
111 Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
112 Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
113
114 Eigen::array<IndexType, 2> dims3{{0,1}};
115
116 input.setRandom();
117 kernel.setRandom();
118 result.setZero();
119 result_host.setZero();
120
121 std::size_t input_bytes = input.size() * sizeof(DataType);
122 std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
123 std::size_t result_bytes = result.size() * sizeof(DataType);
124
125 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
126 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
127 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
128
129 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
130 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
131 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
132 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
133 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
134
135 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
136 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
137
138 result_host=input.convolve(kernel, dims3);
139
140 for(IndexType i=0; i< outdim0; i++ ){
141 for(IndexType j=0; j< outdim1; j++ ){
142 for(IndexType k=0; k< outdim2; k++ ){
143 if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
144 std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
145 assert(false);
146 }
147 }
148 }
149 }
150 sycl_device.deallocate(d_input);
151 sycl_device.deallocate(d_kernel);
152 sycl_device.deallocate(d_result);
153
154 }
155
156
157 template <typename DataType, int DataLayout, typename IndexType>
test_larg_expr3D(const Eigen::SyclDevice & sycl_device)158 static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device)
159 {
160 IndexType indim0 =53;
161 IndexType indim1= 55;
162 IndexType indim2= 51;
163 IndexType outdim0=50;
164 IndexType outdim1=51;
165 IndexType outdim2=49;
166 Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
167 Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}};
168 Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
169
170 Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
171 Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims);
172 Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
173 Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
174
175 Eigen::array<IndexType, 3> dims3{{0,1,2}};
176
177 input.setRandom();
178 kernel.setRandom();
179 result.setZero();
180 result_host.setZero();
181
182 std::size_t input_bytes = input.size() * sizeof(DataType);
183 std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
184 std::size_t result_bytes = result.size() * sizeof(DataType);
185
186 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
187 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
188 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
189
190 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
191 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
192 Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
193 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
194 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
195
196 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
197 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
198
199 result_host=input.convolve(kernel, dims3);
200
201 for(IndexType i=0; i< outdim0; i++ ){
202 for(IndexType j=0; j< outdim1; j++ ){
203 for(IndexType k=0; k< outdim2; k++ ){
204 if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
205 std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
206 assert(false);
207 }
208 }
209 }
210 }
211 sycl_device.deallocate(d_input);
212 sycl_device.deallocate(d_kernel);
213 sycl_device.deallocate(d_result);
214
215 }
216
217
218 template <typename DataType, int DataLayout, typename IndexType>
test_evals(const Eigen::SyclDevice & sycl_device)219 static void test_evals(const Eigen::SyclDevice& sycl_device)
220 {
221 Eigen::array<IndexType, 2> input_dims = {{3, 3}};
222 Eigen::array<IndexType, 1> kernel_dims = {{2}};
223 Eigen::array<IndexType, 2> result_dims = {{2, 3}};
224
225 Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
226 Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
227 Tensor<DataType, 2, DataLayout,IndexType> result(result_dims);
228
229 Eigen::array<IndexType, 1> dims3{{0}};
230
231 input.setRandom();
232 kernel.setRandom();
233 result.setZero();
234
235 std::size_t input_bytes = input.size() * sizeof(DataType);
236 std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
237 std::size_t result_bytes = result.size() * sizeof(DataType);
238
239 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
240 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
241 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
242
243 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims);
244 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
245 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims);
246 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
247 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
248
249 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
250 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
251
252 VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0
253 VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2
254 VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4
255 VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1
256 VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3
257 VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
258
259 sycl_device.deallocate(d_input);
260 sycl_device.deallocate(d_kernel);
261 sycl_device.deallocate(d_result);
262 }
263
264 template <typename DataType, int DataLayout, typename IndexType>
test_expr(const Eigen::SyclDevice & sycl_device)265 static void test_expr(const Eigen::SyclDevice& sycl_device)
266 {
267 Eigen::array<IndexType, 2> input_dims = {{3, 3}};
268 Eigen::array<IndexType, 2> kernel_dims = {{2, 2}};
269 Eigen::array<IndexType, 2> result_dims = {{2, 2}};
270
271 Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
272 Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims);
273 Tensor<DataType, 2, DataLayout, IndexType> result(result_dims);
274
275 input.setRandom();
276 kernel.setRandom();
277 Eigen::array<IndexType, 2> dims;
278 dims[0] = 0;
279 dims[1] = 1;
280
281 std::size_t input_bytes = input.size() * sizeof(DataType);
282 std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
283 std::size_t result_bytes = result.size() * sizeof(DataType);
284
285 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
286 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
287 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
288
289 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims);
290 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
291 Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims);
292 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
293 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
294
295 gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims);
296 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
297
298 VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) +
299 input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1));
300 VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) +
301 input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1));
302 VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) +
303 input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1));
304 VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) +
305 input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
306
307 sycl_device.deallocate(d_input);
308 sycl_device.deallocate(d_kernel);
309 sycl_device.deallocate(d_result);
310 }
311
312
313 template <typename DataType, int DataLayout, typename IndexType>
test_modes(const Eigen::SyclDevice & sycl_device)314 static void test_modes(const Eigen::SyclDevice& sycl_device){
315
316 Eigen::array<IndexType, 1> input_dims = {{3}};
317 Eigen::array<IndexType, 1> kernel_dims = {{3}};
318
319 Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
320 Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
321
322 input.setRandom();
323 kernel.setRandom();
324 Eigen::array<IndexType, 1> dims;
325 dims[0] = 0;
326
327 input(0) = 1.0f;
328 input(1) = 2.0f;
329 input(2) = 3.0f;
330 kernel(0) = 0.5f;
331 kernel(1) = 1.0f;
332 kernel(2) = 0.0f;
333
334 Eigen::array<std::pair<IndexType, IndexType>, 1> padding;
335
336 // Emulate VALID mode (as defined in
337 // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
338 padding[0] = std::make_pair(0, 0);
339 Tensor<DataType, 1, DataLayout, IndexType> valid(1);
340
341 std::size_t input_bytes = input.size() * sizeof(DataType);
342 std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
343 std::size_t valid_bytes = valid.size() * sizeof(DataType);
344
345 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
346 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
347 DataType * d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes));
348
349 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
350 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
351 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions());
352 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
353 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
354
355 gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
356 sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes);
357
358 VERIFY_IS_EQUAL(valid.dimension(0), 1);
359 VERIFY_IS_APPROX(valid(0), 2.5f);
360
361 // Emulate SAME mode (as defined in
362 // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
363 padding[0] = std::make_pair(1, 1);
364 Tensor<DataType, 1, DataLayout, IndexType> same(3);
365 std::size_t same_bytes = same.size() * sizeof(DataType);
366 DataType * d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes));
367 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions());
368 gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
369 sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes);
370
371 VERIFY_IS_EQUAL(same.dimension(0), 3);
372 VERIFY_IS_APPROX(same(0), 1.0f);
373 VERIFY_IS_APPROX(same(1), 2.5f);
374 VERIFY_IS_APPROX(same(2), 4.0f);
375
376 // Emulate FULL mode (as defined in
377 // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
378 padding[0] = std::make_pair(2, 2);
379
380 Tensor<DataType, 1, DataLayout, IndexType> full(5);
381 std::size_t full_bytes = full.size() * sizeof(DataType);
382 DataType * d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes));
383 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions());
384 gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
385 sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes);
386
387 VERIFY_IS_EQUAL(full.dimension(0), 5);
388 VERIFY_IS_APPROX(full(0), 0.0f);
389 VERIFY_IS_APPROX(full(1), 1.0f);
390 VERIFY_IS_APPROX(full(2), 2.5f);
391 VERIFY_IS_APPROX(full(3), 4.0f);
392 VERIFY_IS_APPROX(full(4), 1.5f);
393
394 sycl_device.deallocate(d_input);
395 sycl_device.deallocate(d_kernel);
396 sycl_device.deallocate(d_valid);
397 sycl_device.deallocate(d_same);
398 sycl_device.deallocate(d_full);
399
400 }
401
402 template <typename DataType, int DataLayout, typename IndexType>
test_strides(const Eigen::SyclDevice & sycl_device)403 static void test_strides(const Eigen::SyclDevice& sycl_device){
404
405 Eigen::array<IndexType, 1> input_dims = {{13}};
406 Eigen::array<IndexType, 1> kernel_dims = {{3}};
407
408 Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
409 Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
410 Tensor<DataType, 1, DataLayout, IndexType> result(2);
411
412 input.setRandom();
413 kernel.setRandom();
414 Eigen::array<IndexType, 1> dims;
415 dims[0] = 0;
416
417 Eigen::array<IndexType, 1> stride_of_3;
418 stride_of_3[0] = 3;
419 Eigen::array<IndexType, 1> stride_of_2;
420 stride_of_2[0] = 2;
421
422 std::size_t input_bytes = input.size() * sizeof(DataType);
423 std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
424 std::size_t result_bytes = result.size() * sizeof(DataType);
425
426 DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
427 DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
428 DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
429
430 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
431 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
432 Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions());
433 sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
434 sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
435
436 gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2);
437 sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
438
439 VERIFY_IS_EQUAL(result.dimension(0), 2);
440 VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
441 input(6)*kernel(2)));
442 VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
443 input(12)*kernel(2)));
444 }
445
tensorConvolutionPerDevice(Dev_selector & s)446 template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){
447 QueueInterface queueInterface(s);
448 auto sycl_device=Eigen::SyclDevice(&queueInterface);
449 test_larg_expr1D<float, RowMajor, int64_t>(sycl_device);
450 test_larg_expr1D<float, ColMajor, int64_t>(sycl_device);
451 test_larg_expr2D<float, RowMajor, int64_t>(sycl_device);
452 test_larg_expr2D<float, ColMajor, int64_t>(sycl_device);
453 test_larg_expr3D<float, RowMajor, int64_t>(sycl_device);
454 test_larg_expr3D<float, ColMajor, int64_t>(sycl_device);
455 test_evals<float, ColMajor, int64_t>(sycl_device);
456 test_evals<float, RowMajor, int64_t>(sycl_device);
457 test_expr<float, ColMajor, int64_t>(sycl_device);
458 test_expr<float, RowMajor, int64_t>(sycl_device);
459 test_modes<float, ColMajor, int64_t>(sycl_device);
460 test_modes<float, RowMajor, int64_t>(sycl_device);
461 test_strides<float, ColMajor, int64_t>(sycl_device);
462 test_strides<float, RowMajor, int64_t>(sycl_device);
463 }
464
// Test entry point: runs the per-device convolution suite once for every
// device returned by Eigen::get_sycl_supported_devices().
EIGEN_DECLARE_TEST(cxx11_tensor_convolution_sycl)
{
  const auto supported_devices = Eigen::get_sycl_supported_devices();
  for (const auto& device : supported_devices) {
    CALL_SUBTEST(tensorConvolutionPerDevice(device));
  }
}
470