// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16 
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
18 #define EIGEN_USE_SYCL
19 
20 #include "main.h"
21 #include <unsupported/Eigen/CXX11/Tensor>
22 
23 using Eigen::Tensor;
24 static const int DataLayout = ColMajor;
25 
26 template <typename DataType, typename IndexType>
test_simple_image_patch_sycl(const Eigen::SyclDevice & sycl_device)27 static void test_simple_image_patch_sycl(const Eigen::SyclDevice& sycl_device)
28 {
29   IndexType sizeDim1 = 2;
30   IndexType sizeDim2 = 3;
31   IndexType sizeDim3 = 5;
32   IndexType sizeDim4 = 7;
33   array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
34   array<IndexType, 4> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1}};
35   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
36   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
37   tensor_col_major.setRandom();
38 
39   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
40   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
41   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
42   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
43 
44   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
45   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
46   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
47 
48   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
49   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
50   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
51   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
52 
53   // Single pixel patch: ColMajor
54   array<IndexType, 5> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3, sizeDim4}};
55   Tensor<DataType, 5, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
56   size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
57   DataType* gpu_data_single_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
58   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
59   gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
60   sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
61 
62   VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), 2);
63   VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
64   VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
65   VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), 3*5);
66   VERIFY_IS_EQUAL(single_patch_col_major.dimension(4), 7);
67 
68   // Single pixel patch: RowMajor
69   array<IndexType, 5> patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 1, 1, sizeDim1}};
70   Tensor<DataType, 5, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
71   patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
72   DataType* gpu_data_single_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
73   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
74   gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
75   sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
76 
77   VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), 7);
78   VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 3*5);
79   VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
80   VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), 1);
81   VERIFY_IS_EQUAL(single_patch_row_major.dimension(4), 2);
82 
83   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
84     // ColMajor
85     if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
86       std::cout << "Mismatch detected at index colmajor " << i << " : "
87            << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i]
88            << std::endl;
89     }
90     VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
91     // RowMajor
92     if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
93       std::cout << "Mismatch detected at index row major" << i << " : "
94            << tensor_row_major.data()[i] << " vs "
95            << single_patch_row_major.data()[i] << std::endl;
96     }
97     VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
98                     tensor_row_major.data()[i]);
99     VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
100     VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
101                     single_patch_row_major.data()[i]);
102   }
103 
104 
105   // Entire image patch: ColMajor
106   patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3, sizeDim4}};
107   Tensor<DataType, 5, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
108   patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
109   DataType* gpu_data_entire_image_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
110   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
111   gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
112   sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
113 
114   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
115   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
116   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
117   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
118   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(4), 7);
119 
120   // Entire image patch: RowMajor
121   patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
122   Tensor<DataType, 5, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
123   patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
124   DataType* gpu_data_entire_image_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
125   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
126   gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
127   sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
128 
129   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7);
130   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5);
131   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5);
132   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3);
133   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2);
134 
135   for (IndexType i = 0; i < 3; ++i) {
136     for (IndexType j = 0; j < 5; ++j) {
137       IndexType patchId = i+3*j;
138       for (IndexType r = 0; r < 3; ++r) {
139         for (IndexType c = 0; c < 5; ++c) {
140           for (IndexType d = 0; d < 2; ++d) {
141             for (IndexType b = 0; b < 7; ++b) {
142               DataType expected_col_major = 0.0f;
143               DataType expected_row_major = 0.0f;
144               if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
145                 expected_col_major = tensor_col_major(d, r-1+i, c-2+j, b);
146                 expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d);
147               }
148               // ColMajor
149               if (entire_image_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
150                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
151               }
152               VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId, b), expected_col_major);
153               // RowMajor
154               if (entire_image_patch_row_major(b, patchId, c, r, d) !=
155                   expected_row_major) {
156                 std::cout << "Mismatch detected at index i=" << i << " j=" << j
157                      << " r=" << r << " c=" << c << " d=" << d << " b=" << b
158                      << std::endl;
159               }
160               VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d),
161                               expected_row_major);
162               // Check that ColMajor and RowMajor agree.
163               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
164             }
165           }
166         }
167       }
168     }
169   }
170 
171   // 2D patch: ColMajor
172   patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3, sizeDim4}};
173   Tensor<DataType, 5, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
174   patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
175   DataType* gpu_data_twod_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
176   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
177   gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
178   sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
179 
180   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
181   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
182   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
183   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
184   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(4), 7);
185 
186   // 2D patch: RowMajor
187   patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 2, 2, sizeDim1}};
188   Tensor<DataType, 5, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
189   patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
190   DataType* gpu_data_twod_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
191   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
192   gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
193   sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
194 
195   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7);
196   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5);
197   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
198   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
199   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2);
200 
201 
202   // Based on the calculation described in TensorTraits.h, padding happens to be 0.
203   IndexType row_padding = 0;
204   IndexType col_padding = 0;
205   IndexType stride = 1;
206 
207   for (IndexType i = 0; i < 3; ++i) {
208     for (IndexType j = 0; j < 5; ++j) {
209       IndexType patchId = i+3*j;
210       for (IndexType r = 0; r < 2; ++r) {
211         for (IndexType c = 0; c < 2; ++c) {
212           for (IndexType d = 0; d < 2; ++d) {
213             for (IndexType b = 0; b < 7; ++b) {
214               DataType expected_col_major = 0.0f;
215               DataType expected_row_major = 0.0f;
216               IndexType row_offset = r*stride + i - row_padding;
217               IndexType col_offset = c*stride + j - col_padding;
218               // ColMajor
219               if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
220                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
221               }
222               if (twod_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
223                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
224               }
225               VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId, b), expected_col_major);
226 
227               // RowMajor
228               if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) {
229                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
230 
231               }
232               if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) {
233                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
234               }
235               VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major);
236               // Check that ColMajor and RowMajor agree.
237               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
238             }
239           }
240         }
241       }
242     }
243   }
244 
245   sycl_device.deallocate(gpu_data_col_major);
246   sycl_device.deallocate(gpu_data_row_major);
247   sycl_device.deallocate(gpu_data_single_patch_col_major);
248   sycl_device.deallocate(gpu_data_single_patch_row_major);
249   sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
250   sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
251   sycl_device.deallocate(gpu_data_twod_patch_col_major);
252   sycl_device.deallocate(gpu_data_twod_patch_row_major);
253 
254 }


257 // Verifies VALID padding (no padding) with incrementing values.
258 template <typename DataType, typename IndexType>
test_patch_padding_valid_sycl(const Eigen::SyclDevice & sycl_device)259 static void test_patch_padding_valid_sycl(const Eigen::SyclDevice& sycl_device){
260   IndexType input_depth = 3;
261   IndexType input_rows = 3;
262   IndexType input_cols = 3;
263   IndexType input_batches = 1;
264   IndexType ksize = 2;  // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
265   IndexType stride = 2;  // Only same stride is supported.
266 
267   array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
268   array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
269   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
270   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
271 
272   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
273   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
274   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
275   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
276 
277   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
278   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
279   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
280 
281   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
282   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
283   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
284   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
285 
286   // Initializes tensor with incrementing numbers.
287   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
288     tensor_col_major.data()[i] = i + 1;
289   }
290   // ColMajor
291   array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 1, input_batches}};
292   Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
293   size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
294   DataType* gpu_data_result_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
295   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
296   gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
297   sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
298 
299   VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth);  // depth
300   VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize);  // kernel rows
301   VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize);  // kernel cols
302   VERIFY_IS_EQUAL(result_col_major.dimension(3), 1);  // number of patches
303   VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches);  // number of batches
304 
305   // RowMajor
306   array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 1, ksize, ksize, input_depth }};
307   Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
308   patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
309   DataType* gpu_data_result_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
310   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
311   gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
312   sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
313 
314   VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
315   VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
316   VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
317   VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
318   VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
319 
320   // No padding is carried out.
321   IndexType row_padding = 0;
322   IndexType col_padding = 0;
323 
324   for (IndexType i = 0; (i+stride+ksize-1) < input_rows; i += stride) {  // input rows
325     for (IndexType j = 0; (j+stride+ksize-1) < input_cols; j += stride) {  // input cols
326       IndexType patchId = i+input_rows*j;
327       for (IndexType r = 0; r < ksize; ++r) {  // patch rows
328         for (IndexType c = 0; c < ksize; ++c) {  // patch cols
329           for (IndexType d = 0; d < input_depth; ++d) {  // depth
330             for (IndexType b = 0; b < input_batches; ++b) {  // batch
331               DataType expected_col_major = 0.0f;
332               DataType expected_row_major = 0.0f;
333               IndexType row_offset = r + i - row_padding;
334               IndexType col_offset = c + j - col_padding;
335               if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
336                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
337                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
338               }
339               // ColMajor
340               if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
341                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
342               }
343               VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
344               // RowMajor
345               if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
346                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
347               }
348               VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
349               // Check that ColMajor and RowMajor agree.
350               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
351             }
352           }
353         }
354       }
355     }
356   }
357   sycl_device.deallocate(gpu_data_col_major);
358   sycl_device.deallocate(gpu_data_row_major);
359   sycl_device.deallocate(gpu_data_result_col_major);
360   sycl_device.deallocate(gpu_data_result_row_major);
361 }
363 // Verifies VALID padding (no padding) with the same value.
364 template <typename DataType, typename IndexType>
test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice & sycl_device)365 static void test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice& sycl_device){
366   IndexType input_depth = 1;
367   IndexType input_rows = 5;
368   IndexType input_cols = 5;
369   IndexType input_batches = 2;
370   IndexType ksize = 3;  // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
371   IndexType stride = 2;  // Only same stride is supported.
372   // ColMajor
373 
374   array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
375   array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
376   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
377   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
378 
379   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
380   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
381   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
382   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
383   gpu_col_major.device(sycl_device)=gpu_col_major.constant(11.0f);
384   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
385   sycl_device.memcpyDeviceToHost(tensor_col_major.data(), gpu_data_col_major, (tensor_col_major.size())*sizeof(DataType));
386   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
387   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
388   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
389   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
390   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
391 
392   array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 4, input_batches}};
393   Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
394   size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
395   DataType* gpu_data_result_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
396   TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
397   gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
398   sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
399 
400   VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth);  // depth
401   VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize);  // kernel rows
402   VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize);  // kernel cols
403   VERIFY_IS_EQUAL(result_col_major.dimension(3), 4);  // number of patches
404   VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches);  // number of batches
405 
406   // RowMajor
407   array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 4, ksize, ksize, input_depth }};
408   Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
409   patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
410   DataType* gpu_data_result_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
411   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
412   gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
413   sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
414 
415   VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
416   VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
417   VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
418   VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
419   VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
420 
421   // No padding is carried out.
422   IndexType row_padding = 0;
423   IndexType col_padding = 0;
424 
425   for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) {  // input rows
426     for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) {  // input cols
427       IndexType patchId = i+input_rows*j;
428       for (IndexType r = 0; r < ksize; ++r) {  // patch rows
429         for (IndexType c = 0; c < ksize; ++c) {  // patch cols
430           for (IndexType d = 0; d < input_depth; ++d) {  // depth
431             for (IndexType b = 0; b < input_batches; ++b) {  // batch
432               DataType expected_col_major = 0.0f;
433               DataType expected_row_major = 0.0f;
434               IndexType row_offset = r + i - row_padding;
435               IndexType col_offset = c + j - col_padding;
436               if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
437                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
438                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
439               }
440               // ColMajor
441               if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
442                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
443               }
444               VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
445               // RowMajor
446               if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
447                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
448               }
449               VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
450               // Check that ColMajor and RowMajor agree.
451               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
452             }
453           }
454         }
455       }
456     }
457   }
458 }
459 
460 // Verifies SAME padding.
461 template <typename DataType, typename IndexType>
test_patch_padding_same_sycl(const Eigen::SyclDevice & sycl_device)462 static void test_patch_padding_same_sycl(const Eigen::SyclDevice& sycl_device){
463   IndexType input_depth = 3;
464   IndexType input_rows = 4;
465   IndexType input_cols = 2;
466   IndexType input_batches = 1;
467   IndexType ksize = 2;  // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
468   IndexType stride = 2;  // Only same stride is supported.
469 
470   // ColMajor
471   array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
472   array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
473   Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
474   Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
475 
476   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
477   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
478   TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
479   TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
480 
481   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
482   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
483   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
484 
485   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
486   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
487   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
488   VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
489 
490   // Initializes tensor with incrementing numbers.
491   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
492     tensor_col_major.data()[i] = i + 1;
493   }
494 
495 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 2, input_batches}};
496 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
497 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
498 DataType* gpu_data_result_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
499 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
500 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
501 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
502 
503 
504   VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth);  // depth
505   VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize);  // kernel rows
506   VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize);  // kernel cols
507   VERIFY_IS_EQUAL(result_col_major.dimension(3), 2);  // number of patches
508   VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches);  // number of batches
509 
510   // RowMajor
511 
512   array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 2, ksize, ksize, input_depth }};
513   Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
514   patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
515   DataType* gpu_data_result_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
516   TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
517   gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
518   sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
519 
520   VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
521   VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
522   VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
523   VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
524   VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
525 
526   // Based on the calculation described in TensorTraits.h, padding happens to be 0.
527   IndexType row_padding = 0;
528   IndexType col_padding = 0;
529 
530   for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) {  // input rows
531     for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) {  // input cols
532       IndexType patchId = i+input_rows*j;
533       for (IndexType r = 0; r < ksize; ++r) {  // patch rows
534         for (IndexType c = 0; c < ksize; ++c) {  // patch cols
535           for (IndexType d = 0; d < input_depth; ++d) {  // depth
536             for (IndexType b = 0; b < input_batches; ++b) {  // batch
537               DataType expected_col_major = 0.0f;
538               DataType expected_row_major = 0.0f;
539               IndexType row_offset = r*stride + i - row_padding;
540               IndexType col_offset = c*stride + j - col_padding;
541               if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
542                 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
543                 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
544               }
545               // ColMajor
546               if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
547                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
548               }
549               VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
550               // RowMajor
551               if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
552                 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
553               }
554               VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
555               // Check that ColMajor and RowMajor agree.
556               VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
557             }
558           }
559         }
560       }
561     }
562   }
563 }
564 
565 
566 template <typename DataType, typename IndexType>
test_patch_no_extra_dim_sycl(const Eigen::SyclDevice & sycl_device)567 static void test_patch_no_extra_dim_sycl(const Eigen::SyclDevice& sycl_device){
568 
569   IndexType sizeDim1 = 2;
570   IndexType sizeDim2 = 3;
571   IndexType sizeDim3 = 5;
572 
573   // ColMajor
574   array<IndexType, 3> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3}};
575   array<IndexType, 3> tensorRowMajorRange = {{sizeDim3, sizeDim2, sizeDim1}};
576   Tensor<DataType, 3, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
577   tensor_col_major.setRandom();
578   Tensor<DataType, 3, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
579 
580   DataType* gpu_data_col_major  = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
581   DataType* gpu_data_row_major  = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
582   TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
583   TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
584 
585   sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
586   gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
587   sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
588 
589   VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(2));
590   VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(1));
591   VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(0));
592 
593 
594   // Single pixel patch: ColMajor
595   array<IndexType, 4> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3}};
596   Tensor<DataType, 4, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
597   size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
598   DataType* gpu_data_single_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
599   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
600   gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
601   sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
602 
603   VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), sizeDim1);
604   VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
605   VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
606   VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), sizeDim2*sizeDim3);
607 
608   // Single pixel patch: RowMajor
609   array<IndexType, 4> patchRowMajorTensorRange={{sizeDim2*sizeDim3, 1, 1, sizeDim1}};
610   Tensor<DataType, 4, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
611   patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
612   DataType* gpu_data_single_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
613   TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
614   gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
615   sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
616 
617   VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), sizeDim2*sizeDim3);
618   VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 1);
619   VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
620   VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), sizeDim1);
621 
622   for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
623     // ColMajor
624     if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
625       std::cout << "Mismatch detected at index " << i << " : " << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i] << std::endl;
626     }
627     VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
628     // RowMajor
629     if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
630       std::cout << "Mismatch detected at index " << i << " : "
631            << tensor_col_major.data()[i] << " vs "
632            << single_patch_row_major.data()[i] << std::endl;
633     }
634     VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
635                     tensor_row_major.data()[i]);
636     VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
637     VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
638                     single_patch_row_major.data()[i]);
639   }
640 
641   // Entire image patch: ColMajor
642   patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3}};
643   Tensor<DataType, 4, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
644   patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
645   DataType* gpu_data_entire_image_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
646   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
647   gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
648   sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
649 
650   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
651   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
652   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
653   VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
654 
655   // Entire image patch: RowMajor
656 patchRowMajorTensorRange={{sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
657 Tensor<DataType, 4, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
658 patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
659 DataType* gpu_data_entire_image_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
660 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
661 gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
662 sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
663   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5);
664   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5);
665   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3);
666   VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2);
667 
668   for (IndexType i = 0; i < 3; ++i) {
669     for (IndexType j = 0; j < 5; ++j) {
670       IndexType patchId = i+3*j;
671       for (IndexType r = 0; r < 3; ++r) {
672         for (IndexType c = 0; c < 5; ++c) {
673           for (IndexType d = 0; d < 2; ++d) {
674             DataType expected_col_major = 0.0f;
675             DataType expected_row_major = 0.0f;
676             if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
677               expected_col_major = tensor_col_major(d, r-1+i, c-2+j);
678               expected_row_major = tensor_row_major(c-2+j, r-1+i, d);
679             }
680             // ColMajor
681             if (entire_image_patch_col_major(d, r, c, patchId) != expected_col_major) {
682               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
683             }
684             VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId), expected_col_major);
685             // RowMajor
686             if (entire_image_patch_row_major(patchId, c, r, d) !=
687                 expected_row_major) {
688               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
689             }
690             VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d),
691                             expected_row_major);
692             // Check that ColMajor and RowMajor agree.
693             VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
694           }
695         }
696       }
697     }
698   }
699 
700   // 2D patch: ColMajor
701   patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3}};
702   Tensor<DataType, 4, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
703   patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
704   DataType* gpu_data_twod_patch_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
705   TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
706   gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
707   sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
708 
709   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
710   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
711   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
712   VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
713 
714   // 2D patch: RowMajor
715   patchRowMajorTensorRange={{sizeDim2*sizeDim3, 2, 2, sizeDim1}};
716   Tensor<DataType, 4, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
717   patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
718   DataType* gpu_data_twod_patch_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
719   TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
720   gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
721   sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
722   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5);
723   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2);
724   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
725   VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
726 
727   // Based on the calculation described in TensorTraits.h, padding happens to be 0.
728   IndexType row_padding = 0;
729   IndexType col_padding = 0;
730   IndexType stride = 1;
731 
732   for (IndexType i = 0; i < 3; ++i) {
733     for (IndexType j = 0; j < 5; ++j) {
734       IndexType patchId = i+3*j;
735       for (IndexType r = 0; r < 2; ++r) {
736         for (IndexType c = 0; c < 2; ++c) {
737           for (IndexType d = 0; d < 2; ++d) {
738             DataType expected_col_major = 0.0f;
739             DataType expected_row_major = 0.0f;
740             IndexType row_offset = r*stride + i - row_padding;
741             IndexType col_offset = c*stride + j - col_padding;
742             // ColMajor
743             if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
744               expected_col_major = tensor_col_major(d, row_offset, col_offset);
745             }
746             if (twod_patch_col_major(d, r, c, patchId) != expected_col_major) {
747               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
748             }
749             VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId), expected_col_major);
750             // RowMajor
751             if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) {
752               expected_row_major = tensor_row_major(col_offset, row_offset, d);
753             }
754             if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) {
755               std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
756             }
757             VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major);
758             // Check that ColMajor and RowMajor agree.
759             VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
760           }
761         }
762       }
763     }
764   }
765 
766   sycl_device.deallocate(gpu_data_col_major);
767   sycl_device.deallocate(gpu_data_row_major);
768   sycl_device.deallocate(gpu_data_single_patch_col_major);
769   sycl_device.deallocate(gpu_data_single_patch_row_major);
770   sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
771   sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
772   sycl_device.deallocate(gpu_data_twod_patch_col_major);
773   sycl_device.deallocate(gpu_data_twod_patch_row_major);
774 }
775 
template <typename DataType, typename IndexType>
static void test_imagenet_patches_sycl(const Eigen::SyclDevice& sycl_device)
{
  // Runs extract_image_patches() on four input/kernel configurations taken
  // from the 'imagenet' convolution benchmarks, each computed on the device
  // in both ColMajor and RowMajor layouts and verified element-wise on the
  // host against a zero-padded SAME-style reference.  Device buffers are
  // deallocated and reallocated between configurations because the tensor
  // sizes change.
  // Test the code on typical configurations used by the 'imagenet' benchmarks at
  // https://github.com/soumith/convnet-benchmarks
  // ColMajor
  // Configuration 1: depth 3, 128x128 image, batch 16, 11x11 kernel.
  IndexType sizeDim1 = 3;
  IndexType sizeDim2 = 128;
  IndexType sizeDim3 = 128;
  IndexType sizeDim4 = 16;
  array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  Tensor<DataType, 4, DataLayout,IndexType> l_in_col_major(tensorColMajorRange);
  l_in_col_major.setRandom();

  DataType* gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major(gpu_data_l_in_col_major, tensorColMajorRange);

  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));

  array<IndexType, 5> patchTensorRange={{sizeDim1, 11, 11, sizeDim2*sizeDim3, sizeDim4}};
  Tensor<DataType, 5, DataLayout,IndexType> l_out_col_major(patchTensorRange);
  size_t patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
  DataType* gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_l_out_col_major(gpu_data_l_out_col_major, patchTensorRange);
  gpu_l_out_col_major.device(sycl_device)=gpu_l_in_col_major.extract_image_patches(11, 11);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), sizeDim1);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 11);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 11);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), sizeDim2*sizeDim3);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), sizeDim4);

  // RowMajor
  // The RowMajor result is obtained by layout-swapping the same device input.
  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 11, 11, sizeDim1}};
  Tensor<DataType, 5, RowMajor,IndexType> l_out_row_major(patchTensorRange);
  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
  DataType* gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_l_out_row_major(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major.device(sycl_device)=gpu_l_in_col_major.swap_layout().extract_image_patches(11, 11);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), sizeDim4);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), sizeDim2*sizeDim3);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), sizeDim1);

  // Centered 11x11 patches: offsets (-5, -5); out-of-image taps expect 0.
  for (IndexType b = 0; b < 16; ++b) {
    for (IndexType i = 0; i < 128; ++i) {
      for (IndexType j = 0; j < 128; ++j) {
        IndexType patchId = i+128*j;
        for (IndexType c = 0; c < 11; ++c) {
          for (IndexType r = 0; r < 11; ++r) {
            for (IndexType d = 0; d < 3; ++d) {
              DataType expected = 0.0f;
              if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
                expected = l_in_col_major(d, r-5+i, c-5+j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) !=
                  expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j
                     << " r=" << r << " c=" << c << " d=" << d << " b=" << b
                     << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
                              expected);
            }
          }
        }
      }
    }
  }

  // ColMajor
  // Configuration 2: depth 16, 64x64 image, batch 32, 9x9 kernel.
  // Free the previous device buffers before reallocating at the new sizes;
  // fresh TensorMaps (*_resize1) are created over the new allocations.
  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
  sizeDim1 = 16;
  sizeDim2 = 64;
  sizeDim3 = 64;
  sizeDim4 = 32;
  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  l_in_col_major.resize(tensorColMajorRange);
  l_in_col_major.setRandom();
  gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize1(gpu_data_l_in_col_major, tensorColMajorRange);

  patchTensorRange={{sizeDim1, 9, 9, sizeDim2*sizeDim3, sizeDim4}};
  l_out_col_major.resize(patchTensorRange);
  patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
  gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize1(gpu_data_l_out_col_major, patchTensorRange);
  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
  gpu_l_out_col_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.extract_image_patches(9, 9);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 16);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 9);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 9);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 64*64);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);

// RowMajor
  sycl_device.deallocate(gpu_data_l_out_row_major);
  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 9, 9 ,sizeDim1}};
  l_out_row_major.resize(patchTensorRange);
  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
  gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize1(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.swap_layout().extract_image_patches(9, 9);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16);

  // Centered 9x9 patches: offsets (-4, -4).
  for (IndexType b = 0; b < 32; ++b) {
    for (IndexType i = 0; i < 64; ++i) {
      for (IndexType j = 0; j < 64; ++j) {
        IndexType patchId = i+64*j;
        for (IndexType c = 0; c < 9; ++c) {
          for (IndexType r = 0; r < 9; ++r) {
            for (IndexType d = 0; d < 16; ++d) {
              DataType expected = 0.0f;
              if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
                expected = l_in_col_major(d, r-4+i, c-4+j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
            }
          }
        }
      }
    }
  }

  // ColMajor
  // Configuration 3: depth 32, 16x16 image, batch 32, 7x7 kernel.

  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
  sizeDim1 = 32;
  sizeDim2 = 16;
  sizeDim3 = 16;
  sizeDim4 = 32;
  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  l_in_col_major.resize(tensorColMajorRange);
  l_in_col_major.setRandom();
  gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize2(gpu_data_l_in_col_major, tensorColMajorRange);

  patchTensorRange={{sizeDim1, 7, 7, sizeDim2*sizeDim3, sizeDim4}};
  l_out_col_major.resize(patchTensorRange);
  patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
  gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize2(gpu_data_l_out_col_major, patchTensorRange);
  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
  gpu_l_out_col_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.extract_image_patches(7, 7);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 32);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 7);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 7);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 16*16);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);

  // RowMajor
  sycl_device.deallocate(gpu_data_l_out_row_major);
  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 7, 7 ,sizeDim1}};
  l_out_row_major.resize(patchTensorRange);
  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
  gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize2(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.swap_layout().extract_image_patches(7, 7);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32);

  // Centered 7x7 patches: offsets (-3, -3).
  for (IndexType b = 0; b < 32; ++b) {
    for (IndexType i = 0; i < 16; ++i) {
      for (IndexType j = 0; j < 16; ++j) {
        IndexType patchId = i+16*j;
        for (IndexType c = 0; c < 7; ++c) {
          for (IndexType r = 0; r < 7; ++r) {
            for (IndexType d = 0; d < 32; ++d) {
              DataType expected = 0.0f;
              if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
                expected = l_in_col_major(d, r-3+i, c-3+j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
            }
          }
        }
      }
    }
  }

  // ColMajor
  // Configuration 4: depth 64, 13x13 image, batch 32, 3x3 kernel.
  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
  sizeDim1 = 64;
  sizeDim2 = 13;
  sizeDim3 = 13;
  sizeDim4 = 32;
  tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  l_in_col_major.resize(tensorColMajorRange);
  l_in_col_major.setRandom();
  gpu_data_l_in_col_major  = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
  TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize3(gpu_data_l_in_col_major, tensorColMajorRange);

  patchTensorRange={{sizeDim1, 3, 3, sizeDim2*sizeDim3, sizeDim4}};
  l_out_col_major.resize(patchTensorRange);
  patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
  gpu_data_l_out_col_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize3(gpu_data_l_out_col_major, patchTensorRange);
  sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
  gpu_l_out_col_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.extract_image_patches(3, 3);
  sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 64);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 3);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 3);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 13*13);
  VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);

  // RowMajor
  sycl_device.deallocate(gpu_data_l_out_row_major);
  patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 3, 3 ,sizeDim1}};
  l_out_row_major.resize(patchTensorRange);
  patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
  gpu_data_l_out_row_major  = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize3(gpu_data_l_out_row_major, patchTensorRange);
  gpu_l_out_row_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.swap_layout().extract_image_patches(3, 3);
  sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3);
  VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64);

  // Centered 3x3 patches: offsets (-1, -1).
  for (IndexType b = 0; b < 32; ++b) {
    for (IndexType i = 0; i < 13; ++i) {
      for (IndexType j = 0; j < 13; ++j) {
        IndexType patchId = i+13*j;
        for (IndexType c = 0; c < 3; ++c) {
          for (IndexType r = 0; r < 3; ++r) {
            for (IndexType d = 0; d < 64; ++d) {
              DataType expected = 0.0f;
              if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
                expected = l_in_col_major(d, r-1+i, c-1+j, b);
              }
              // ColMajor
              if (l_out_col_major(d, r, c, patchId, b) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
              // RowMajor
              if (l_out_row_major(b, patchId, c, r, d) != expected) {
                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
              }
              VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
            }
          }
        }
      }
    }
  }
  // Release the device buffers from the final configuration.
  sycl_device.deallocate(gpu_data_l_in_col_major);
  sycl_device.deallocate(gpu_data_l_out_col_major);
  sycl_device.deallocate(gpu_data_l_out_row_major);
}
1075 
1076 
sycl_tensor_image_patch_test_per_device(dev_Selector s)1077 template<typename DataType, typename dev_Selector> void sycl_tensor_image_patch_test_per_device(dev_Selector s){
1078 QueueInterface queueInterface(s);
1079 auto sycl_device = Eigen::SyclDevice(&queueInterface);
1080 test_simple_image_patch_sycl<DataType, int64_t>(sycl_device);
1081 test_patch_padding_valid_sycl<DataType, int64_t>(sycl_device);
1082 test_patch_padding_valid_same_value_sycl<DataType, int64_t>(sycl_device);
1083 test_patch_padding_same_sycl<DataType, int64_t>(sycl_device);
1084 test_patch_no_extra_dim_sycl<DataType, int64_t>(sycl_device);
1085 test_imagenet_patches_sycl<DataType, int64_t>(sycl_device);
1086 }
EIGEN_DECLARE_TEST(cxx11_tensor_image_patch_sycl)
{
  // Run the per-device test driver once for every SYCL device Eigen reports
  // as supported on this machine.
  const auto devices = Eigen::get_sycl_supported_devices();
  for (const auto& dev : devices) {
    CALL_SUBTEST(sycl_tensor_image_patch_test_per_device<float>(dev));
  }
}
1093