1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <eigen@codeplay.com>
9 //
10 // This Source Code Form is subject to the terms of the Mozilla
11 // Public License v. 2.0. If a copy of the MPL was not distributed
12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
18 #define EIGEN_USE_SYCL
19
20 #include "main.h"
21 #include <unsupported/Eigen/CXX11/Tensor>
22
23 using Eigen::Tensor;
24 static const int DataLayout = ColMajor;
25
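// Each test below fills a tensor on the host, copies it to the SYCL device,
// evaluates extract_image_patches() there, copies the result back and compares
// it element by element against a host-side reference. For a ColMajor input of
// shape {depth, rows, cols, batch} the patch result is laid out as
// {depth, patch_rows, patch_cols, num_patches, batch}; the RowMajor variant
// uses the reversed ordering, as the dimension checks in each test confirm.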
26 template <typename DataType, typename IndexType>
27 static void test_simple_image_patch_sycl(const Eigen::SyclDevice& sycl_device)
28 {
29 IndexType sizeDim1 = 2;
30 IndexType sizeDim2 = 3;
31 IndexType sizeDim3 = 5;
32 IndexType sizeDim4 = 7;
33 array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
34 array<IndexType, 4> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1}};
35 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
36 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
37 tensor_col_major.setRandom();
38
39 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
40 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
41 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
42 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
43
44 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
45 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
46 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
47
48 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
49 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
50 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
51 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
52
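// With a 1x1 patch and the default unit stride every pixel becomes its own
// patch, so the flattened patch tensor holds exactly the input values in the
// same linear order. That is why the loop further down can compare the raw
// data() buffers index by index.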
53 // Single pixel patch: ColMajor
54 array<IndexType, 5> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3, sizeDim4}};
55 Tensor<DataType, 5, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
56 size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
57 DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
58 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
59 gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
60 sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
61
62 VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), 2);
63 VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
64 VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
65 VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), 3*5);
66 VERIFY_IS_EQUAL(single_patch_col_major.dimension(4), 7);
67
68 // Single pixel patch: RowMajor
69 array<IndexType, 5> patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 1, 1, sizeDim1}};
70 Tensor<DataType, 5, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
71 patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
72 DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
73 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
74 gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
75 sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
76
77 VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), 7);
78 VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 3*5);
79 VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
80 VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), 1);
81 VERIFY_IS_EQUAL(single_patch_row_major.dimension(4), 2);
82
83 for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
84 // ColMajor
85 if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
86 std::cout << "Mismatch detected at index colmajor " << i << " : "
87 << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i]
88 << std::endl;
89 }
90 VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
91 // RowMajor
92 if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
93 std::cout << "Mismatch detected at index row major " << i << " : "
94 << tensor_row_major.data()[i] << " vs "
95 << single_patch_row_major.data()[i] << std::endl;
96 }
97 VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
98 tensor_row_major.data()[i]);
99 VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
100 VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
101 single_patch_row_major.data()[i]);
102 }
103
104
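// A 3x5 patch over the 3x5 image (default stride 1, SAME-style padding) yields
// one patch per input position; entries of a patch that fall outside the image
// are zero. The reference below rebuilds this by shifting the patch-local
// coordinates with r-1+i and c-2+j, i.e. roughly half the patch size on each
// side, and leaving out-of-range positions at their zero initialisation.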
105 // Entire image patch: ColMajor
106 patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3, sizeDim4}};
107 Tensor<DataType, 5, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
108 patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
109 DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
110 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
111 gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
112 sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
113
114 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
115 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
116 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
117 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
118 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(4), 7);
119
120 // Entire image patch: RowMajor
121 patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
122 Tensor<DataType, 5, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
123 patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
124 DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
125 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
126 gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
127 sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
128
129 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7);
130 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5);
131 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5);
132 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3);
133 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2);
134
135 for (IndexType i = 0; i < 3; ++i) {
136 for (IndexType j = 0; j < 5; ++j) {
137 IndexType patchId = i+3*j;
138 for (IndexType r = 0; r < 3; ++r) {
139 for (IndexType c = 0; c < 5; ++c) {
140 for (IndexType d = 0; d < 2; ++d) {
141 for (IndexType b = 0; b < 7; ++b) {
142 DataType expected_col_major = 0.0f;
143 DataType expected_row_major = 0.0f;
144 if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
145 expected_col_major = tensor_col_major(d, r-1+i, c-2+j, b);
146 expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d);
147 }
148 // ColMajor
149 if (entire_image_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
150 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
151 }
152 VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId, b), expected_col_major);
153 // RowMajor
154 if (entire_image_patch_row_major(b, patchId, c, r, d) !=
155 expected_row_major) {
156 std::cout << "Mismatch detected at index i=" << i << " j=" << j
157 << " r=" << r << " c=" << c << " d=" << d << " b=" << b
158 << std::endl;
159 }
160 VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d),
161 expected_row_major);
162 // Check that ColMajor and RowMajor agree.
163 VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
164 }
165 }
166 }
167 }
168 }
169 }
170
171 // 2D patch: ColMajor
172 patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3, sizeDim4}};
173 Tensor<DataType, 5, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
174 patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
175 DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
176 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
177 gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
178 sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
179
180 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
181 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
182 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
183 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
184 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(4), 7);
185
186 // 2D patch: RowMajor
187 patchRowMajorTensorRange={{sizeDim4, sizeDim2*sizeDim3, 2, 2, sizeDim1}};
188 Tensor<DataType, 5, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
189 patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
190 DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
191 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
192 gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
193 sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
194
195 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7);
196 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5);
197 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
198 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
199 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2);
200
201
202 // Based on the calculation described in TensorTraits.h, padding happens to be 0.
203 IndexType row_padding = 0;
204 IndexType col_padding = 0;
205 IndexType stride = 1;
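// The reference value for patch (i, j) at patch-local position (r, c) is the
// input pixel at row_offset = r*stride + i - row_padding and
// col_offset = c*stride + j - col_padding; positions outside the image keep
// the zero the expected values were initialised with.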
206
207 for (IndexType i = 0; i < 3; ++i) {
208 for (IndexType j = 0; j < 5; ++j) {
209 IndexType patchId = i+3*j;
210 for (IndexType r = 0; r < 2; ++r) {
211 for (IndexType c = 0; c < 2; ++c) {
212 for (IndexType d = 0; d < 2; ++d) {
213 for (IndexType b = 0; b < 7; ++b) {
214 DataType expected_col_major = 0.0f;
215 DataType expected_row_major = 0.0f;
216 IndexType row_offset = r*stride + i - row_padding;
217 IndexType col_offset = c*stride + j - col_padding;
218 // ColMajor
219 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
220 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
221 }
222 if (twod_patch_col_major(d, r, c, patchId, b) != expected_col_major) {
223 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
224 }
225 VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId, b), expected_col_major);
226
227 // RowMajor
228 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) {
229 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
230
231 }
232 if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) {
233 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
234 }
235 VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major);
236 // Check that ColMajor and RowMajor agree.
237 VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
238 }
239 }
240 }
241 }
242 }
243 }
244
245 sycl_device.deallocate(gpu_data_col_major);
246 sycl_device.deallocate(gpu_data_row_major);
247 sycl_device.deallocate(gpu_data_single_patch_col_major);
248 sycl_device.deallocate(gpu_data_single_patch_row_major);
249 sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
250 sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
251 sycl_device.deallocate(gpu_data_twod_patch_col_major);
252 sycl_device.deallocate(gpu_data_twod_patch_row_major);
253
254 }
255
256
257 // Verifies VALID padding (no padding) with incrementing values.
258 template <typename DataType, typename IndexType>
259 static void test_patch_padding_valid_sycl(const Eigen::SyclDevice& sycl_device){
260 IndexType input_depth = 3;
261 IndexType input_rows = 3;
262 IndexType input_cols = 3;
263 IndexType input_batches = 1;
264 IndexType ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
265 IndexType stride = 2; // Only same stride is supported.
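// With a 3x3 input, a 2x2 kernel and stride 2, VALID padding leaves room for
// exactly one patch, which is what the dimension checks and the reference loop
// below verify.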
266
267 array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
268 array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
269 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
270 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
271
272 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
273 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
274 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
275 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
276
277 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
278 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
279 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
280
281 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
282 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
283 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
284 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
285
286 // Initializes tensor with incrementing numbers.
287 for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
288 tensor_col_major.data()[i] = i + 1;
289 }
290 // ColMajor
291 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 1, input_batches}};
292 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
293 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
294 DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
295 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
296 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
297 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
298
299 VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth
300 VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows
301 VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols
302 VERIFY_IS_EQUAL(result_col_major.dimension(3), 1); // number of patches
303 VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches
304
305 // RowMajor
306 array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 1, ksize, ksize, input_depth }};
307 Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
308 patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
309 DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
310 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
311 gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
312 sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
313
314 VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
315 VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
316 VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
317 VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
318 VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
319
320 // No padding is carried out.
321 IndexType row_padding = 0;
322 IndexType col_padding = 0;
323
324 for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
325 for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
326 IndexType patchId = i+input_rows*j;
327 for (IndexType r = 0; r < ksize; ++r) { // patch rows
328 for (IndexType c = 0; c < ksize; ++c) { // patch cols
329 for (IndexType d = 0; d < input_depth; ++d) { // depth
330 for (IndexType b = 0; b < input_batches; ++b) { // batch
331 DataType expected_col_major = 0.0f;
332 DataType expected_row_major = 0.0f;
333 IndexType row_offset = r + i - row_padding;
334 IndexType col_offset = c + j - col_padding;
335 if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
336 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
337 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
338 }
339 // ColMajor
340 if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
341 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
342 }
343 VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
344 // RowMajor
345 if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
346 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
347 }
348 VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
349 // Check that ColMajor and RowMajor agree.
350 VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
351 }
352 }
353 }
354 }
355 }
356 }
357 sycl_device.deallocate(gpu_data_col_major);
358 sycl_device.deallocate(gpu_data_row_major);
359 sycl_device.deallocate(gpu_data_result_col_major);
360 sycl_device.deallocate(gpu_data_result_row_major);
361 }
362
363 // Verifies VALID padding (no padding) with the same value.
364 template <typename DataType, typename IndexType>
365 static void test_patch_padding_valid_same_value_sycl(const Eigen::SyclDevice& sycl_device){
366 IndexType input_depth = 1;
367 IndexType input_rows = 5;
368 IndexType input_cols = 5;
369 IndexType input_batches = 2;
370 IndexType ksize = 3; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
371 IndexType stride = 2; // Only same stride is supported.
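// A 5x5 input with a 3x3 kernel and stride 2 under VALID padding produces a
// 2x2 grid of patches (4 in total). Because every input element is set to the
// constant 11 below, every in-bounds patch element must read back as 11.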
372 // ColMajor
373
374 array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
375 array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
376 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
377 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
378
379 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
380 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
381 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
382 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
383 gpu_col_major.device(sycl_device)=gpu_col_major.constant(11.0f);
384 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
385 sycl_device.memcpyDeviceToHost(tensor_col_major.data(), gpu_data_col_major, (tensor_col_major.size())*sizeof(DataType));
386 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
387 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
388 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
389 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
390 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
391
392 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 4, input_batches}};
393 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
394 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
395 DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
396 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
397 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
398 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
399
400 VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth
401 VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows
402 VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols
403 VERIFY_IS_EQUAL(result_col_major.dimension(3), 4); // number of patches
404 VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches
405
406 // RowMajor
407 array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 4, ksize, ksize, input_depth }};
408 Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
409 patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
410 DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
411 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
412 gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
413 sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
414
415 VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
416 VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
417 VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
418 VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
419 VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
420
421 // No padding is carried out.
422 IndexType row_padding = 0;
423 IndexType col_padding = 0;
424
425 for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
426 for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
427 IndexType patchId = i+input_rows*j;
428 for (IndexType r = 0; r < ksize; ++r) { // patch rows
429 for (IndexType c = 0; c < ksize; ++c) { // patch cols
430 for (IndexType d = 0; d < input_depth; ++d) { // depth
431 for (IndexType b = 0; b < input_batches; ++b) { // batch
432 DataType expected_col_major = 0.0f;
433 DataType expected_row_major = 0.0f;
434 IndexType row_offset = r + i - row_padding;
435 IndexType col_offset = c + j - col_padding;
436 if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
437 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
438 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
439 }
440 // ColMajor
441 if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
442 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
443 }
444 VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
445 // RowMajor
446 if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
447 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
448 }
449 VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
450 // Check that ColMajor and RowMajor agree.
451 VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
452 }
453 }
454 }
455 }
456 }
457 }
// Release the device buffers, as the other tests do.
sycl_device.deallocate(gpu_data_col_major);
sycl_device.deallocate(gpu_data_row_major);
sycl_device.deallocate(gpu_data_result_col_major);
sycl_device.deallocate(gpu_data_result_row_major);
458 }
459
460 // Verifies SAME padding.
461 template <typename DataType, typename IndexType>
462 static void test_patch_padding_same_sycl(const Eigen::SyclDevice& sycl_device){
463 IndexType input_depth = 3;
464 IndexType input_rows = 4;
465 IndexType input_cols = 2;
466 IndexType input_batches = 1;
467 IndexType ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
468 IndexType stride = 2; // Only same stride is supported.
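// SAME padding with a 2x2 kernel and stride 2 on a 4x2 image yields
// ceil(4/2) x ceil(2/2) = 2 patches, matching the dimension check below. For
// this configuration the padding computed for SAME happens to be zero (see the
// comment above the reference loop), so patches line up with the input.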
469
470 // ColMajor
471 array<IndexType, 4> tensorColMajorRange = {{input_depth, input_rows, input_cols, input_batches}};
472 array<IndexType, 4> tensorRowMajorRange = {{input_batches, input_cols, input_rows, input_depth}};
473 Tensor<DataType, 4, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
474 Tensor<DataType, 4, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
475
476 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
477 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
478 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
479 TensorMap<Tensor<DataType, 4, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
480
481 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
482 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
483 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_col_major.size())*sizeof(DataType));
484
485 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(3));
486 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(2));
487 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(1));
488 VERIFY_IS_EQUAL(tensor_col_major.dimension(3), tensor_row_major.dimension(0));
489
490 // Initializes tensor with incrementing numbers.
491 for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
492 tensor_col_major.data()[i] = i + 1;
493 }
494
495 array<IndexType, 5> patchColMajorTensorRange={{input_depth, ksize, ksize, 2, input_batches}};
496 Tensor<DataType, 5, DataLayout,IndexType> result_col_major(patchColMajorTensorRange);
497 size_t patchTensorBuffSize =result_col_major.size()*sizeof(DataType);
498 DataType* gpu_data_result_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
499 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_result_col_major(gpu_data_result_col_major, patchColMajorTensorRange);
500 gpu_result_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
501 sycl_device.memcpyDeviceToHost(result_col_major.data(), gpu_data_result_col_major, patchTensorBuffSize);
502
503
504 VERIFY_IS_EQUAL(result_col_major.dimension(0), input_depth); // depth
505 VERIFY_IS_EQUAL(result_col_major.dimension(1), ksize); // kernel rows
506 VERIFY_IS_EQUAL(result_col_major.dimension(2), ksize); // kernel cols
507 VERIFY_IS_EQUAL(result_col_major.dimension(3), 2); // number of patches
508 VERIFY_IS_EQUAL(result_col_major.dimension(4), input_batches); // number of batches
509
510 // RowMajor
511
512 array<IndexType, 5> patchRowMajorTensorRange={{input_batches, 2, ksize, ksize, input_depth }};
513 Tensor<DataType, 5, RowMajor,IndexType> result_row_major(patchRowMajorTensorRange);
514 patchTensorBuffSize =result_row_major.size()*sizeof(DataType);
515 DataType* gpu_data_result_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
516 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_result_row_major(gpu_data_result_row_major, patchRowMajorTensorRange);
517 gpu_result_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME);
518 sycl_device.memcpyDeviceToHost(result_row_major.data(), gpu_data_result_row_major, patchTensorBuffSize);
519
520 VERIFY_IS_EQUAL(result_col_major.dimension(0), result_row_major.dimension(4));
521 VERIFY_IS_EQUAL(result_col_major.dimension(1), result_row_major.dimension(3));
522 VERIFY_IS_EQUAL(result_col_major.dimension(2), result_row_major.dimension(2));
523 VERIFY_IS_EQUAL(result_col_major.dimension(3), result_row_major.dimension(1));
524 VERIFY_IS_EQUAL(result_col_major.dimension(4), result_row_major.dimension(0));
525
526 // Based on the calculation described in TensorTraits.h, padding happens to be 0.
527 IndexType row_padding = 0;
528 IndexType col_padding = 0;
529
530 for (IndexType i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows
531 for (IndexType j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols
532 IndexType patchId = i+input_rows*j;
533 for (IndexType r = 0; r < ksize; ++r) { // patch rows
534 for (IndexType c = 0; c < ksize; ++c) { // patch cols
535 for (IndexType d = 0; d < input_depth; ++d) { // depth
536 for (IndexType b = 0; b < input_batches; ++b) { // batch
537 DataType expected_col_major = 0.0f;
538 DataType expected_row_major = 0.0f;
539 IndexType row_offset = r*stride + i - row_padding;
540 IndexType col_offset = c*stride + j - col_padding;
541 if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
542 expected_col_major = tensor_col_major(d, row_offset, col_offset, b);
543 expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
544 }
545 // ColMajor
546 if (result_col_major(d, r, c, patchId, b) != expected_col_major) {
547 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
548 }
549 VERIFY_IS_EQUAL(result_col_major(d, r, c, patchId, b), expected_col_major);
550 // RowMajor
551 if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
552 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
553 }
554 VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
555 // Check that ColMajor and RowMajor agree.
556 VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
557 }
558 }
559 }
560 }
561 }
562 }
// Release the device buffers, as the other tests do.
sycl_device.deallocate(gpu_data_col_major);
sycl_device.deallocate(gpu_data_row_major);
sycl_device.deallocate(gpu_data_result_col_major);
sycl_device.deallocate(gpu_data_result_row_major);
563 }
564
565
566 template <typename DataType, typename IndexType>
567 static void test_patch_no_extra_dim_sycl(const Eigen::SyclDevice& sycl_device){
568
569 IndexType sizeDim1 = 2;
570 IndexType sizeDim2 = 3;
571 IndexType sizeDim3 = 5;
572
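// Same scenarios as test_simple_image_patch_sycl, but on a rank-3 input with
// no batch dimension, so the patch results are rank-4 tensors
// ({depth, patch_rows, patch_cols, num_patches} for ColMajor).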
573 // ColMajor
574 array<IndexType, 3> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3}};
575 array<IndexType, 3> tensorRowMajorRange = {{sizeDim3, sizeDim2, sizeDim1}};
576 Tensor<DataType, 3, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
577 tensor_col_major.setRandom();
578 Tensor<DataType, 3, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
579
580 DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
581 DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
582 TensorMap<Tensor<DataType, 3, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
583 TensorMap<Tensor<DataType, 3, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);
584
585 sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),(tensor_col_major.size())*sizeof(DataType));
586 gpu_row_major.device(sycl_device)=gpu_col_major.swap_layout();
587 sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, (tensor_row_major.size())*sizeof(DataType));
588
589 VERIFY_IS_EQUAL(tensor_col_major.dimension(0), tensor_row_major.dimension(2));
590 VERIFY_IS_EQUAL(tensor_col_major.dimension(1), tensor_row_major.dimension(1));
591 VERIFY_IS_EQUAL(tensor_col_major.dimension(2), tensor_row_major.dimension(0));
592
593
594 // Single pixel patch: ColMajor
595 array<IndexType, 4> patchColMajorTensorRange={{sizeDim1, 1, 1, sizeDim2*sizeDim3}};
596 Tensor<DataType, 4, DataLayout,IndexType> single_patch_col_major(patchColMajorTensorRange);
597 size_t patchTensorBuffSize =single_patch_col_major.size()*sizeof(DataType);
598 DataType* gpu_data_single_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
599 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_single_patch_col_major(gpu_data_single_patch_col_major, patchColMajorTensorRange);
600 gpu_single_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(1, 1);
601 sycl_device.memcpyDeviceToHost(single_patch_col_major.data(), gpu_data_single_patch_col_major, patchTensorBuffSize);
602
603 VERIFY_IS_EQUAL(single_patch_col_major.dimension(0), sizeDim1);
604 VERIFY_IS_EQUAL(single_patch_col_major.dimension(1), 1);
605 VERIFY_IS_EQUAL(single_patch_col_major.dimension(2), 1);
606 VERIFY_IS_EQUAL(single_patch_col_major.dimension(3), sizeDim2*sizeDim3);
607
608 // Single pixel patch: RowMajor
609 array<IndexType, 4> patchRowMajorTensorRange={{sizeDim2*sizeDim3, 1, 1, sizeDim1}};
610 Tensor<DataType, 4, RowMajor,IndexType> single_patch_row_major(patchRowMajorTensorRange);
611 patchTensorBuffSize =single_patch_row_major.size()*sizeof(DataType);
612 DataType* gpu_data_single_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
613 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_single_patch_row_major(gpu_data_single_patch_row_major, patchRowMajorTensorRange);
614 gpu_single_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(1, 1);
615 sycl_device.memcpyDeviceToHost(single_patch_row_major.data(), gpu_data_single_patch_row_major, patchTensorBuffSize);
616
617 VERIFY_IS_EQUAL(single_patch_row_major.dimension(0), sizeDim2*sizeDim3);
618 VERIFY_IS_EQUAL(single_patch_row_major.dimension(1), 1);
619 VERIFY_IS_EQUAL(single_patch_row_major.dimension(2), 1);
620 VERIFY_IS_EQUAL(single_patch_row_major.dimension(3), sizeDim1);
621
622 for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
623 // ColMajor
624 if (tensor_col_major.data()[i] != single_patch_col_major.data()[i]) {
625 std::cout << "Mismatch detected at index " << i << " : " << tensor_col_major.data()[i] << " vs " << single_patch_col_major.data()[i] << std::endl;
626 }
627 VERIFY_IS_EQUAL(single_patch_col_major.data()[i], tensor_col_major.data()[i]);
628 // RowMajor
629 if (tensor_row_major.data()[i] != single_patch_row_major.data()[i]) {
630 std::cout << "Mismatch detected at index " << i << " : "
631 << tensor_row_major.data()[i] << " vs "
632 << single_patch_row_major.data()[i] << std::endl;
633 }
634 VERIFY_IS_EQUAL(single_patch_row_major.data()[i],
635 tensor_row_major.data()[i]);
636 VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
637 VERIFY_IS_EQUAL(single_patch_col_major.data()[i],
638 single_patch_row_major.data()[i]);
639 }
640
641 // Entire image patch: ColMajor
642 patchColMajorTensorRange={{sizeDim1, sizeDim2, sizeDim3, sizeDim2*sizeDim3}};
643 Tensor<DataType, 4, DataLayout,IndexType> entire_image_patch_col_major(patchColMajorTensorRange);
644 patchTensorBuffSize =entire_image_patch_col_major.size()*sizeof(DataType);
645 DataType* gpu_data_entire_image_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
646 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_entire_image_patch_col_major(gpu_data_entire_image_patch_col_major, patchColMajorTensorRange);
647 gpu_entire_image_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(3, 5);
648 sycl_device.memcpyDeviceToHost(entire_image_patch_col_major.data(), gpu_data_entire_image_patch_col_major, patchTensorBuffSize);
649
650 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(0), 2);
651 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(1), 3);
652 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(2), 5);
653 VERIFY_IS_EQUAL(entire_image_patch_col_major.dimension(3), 3*5);
654
655 // Entire image patch: RowMajor
656 patchRowMajorTensorRange={{sizeDim2*sizeDim3, sizeDim3, sizeDim2, sizeDim1}};
657 Tensor<DataType, 4, RowMajor,IndexType> entire_image_patch_row_major(patchRowMajorTensorRange);
658 patchTensorBuffSize =entire_image_patch_row_major.size()*sizeof(DataType);
659 DataType* gpu_data_entire_image_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
660 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_entire_image_patch_row_major(gpu_data_entire_image_patch_row_major, patchRowMajorTensorRange);
661 gpu_entire_image_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(3, 5);
662 sycl_device.memcpyDeviceToHost(entire_image_patch_row_major.data(), gpu_data_entire_image_patch_row_major, patchTensorBuffSize);
663 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5);
664 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5);
665 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3);
666 VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2);
667
668 for (IndexType i = 0; i < 3; ++i) {
669 for (IndexType j = 0; j < 5; ++j) {
670 IndexType patchId = i+3*j;
671 for (IndexType r = 0; r < 3; ++r) {
672 for (IndexType c = 0; c < 5; ++c) {
673 for (IndexType d = 0; d < 2; ++d) {
674 DataType expected_col_major = 0.0f;
675 DataType expected_row_major = 0.0f;
676 if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) {
677 expected_col_major = tensor_col_major(d, r-1+i, c-2+j);
678 expected_row_major = tensor_row_major(c-2+j, r-1+i, d);
679 }
680 // ColMajor
681 if (entire_image_patch_col_major(d, r, c, patchId) != expected_col_major) {
682 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
683 }
684 VERIFY_IS_EQUAL(entire_image_patch_col_major(d, r, c, patchId), expected_col_major);
685 // RowMajor
686 if (entire_image_patch_row_major(patchId, c, r, d) !=
687 expected_row_major) {
688 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
689 }
690 VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d),
691 expected_row_major);
692 // Check that ColMajor and RowMajor agree.
693 VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
694 }
695 }
696 }
697 }
698 }
699
700 // 2D patch: ColMajor
701 patchColMajorTensorRange={{sizeDim1, 2, 2, sizeDim2*sizeDim3}};
702 Tensor<DataType, 4, DataLayout,IndexType> twod_patch_col_major(patchColMajorTensorRange);
703 patchTensorBuffSize =twod_patch_col_major.size()*sizeof(DataType);
704 DataType* gpu_data_twod_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
705 TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_twod_patch_col_major(gpu_data_twod_patch_col_major, patchColMajorTensorRange);
706 gpu_twod_patch_col_major.device(sycl_device)=gpu_col_major.extract_image_patches(2, 2);
707 sycl_device.memcpyDeviceToHost(twod_patch_col_major.data(), gpu_data_twod_patch_col_major, patchTensorBuffSize);
708
709 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(0), 2);
710 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(1), 2);
711 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(2), 2);
712 VERIFY_IS_EQUAL(twod_patch_col_major.dimension(3), 3*5);
713
714 // 2D patch: RowMajor
715 patchRowMajorTensorRange={{sizeDim2*sizeDim3, 2, 2, sizeDim1}};
716 Tensor<DataType, 4, RowMajor,IndexType> twod_patch_row_major(patchRowMajorTensorRange);
717 patchTensorBuffSize =twod_patch_row_major.size()*sizeof(DataType);
718 DataType* gpu_data_twod_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
719 TensorMap<Tensor<DataType, 4, RowMajor,IndexType>> gpu_twod_patch_row_major(gpu_data_twod_patch_row_major, patchRowMajorTensorRange);
720 gpu_twod_patch_row_major.device(sycl_device)=gpu_row_major.extract_image_patches(2, 2);
721 sycl_device.memcpyDeviceToHost(twod_patch_row_major.data(), gpu_data_twod_patch_row_major, patchTensorBuffSize);
722 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5);
723 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2);
724 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2);
725 VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2);
726
727 // Based on the calculation described in TensorTraits.h, padding happens to be 0.
728 IndexType row_padding = 0;
729 IndexType col_padding = 0;
730 IndexType stride = 1;
731
732 for (IndexType i = 0; i < 3; ++i) {
733 for (IndexType j = 0; j < 5; ++j) {
734 IndexType patchId = i+3*j;
735 for (IndexType r = 0; r < 2; ++r) {
736 for (IndexType c = 0; c < 2; ++c) {
737 for (IndexType d = 0; d < 2; ++d) {
738 DataType expected_col_major = 0.0f;
739 DataType expected_row_major = 0.0f;
740 IndexType row_offset = r*stride + i - row_padding;
741 IndexType col_offset = c*stride + j - col_padding;
742 // ColMajor
743 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_col_major.dimension(1) && col_offset < tensor_col_major.dimension(2)) {
744 expected_col_major = tensor_col_major(d, row_offset, col_offset);
745 }
746 if (twod_patch_col_major(d, r, c, patchId) != expected_col_major) {
747 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
748 }
749 VERIFY_IS_EQUAL(twod_patch_col_major(d, r, c, patchId), expected_col_major);
750 // RowMajor
751 if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) {
752 expected_row_major = tensor_row_major(col_offset, row_offset, d);
753 }
754 if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) {
755 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl;
756 }
757 VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major);
758 // Check that ColMajor and RowMajor agree.
759 VERIFY_IS_EQUAL(expected_col_major, expected_row_major);
760 }
761 }
762 }
763 }
764 }
765
766 sycl_device.deallocate(gpu_data_col_major);
767 sycl_device.deallocate(gpu_data_row_major);
768 sycl_device.deallocate(gpu_data_single_patch_col_major);
769 sycl_device.deallocate(gpu_data_single_patch_row_major);
770 sycl_device.deallocate(gpu_data_entire_image_patch_col_major);
771 sycl_device.deallocate(gpu_data_entire_image_patch_row_major);
772 sycl_device.deallocate(gpu_data_twod_patch_col_major);
773 sycl_device.deallocate(gpu_data_twod_patch_row_major);
774 }
775
776 template <typename DataType, typename IndexType>
777 static void test_imagenet_patches_sycl(const Eigen::SyclDevice& sycl_device)
778 {
779 // Test the code on typical configurations used by the 'imagenet' benchmarks at
780 // https://github.com/soumith/convnet-benchmarks
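// Four typical layer shapes are covered (128x128x3, 64x64x16, 16x16x32 and
// 13x13x64, each with its usual kernel size). In every case the reference
// value is taken from the input shifted by about half the kernel size
// (r-5/c-5 for 11x11, r-4/c-4 for 9x9, r-3/c-3 for 7x7, r-1/c-1 for 3x3),
// with out-of-range positions expected to be zero.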
781 // ColMajor
782 IndexType sizeDim1 = 3;
783 IndexType sizeDim2 = 128;
784 IndexType sizeDim3 = 128;
785 IndexType sizeDim4 = 16;
786 array<IndexType, 4> tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
787 Tensor<DataType, 4, DataLayout,IndexType> l_in_col_major(tensorColMajorRange);
788 l_in_col_major.setRandom();
789
790 DataType* gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
791 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>> gpu_l_in_col_major(gpu_data_l_in_col_major, tensorColMajorRange);
792
793 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
794
795 array<IndexType, 5> patchTensorRange={{sizeDim1, 11, 11, sizeDim2*sizeDim3, sizeDim4}};
796 Tensor<DataType, 5, DataLayout,IndexType> l_out_col_major(patchTensorRange);
797 size_t patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
798 DataType* gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
799 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_l_out_col_major(gpu_data_l_out_col_major, patchTensorRange);
800 gpu_l_out_col_major.device(sycl_device)=gpu_l_in_col_major.extract_image_patches(11, 11);
801 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
802
803 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), sizeDim1);
804 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 11);
805 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 11);
806 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), sizeDim2*sizeDim3);
807 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), sizeDim4);
808
809 // RowMajor
810 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 11, 11, sizeDim1}};
811 Tensor<DataType, 5, RowMajor,IndexType> l_out_row_major(patchTensorRange);
812 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
813 DataType* gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
814 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>> gpu_l_out_row_major(gpu_data_l_out_row_major, patchTensorRange);
815 gpu_l_out_row_major.device(sycl_device)=gpu_l_in_col_major.swap_layout().extract_image_patches(11, 11);
816 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
817
818 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), sizeDim4);
819 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), sizeDim2*sizeDim3);
820 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11);
821 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11);
822 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), sizeDim1);
823
824 for (IndexType b = 0; b < 16; ++b) {
825 for (IndexType i = 0; i < 128; ++i) {
826 for (IndexType j = 0; j < 128; ++j) {
827 IndexType patchId = i+128*j;
828 for (IndexType c = 0; c < 11; ++c) {
829 for (IndexType r = 0; r < 11; ++r) {
830 for (IndexType d = 0; d < 3; ++d) {
831 DataType expected = 0.0f;
832 if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) {
833 expected = l_in_col_major(d, r-5+i, c-5+j, b);
834 }
835 // ColMajor
836 if (l_out_col_major(d, r, c, patchId, b) != expected) {
837 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
838 }
839 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
840 // RowMajor
841 if (l_out_row_major(b, patchId, c, r, d) !=
842 expected) {
843 std::cout << "Mismatch detected at index i=" << i << " j=" << j
844 << " r=" << r << " c=" << c << " d=" << d << " b=" << b
845 << std::endl;
846 }
847 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d),
848 expected);
849 }
850 }
851 }
852 }
853 }
854 }
855
856 // ColMajor
857 sycl_device.deallocate(gpu_data_l_in_col_major);
858 sycl_device.deallocate(gpu_data_l_out_col_major);
859 sizeDim1 = 16;
860 sizeDim2 = 64;
861 sizeDim3 = 64;
862 sizeDim4 = 32;
863 tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
864 l_in_col_major.resize(tensorColMajorRange);
865 l_in_col_major.setRandom();
866 gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
867 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize1(gpu_data_l_in_col_major, tensorColMajorRange);
868
869 patchTensorRange={{sizeDim1, 9, 9, sizeDim2*sizeDim3, sizeDim4}};
870 l_out_col_major.resize(patchTensorRange);
871 patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
872 gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
873 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize1(gpu_data_l_out_col_major, patchTensorRange);
874 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
875 gpu_l_out_col_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.extract_image_patches(9, 9);
876 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
877 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 16);
878 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 9);
879 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 9);
880 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 64*64);
881 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
882
883 // RowMajor
884 sycl_device.deallocate(gpu_data_l_out_row_major);
885 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 9, 9 ,sizeDim1}};
886 l_out_row_major.resize(patchTensorRange);
887 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
888 gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
889 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize1(gpu_data_l_out_row_major, patchTensorRange);
890 gpu_l_out_row_major_resize1.device(sycl_device)=gpu_l_in_col_major_resize1.swap_layout().extract_image_patches(9, 9);
891 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
892
893 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
894 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64);
895 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9);
896 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9);
897 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16);
898
899 for (IndexType b = 0; b < 32; ++b) {
900 for (IndexType i = 0; i < 64; ++i) {
901 for (IndexType j = 0; j < 64; ++j) {
902 IndexType patchId = i+64*j;
903 for (IndexType c = 0; c < 9; ++c) {
904 for (IndexType r = 0; r < 9; ++r) {
905 for (IndexType d = 0; d < 16; ++d) {
906 DataType expected = 0.0f;
907 if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) {
908 expected = l_in_col_major(d, r-4+i, c-4+j, b);
909 }
910 // ColMajor
911 if (l_out_col_major(d, r, c, patchId, b) != expected) {
912 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
913 }
914 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
915 // RowMajor
916 if (l_out_row_major(b, patchId, c, r, d) != expected) {
917 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
918 }
919 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
920 }
921 }
922 }
923 }
924 }
925 }
926
927 // ColMajor
928
929 sycl_device.deallocate(gpu_data_l_in_col_major);
930 sycl_device.deallocate(gpu_data_l_out_col_major);
931 sizeDim1 = 32;
932 sizeDim2 = 16;
933 sizeDim3 = 16;
934 sizeDim4 = 32;
935 tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
936 l_in_col_major.resize(tensorColMajorRange);
937 l_in_col_major.setRandom();
938 gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
939 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize2(gpu_data_l_in_col_major, tensorColMajorRange);
940
941 patchTensorRange={{sizeDim1, 7, 7, sizeDim2*sizeDim3, sizeDim4}};
942 l_out_col_major.resize(patchTensorRange);
943 patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
944 gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
945 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize2(gpu_data_l_out_col_major, patchTensorRange);
946 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
947 gpu_l_out_col_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.extract_image_patches(7, 7);
948 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
949
950 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 32);
951 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 7);
952 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 7);
953 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 16*16);
954 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
955
956 // RowMajor
957 sycl_device.deallocate(gpu_data_l_out_row_major);
958 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 7, 7 ,sizeDim1}};
959 l_out_row_major.resize(patchTensorRange);
960 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
961 gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
962 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize2(gpu_data_l_out_row_major, patchTensorRange);
963 gpu_l_out_row_major_resize2.device(sycl_device)=gpu_l_in_col_major_resize2.swap_layout().extract_image_patches(7, 7);
964 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
965
966 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
967 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16);
968 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7);
969 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7);
970 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32);
971
972 for (IndexType b = 0; b < 32; ++b) {
973 for (IndexType i = 0; i < 16; ++i) {
974 for (IndexType j = 0; j < 16; ++j) {
975 IndexType patchId = i+16*j;
976 for (IndexType c = 0; c < 7; ++c) {
977 for (IndexType r = 0; r < 7; ++r) {
978 for (IndexType d = 0; d < 32; ++d) {
979 DataType expected = 0.0f;
980 if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) {
981 expected = l_in_col_major(d, r-3+i, c-3+j, b);
982 }
983 // ColMajor
984 if (l_out_col_major(d, r, c, patchId, b) != expected) {
985 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
986 }
987 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
988 // RowMajor
989 if (l_out_row_major(b, patchId, c, r, d) != expected) {
990 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
991 }
992 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
993 }
994 }
995 }
996 }
997 }
998 }
999
1000 // ColMajor
1001 sycl_device.deallocate(gpu_data_l_in_col_major);
1002 sycl_device.deallocate(gpu_data_l_out_col_major);
1003 sizeDim1 = 64;
1004 sizeDim2 = 13;
1005 sizeDim3 = 13;
1006 sizeDim4 = 32;
1007 tensorColMajorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
1008 l_in_col_major.resize(tensorColMajorRange);
1009 l_in_col_major.setRandom();
1010 gpu_data_l_in_col_major = static_cast<DataType*>(sycl_device.allocate(l_in_col_major.size()*sizeof(DataType)));
1011 TensorMap<Tensor<DataType, 4, ColMajor, IndexType>>gpu_l_in_col_major_resize3(gpu_data_l_in_col_major, tensorColMajorRange);
1012
1013 patchTensorRange={{sizeDim1, 3, 3, sizeDim2*sizeDim3, sizeDim4}};
1014 l_out_col_major.resize(patchTensorRange);
1015 patchTensorBuffSize =l_out_col_major.size()*sizeof(DataType);
1016 gpu_data_l_out_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
1017 TensorMap<Tensor<DataType, 5, DataLayout,IndexType>>gpu_l_out_col_major_resize3(gpu_data_l_out_col_major, patchTensorRange);
1018 sycl_device.memcpyHostToDevice(gpu_data_l_in_col_major, l_in_col_major.data(),(l_in_col_major.size())*sizeof(DataType));
1019 gpu_l_out_col_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.extract_image_patches(3, 3);
1020 sycl_device.memcpyDeviceToHost(l_out_col_major.data(), gpu_data_l_out_col_major, patchTensorBuffSize);
1021
1022 VERIFY_IS_EQUAL(l_out_col_major.dimension(0), 64);
1023 VERIFY_IS_EQUAL(l_out_col_major.dimension(1), 3);
1024 VERIFY_IS_EQUAL(l_out_col_major.dimension(2), 3);
1025 VERIFY_IS_EQUAL(l_out_col_major.dimension(3), 13*13);
1026 VERIFY_IS_EQUAL(l_out_col_major.dimension(4), 32);
1027
1028 // RowMajor
1029 sycl_device.deallocate(gpu_data_l_out_row_major);
1030 patchTensorRange={{sizeDim4, sizeDim2*sizeDim3, 3, 3 ,sizeDim1}};
1031 l_out_row_major.resize(patchTensorRange);
1032 patchTensorBuffSize =l_out_row_major.size()*sizeof(DataType);
1033 gpu_data_l_out_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
1034 TensorMap<Tensor<DataType, 5, RowMajor,IndexType>>gpu_l_out_row_major_resize3(gpu_data_l_out_row_major, patchTensorRange);
1035 gpu_l_out_row_major_resize3.device(sycl_device)=gpu_l_in_col_major_resize3.swap_layout().extract_image_patches(3, 3);
1036 sycl_device.memcpyDeviceToHost(l_out_row_major.data(), gpu_data_l_out_row_major, patchTensorBuffSize);
1037
1038 VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32);
1039 VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13);
1040 VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3);
1041 VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3);
1042 VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64);
1043
1044 for (IndexType b = 0; b < 32; ++b) {
1045 for (IndexType i = 0; i < 13; ++i) {
1046 for (IndexType j = 0; j < 13; ++j) {
1047 IndexType patchId = i+13*j;
1048 for (IndexType c = 0; c < 3; ++c) {
1049 for (IndexType r = 0; r < 3; ++r) {
1050 for (IndexType d = 0; d < 64; ++d) {
1051 DataType expected = 0.0f;
1052 if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) {
1053 expected = l_in_col_major(d, r-1+i, c-1+j, b);
1054 }
1055 // ColMajor
1056 if (l_out_col_major(d, r, c, patchId, b) != expected) {
1057 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
1058 }
1059 VERIFY_IS_EQUAL(l_out_col_major(d, r, c, patchId, b), expected);
1060 // RowMajor
1061 if (l_out_row_major(b, patchId, c, r, d) != expected) {
1062 std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
1063 }
1064 VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected);
1065 }
1066 }
1067 }
1068 }
1069 }
1070 }
1071 sycl_device.deallocate(gpu_data_l_in_col_major);
1072 sycl_device.deallocate(gpu_data_l_out_col_major);
1073 sycl_device.deallocate(gpu_data_l_out_row_major);
1074 }
1075
1076
1077 template<typename DataType, typename dev_Selector> void sycl_tensor_image_patch_test_per_device(dev_Selector s){
1078 QueueInterface queueInterface(s);
1079 auto sycl_device = Eigen::SyclDevice(&queueInterface);
1080 test_simple_image_patch_sycl<DataType, int64_t>(sycl_device);
1081 test_patch_padding_valid_sycl<DataType, int64_t>(sycl_device);
1082 test_patch_padding_valid_same_value_sycl<DataType, int64_t>(sycl_device);
1083 test_patch_padding_same_sycl<DataType, int64_t>(sycl_device);
1084 test_patch_no_extra_dim_sycl<DataType, int64_t>(sycl_device);
1085 test_imagenet_patches_sycl<DataType, int64_t>(sycl_device);
1086 }
1087 EIGEN_DECLARE_TEST(cxx11_tensor_image_patch_sycl)
1088 {
1089 for (const auto& device :Eigen::get_sycl_supported_devices()) {
1090 CALL_SUBTEST(sycl_tensor_image_patch_test_per_device<float>(device));
1091 }
1092 }
1093