// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;
static const int DataLayout = ColMajor;

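// Extracts 1x1x1 volume patches from a rank-5 tensor on the SYCL device and
// checks that the result is an element-for-element copy of the input, for
// both column-major and row-major layouts.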
template <typename DataType, typename IndexType>
static void test_single_voxel_patch_sycl(const Eigen::SyclDevice& sycl_device)
{
  IndexType sizeDim0 = 4;
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  array<IndexType, 5> tensorColMajorRange = {{sizeDim0, sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
  array<IndexType, 5> tensorRowMajorRange = {{sizeDim4, sizeDim3, sizeDim2, sizeDim1, sizeDim0}};
  Tensor<DataType, 5, DataLayout, IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 5, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
  tensor_col_major.setRandom();

  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

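  // Upload the column-major input and create its row-major equivalent on the
  // device: swap_layout() reverses the dimension order without reshuffling
  // the data, so both buffers hold identical values (verified below).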
  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), tensor_col_major.size() * sizeof(DataType));
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();

  // single volume patch: ColMajor
  array<IndexType, 6> patchColMajorTensorRange = {{sizeDim0, 1, 1, 1, sizeDim1 * sizeDim2 * sizeDim3, sizeDim4}};
  Tensor<DataType, 6, DataLayout, IndexType> single_voxel_patch_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = single_voxel_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_single_voxel_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 6, DataLayout, IndexType>> gpu_single_voxel_patch_col_major(gpu_data_single_voxel_patch_col_major, patchColMajorTensorRange);
  gpu_single_voxel_patch_col_major.device(sycl_device) = gpu_col_major.extract_volume_patches(1, 1, 1);
  sycl_device.memcpyDeviceToHost(single_voxel_patch_col_major.data(), gpu_data_single_voxel_patch_col_major, patchTensorBuffSize);
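  // ColMajor output layout is {depth, z, y, x, #patches, batch}. A 1x1x1
  // patch keeps depth and batch intact and yields one patch per input voxel:
  // 2 * 3 * 5 = 30 patches.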
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(0), 4);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(1), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(4), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(5), 7);
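  // single volume patch: RowMajor (all dimensions appear in reverse order)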
  array<IndexType, 6> patchRowMajorTensorRange = {{sizeDim4, sizeDim1 * sizeDim2 * sizeDim3, 1, 1, 1, sizeDim0}};
  Tensor<DataType, 6, RowMajor, IndexType> single_voxel_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = single_voxel_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_single_voxel_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 6, RowMajor, IndexType>> gpu_single_voxel_patch_row_major(gpu_data_single_voxel_patch_row_major, patchRowMajorTensorRange);
  gpu_single_voxel_patch_row_major.device(sycl_device) = gpu_row_major.extract_volume_patches(1, 1, 1);
  sycl_device.memcpyDeviceToHost(single_voxel_patch_row_major.data(), gpu_data_single_voxel_patch_row_major, patchTensorBuffSize);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4);
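  // With 1x1x1 patches the patch tensor is just a relabeling of the input,
  // so all three buffers must match element for element.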
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, tensor_col_major.size() * sizeof(DataType));
  for (IndexType i = 0; i < tensor_col_major.size(); ++i) {
    VERIFY_IS_EQUAL(tensor_col_major.data()[i], single_voxel_patch_col_major.data()[i]);
    VERIFY_IS_EQUAL(tensor_row_major.data()[i], single_voxel_patch_row_major.data()[i]);
    VERIFY_IS_EQUAL(tensor_col_major.data()[i], tensor_row_major.data()[i]);
  }

  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_single_voxel_patch_col_major);
  sycl_device.deallocate(gpu_data_single_voxel_patch_row_major);
}
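// Extracts patches spanning the entire input volume (patch size equals the
// input's spatial extent) with SAME padding, and checks every element of the
// result against a value computed directly from the input on the host.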
template <typename DataType, typename IndexType>
static void test_entire_volume_patch_sycl(const Eigen::SyclDevice& sycl_device)
{
  const int depth = 4;
  const int patch_z = 2;
  const int patch_y = 3;
  const int patch_x = 5;
  const int batch = 7;

  array<IndexType, 5> tensorColMajorRange = {{depth, patch_z, patch_y, patch_x, batch}};
  array<IndexType, 5> tensorRowMajorRange = {{batch, patch_x, patch_y, patch_z, depth}};
  Tensor<DataType, 5, DataLayout, IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 5, RowMajor, IndexType> tensor_row_major(tensorRowMajorRange);
  tensor_col_major.setRandom();

  DataType* gpu_data_col_major = static_cast<DataType*>(sycl_device.allocate(tensor_col_major.size() * sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(sycl_device.allocate(tensor_row_major.size() * sizeof(DataType)));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(), tensor_col_major.size() * sizeof(DataType));
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major, tensor_col_major.size() * sizeof(DataType));

  // entire volume patch: ColMajor
  array<IndexType, 6> patchColMajorTensorRange = {{depth, patch_z, patch_y, patch_x, patch_z * patch_y * patch_x, batch}};
  Tensor<DataType, 6, DataLayout, IndexType> entire_volume_patch_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = entire_volume_patch_col_major.size() * sizeof(DataType);
  DataType* gpu_data_entire_volume_patch_col_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 6, DataLayout, IndexType>> gpu_entire_volume_patch_col_major(gpu_data_entire_volume_patch_col_major, patchColMajorTensorRange);
  gpu_entire_volume_patch_col_major.device(sycl_device) = gpu_col_major.extract_volume_patches(patch_z, patch_y, patch_x);
  sycl_device.memcpyDeviceToHost(entire_volume_patch_col_major.data(), gpu_data_entire_volume_patch_col_major, patchTensorBuffSize);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(0), depth);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(1), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(2), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(3), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(4), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(5), batch);

  // entire volume patch: RowMajor
  array<IndexType, 6> patchRowMajorTensorRange = {{batch, patch_z * patch_y * patch_x, patch_x, patch_y, patch_z, depth}};
  Tensor<DataType, 6, RowMajor, IndexType> entire_volume_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = entire_volume_patch_row_major.size() * sizeof(DataType);
  DataType* gpu_data_entire_volume_patch_row_major = static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 6, RowMajor, IndexType>> gpu_entire_volume_patch_row_major(gpu_data_entire_volume_patch_row_major, patchRowMajorTensorRange);
  gpu_entire_volume_patch_row_major.device(sycl_device) = gpu_row_major.extract_volume_patches(patch_z, patch_y, patch_x);
  sycl_device.memcpyDeviceToHost(entire_volume_patch_row_major.data(), gpu_data_entire_volume_patch_row_major, patchTensorBuffSize);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth);
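  // SAME padding: extract_volume_patches pads a total of (patch - 1) elements
  // per dimension, with the leading side receiving floor((patch - 1) / 2).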
  const int dz = patch_z - 1;
  const int dy = patch_y - 1;
  const int dx = patch_x - 1;

  const int forward_pad_z = dz / 2;
  const int forward_pad_y = dy / 2;
  const int forward_pad_x = dx / 2;
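  // Walk every patch position (pz, py, px) and every element (z, y, x, b, d)
  // within the patch; recompute the expected value from the input, or zero
  // when the element falls inside the padded region.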
  for (int pz = 0; pz < patch_z; pz++) {
    for (int py = 0; py < patch_y; py++) {
      for (int px = 0; px < patch_x; px++) {
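        // Column-major linear index of the patch at grid position (pz, py, px).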
        const int patchId = pz + patch_z * (py + px * patch_y);
        for (int z = 0; z < patch_z; z++) {
          for (int y = 0; y < patch_y; y++) {
            for (int x = 0; x < patch_x; x++) {
              for (int b = 0; b < batch; b++) {
                for (int d = 0; d < depth; d++) {
                  DataType expected = DataType(0);
                  DataType expected_row_major = DataType(0);
                  const int eff_z = z - forward_pad_z + pz;
                  const int eff_y = y - forward_pad_y + py;
                  const int eff_x = x - forward_pad_x + px;
                  if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 &&
                      eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) {
                    expected = tensor_col_major(d, eff_z, eff_y, eff_x, b);
                    expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d);
                  }
                  VERIFY_IS_EQUAL(entire_volume_patch_col_major(d, z, y, x, patchId, b), expected);
                  VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major);
                }
              }
            }
          }
        }
      }
    }
  }
  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_entire_volume_patch_col_major);
  sycl_device.deallocate(gpu_data_entire_volume_patch_row_major);
}
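// Runs both tests on the given device selector with 64-bit indices, matching
// EIGEN_DEFAULT_DENSE_INDEX_TYPE above.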
template <typename DataType, typename dev_Selector> void sycl_tensor_volume_patch_test_per_device(dev_Selector s) {
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  std::cout << "Running on " << s.template get_info<cl::sycl::info::device::name>() << std::endl;
  test_single_voxel_patch_sycl<DataType, int64_t>(sycl_device);
  test_entire_volume_patch_sycl<DataType, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_volume_patch_sycl)
{
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_tensor_volume_patch_test_per_device<float>(device));
  }
}