// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

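// test_simple_reverse: reverses a 2x3x5x7 tensor along several dimension
// combinations on the SYCL device and checks every element against the
// expected index mirroring computed on the host.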
template <typename DataType, int DataLayout, typename IndexType>
static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) {
  IndexType dim1 = 2;
  IndexType dim2 = 3;
  IndexType dim3 = 5;
  IndexType dim4 = 7;

  array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange);
  tensor.setRandom();

  array<bool, 4> dim_rev;
  dim_rev[0] = false;
  dim_rev[1] = true;
  dim_rev[2] = true;
  dim_rev[3] = false;

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data,
                                                                tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data,
                                                                 tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, tensor.data(),
      (tensor.dimensions().TotalSize()) * sizeof(DataType));
  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(
      reversed_tensor.data(), gpu_out_data,
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType));
  // Check that the device-computed reverse matches the expected index
  // mirroring along dimensions 1 and 2.
  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l),
                          reversed_tensor(i, 2 - j, 4 - k, l));
        }
      }
    }
  }
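  // Second configuration: reverse along dimension 0 only.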
  dim_rev[0] = true;
  dim_rev[1] = false;
  dim_rev[2] = false;
  dim_rev[3] = false;

  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(
      reversed_tensor.data(), gpu_out_data,
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l), reversed_tensor(1 - i, j, k, l));
        }
      }
    }
  }

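  // Third configuration: reverse along dimensions 0 and 3.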
  dim_rev[0] = true;
  dim_rev[1] = false;
  dim_rev[2] = false;
  dim_rev[3] = true;
  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
  sycl_device.memcpyDeviceToHost(
      reversed_tensor.data(), gpu_out_data,
      reversed_tensor.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 3; ++j) {
      for (IndexType k = 0; k < 5; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(tensor(i, j, k, l),
                          reversed_tensor(1 - i, j, k, 6 - l));
        }
      }
    }
  }

  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data);
}

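// test_expr_reverse: uses reverse inside a larger expression, either as an
// lvalue (writing through a reversed view) or as an rvalue, and also combined
// with slicing along dimension 2; the sliced results must match the
// whole-tensor reverse computed up front.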
template <typename DataType, int DataLayout, typename IndexType>
static void test_expr_reverse(const Eigen::SyclDevice& sycl_device,
                              bool LValue) {
  IndexType dim1 = 2;
  IndexType dim2 = 3;
  IndexType dim3 = 5;
  IndexType dim4 = 7;

  array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange);
  Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange);
  tensor.setRandom();

  array<bool, 4> dim_rev;
  dim_rev[0] = false;
  dim_rev[1] = true;
  dim_rev[2] = false;
  dim_rev[3] = true;

  DataType* gpu_in_data = static_cast<DataType*>(
      sycl_device.allocate(tensor.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data_expected = static_cast<DataType*>(sycl_device.allocate(
      expected.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data_result = static_cast<DataType*>(
      sycl_device.allocate(result.dimensions().TotalSize() * sizeof(DataType)));

  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data,
                                                                tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(
      gpu_out_data_expected, tensorRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(
      gpu_out_data_result, tensorRange);

  sycl_device.memcpyHostToDevice(
      gpu_in_data, tensor.data(),
      (tensor.dimensions().TotalSize()) * sizeof(DataType));

  if (LValue) {
    out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu;
  } else {
    out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev);
  }
  sycl_device.memcpyDeviceToHost(
      expected.data(), gpu_out_data_expected,
      expected.dimensions().TotalSize() * sizeof(DataType));

  array<IndexType, 4> src_slice_dim;
  src_slice_dim[0] = 2;
  src_slice_dim[1] = 3;
  src_slice_dim[2] = 1;
  src_slice_dim[3] = 7;
  array<IndexType, 4> src_slice_start;
  src_slice_start[0] = 0;
  src_slice_start[1] = 0;
  src_slice_start[2] = 0;
  src_slice_start[3] = 0;
  array<IndexType, 4> dst_slice_dim = src_slice_dim;
  array<IndexType, 4> dst_slice_start = src_slice_start;

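  // First pass: slice the input along dimension 2 and reverse each slice.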
  for (IndexType i = 0; i < 5; ++i) {
    if (LValue) {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim)
          .reverse(dim_rev)
          .device(sycl_device) = in_gpu.slice(src_slice_start, src_slice_dim);
    } else {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
          in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
    }
    src_slice_start[2] += 1;
    dst_slice_start[2] += 1;
  }
  sycl_device.memcpyDeviceToHost(
      result.data(), gpu_out_data_result,
      result.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < expected.dimension(0); ++i) {
    for (IndexType j = 0; j < expected.dimension(1); ++j) {
      for (IndexType k = 0; k < expected.dimension(2); ++k) {
        for (IndexType l = 0; l < expected.dimension(3); ++l) {
          VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
        }
      }
    }
  }

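  // Second pass: reset the output, then reverse the whole input before slicing
  // (rvalue case) or write through a reversed slice view (lvalue case).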
  dst_slice_start[2] = 0;
  result.setRandom();
  sycl_device.memcpyHostToDevice(
      gpu_out_data_result, result.data(),
      (result.dimensions().TotalSize()) * sizeof(DataType));
  for (IndexType i = 0; i < 5; ++i) {
    if (LValue) {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim)
          .reverse(dim_rev)
          .device(sycl_device) = in_gpu.slice(dst_slice_start, dst_slice_dim);
    } else {
      out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
          in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
    }
    dst_slice_start[2] += 1;
  }
  sycl_device.memcpyDeviceToHost(
      result.data(), gpu_out_data_result,
      result.dimensions().TotalSize() * sizeof(DataType));

  for (IndexType i = 0; i < expected.dimension(0); ++i) {
    for (IndexType j = 0; j < expected.dimension(1); ++j) {
      for (IndexType k = 0; k < expected.dimension(2); ++k) {
        for (IndexType l = 0; l < expected.dimension(3); ++l) {
          VERIFY_IS_EQUAL(result(i, j, k, l), expected(i, j, k, l));
        }
      }
    }
  }
  // Release the device buffers, mirroring the cleanup in test_simple_reverse.
  sycl_device.deallocate(gpu_in_data);
  sycl_device.deallocate(gpu_out_data_expected);
  sycl_device.deallocate(gpu_out_data_result);
}

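// sycl_reverse_test_per_device: runs every variant (row/column major,
// lvalue/rvalue expression) for one data type on the given SYCL device.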
template <typename DataType>
void sycl_reverse_test_per_device(const cl::sycl::device& d) {
  QueueInterface queueInterface(d);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device);
  test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device);
  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false);
  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false);
  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true);
  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true);
}

EIGEN_DECLARE_TEST(cxx11_tensor_reverse_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    std::cout << "Running on "
              << device.get_info<cl::sycl::info::device::name>() << std::endl;
    CALL_SUBTEST_1(sycl_reverse_test_per_device<short>(device));
    CALL_SUBTEST_2(sycl_reverse_test_per_device<int>(device));
    CALL_SUBTEST_3(sycl_reverse_test_per_device<unsigned int>(device));
#ifdef EIGEN_SYCL_DOUBLE_SUPPORT
    CALL_SUBTEST_4(sycl_reverse_test_per_device<double>(device));
#endif
    CALL_SUBTEST_5(sycl_reverse_test_per_device<float>(device));
  }
}