// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1

#include "main.h"

#include <unsupported/Eigen/CXX11/Tensor>
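
// These tests exercise argmax/argmin on the SYCL backend: a whole-tensor
// reduction that yields a rank-0 tensor of linear indices, and per-dimension
// reductions over each axis of a rank-4 tensor, in both storage orders.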

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;

template <typename DataType, int Layout, typename DenseIndex>
static void test_sycl_simple_argmax(const Eigen::SyclDevice& sycl_device) {
  Tensor<DataType, 3, Layout, DenseIndex> in(Eigen::array<DenseIndex, 3>{{2, 2, 2}});
  Tensor<DenseIndex, 0, Layout, DenseIndex> out_max;
  Tensor<DenseIndex, 0, Layout, DenseIndex> out_min;
  in.setRandom();
  in *= in.constant(100.0);
  in(0, 0, 0) = -1000.0;
  in(1, 1, 1) = 1000.0;
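  // Planting the extremes at the first and last elements makes the expected
  // linear indices layout-independent: (0, 0, 0) maps to 0 and (1, 1, 1) to 7
  // in both RowMajor and ColMajor order.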

  std::size_t in_bytes = in.size() * sizeof(DataType);
  std::size_t out_bytes = out_max.size() * sizeof(DenseIndex);

  DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
  DenseIndex* d_out_max = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
  DenseIndex* d_out_min = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));

  Eigen::TensorMap<Eigen::Tensor<DataType, 3, Layout, DenseIndex> > gpu_in(d_in,
                                                                           Eigen::array<DenseIndex, 3>{{2, 2, 2}});
  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > gpu_out_max(d_out_max);
  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > gpu_out_min(d_out_min);
  sycl_device.memcpyHostToDevice(d_in, in.data(), in_bytes);

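  // A whole-tensor argmax/argmin returns a rank-0 tensor holding the linear
  // index of the extremal element.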
  gpu_out_max.device(sycl_device) = gpu_in.argmax();
  gpu_out_min.device(sycl_device) = gpu_in.argmin();

  sycl_device.memcpyDeviceToHost(out_max.data(), d_out_max, out_bytes);
  sycl_device.memcpyDeviceToHost(out_min.data(), d_out_min, out_bytes);

  VERIFY_IS_EQUAL(out_max(), 2 * 2 * 2 - 1);
  VERIFY_IS_EQUAL(out_min(), 0);

  sycl_device.deallocate(d_in);
  sycl_device.deallocate(d_out_max);
  sycl_device.deallocate(d_out_min);
}

template <typename DataType, int DataLayout, typename DenseIndex>
static void test_sycl_argmax_dim(const Eigen::SyclDevice& sycl_device) {
  DenseIndex sizeDim0 = 9;
  DenseIndex sizeDim1 = 3;
  DenseIndex sizeDim2 = 5;
  DenseIndex sizeDim3 = 7;
  Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);

  std::vector<DenseIndex> dims;
  dims.push_back(sizeDim0);
  dims.push_back(sizeDim1);
  dims.push_back(sizeDim2);
  dims.push_back(sizeDim3);
  for (DenseIndex dim = 0; dim < 4; ++dim) {
    array<DenseIndex, 3> out_shape;
    for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
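    // out_shape is the input shape with dimension `dim` removed, i.e. the
    // shape of the per-dimension argmax result.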

    Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(out_shape);

    array<DenseIndex, 4> ix;
    for (DenseIndex i = 0; i < sizeDim0; ++i) {
      for (DenseIndex j = 0; j < sizeDim1; ++j) {
        for (DenseIndex k = 0; k < sizeDim2; ++k) {
          for (DenseIndex l = 0; l < sizeDim3; ++l) {
            ix[0] = i;
            ix[1] = j;
            ix[2] = k;
            ix[3] = l;
            // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0
            tensor(ix) = (ix[dim] != 0) ? -1.0 : 10.0;
          }
        }
      }
    }

    std::size_t in_bytes = tensor.size() * sizeof(DataType);
    std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);

    DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
    DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));

    Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, DenseIndex> > gpu_in(
        d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
    Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout, DenseIndex> > gpu_out(d_out, out_shape);

    sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
    gpu_out.device(sycl_device) = gpu_in.argmax(dim);
    sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);

    VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
                    size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));

    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
      // Expect max to be in the first index of the reduced dimension
      VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
    }

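    // Make sure the queued device work has completed before the host rewrites
    // the input buffer for the second round.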
    sycl_device.synchronize();

    for (DenseIndex i = 0; i < sizeDim0; ++i) {
      for (DenseIndex j = 0; j < sizeDim1; ++j) {
        for (DenseIndex k = 0; k < sizeDim2; ++k) {
          for (DenseIndex l = 0; l < sizeDim3; ++l) {
            ix[0] = i;
            ix[1] = j;
            ix[2] = k;
            ix[3] = l;
            // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
            tensor(ix) = (ix[dim] != tensor.dimension(dim) - 1) ? -1.0 : 20.0;
          }
        }
      }
    }

    sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
    gpu_out.device(sycl_device) = gpu_in.argmax(dim);
    sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);

    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
      // Expect max to be in the last index of the reduced dimension
      VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
    }
    sycl_device.deallocate(d_in);
    sycl_device.deallocate(d_out);
  }
}

template <typename DataType, int DataLayout, typename DenseIndex>
static void test_sycl_argmin_dim(const Eigen::SyclDevice& sycl_device) {
  DenseIndex sizeDim0 = 9;
  DenseIndex sizeDim1 = 3;
  DenseIndex sizeDim2 = 5;
  DenseIndex sizeDim3 = 7;
  Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);

  std::vector<DenseIndex> dims;
  dims.push_back(sizeDim0);
  dims.push_back(sizeDim1);
  dims.push_back(sizeDim2);
  dims.push_back(sizeDim3);
  for (DenseIndex dim = 0; dim < 4; ++dim) {
    array<DenseIndex, 3> out_shape;
    for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
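    // As above, out_shape drops dimension `dim` from the input shape.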

    Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(out_shape);

    array<DenseIndex, 4> ix;
    for (DenseIndex i = 0; i < sizeDim0; ++i) {
      for (DenseIndex j = 0; j < sizeDim1; ++j) {
        for (DenseIndex k = 0; k < sizeDim2; ++k) {
          for (DenseIndex l = 0; l < sizeDim3; ++l) {
            ix[0] = i;
            ix[1] = j;
            ix[2] = k;
            ix[3] = l;
            // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
            tensor(ix) = (ix[dim] != 0) ? 1.0 : -10.0;
          }
        }
      }
    }

    std::size_t in_bytes = tensor.size() * sizeof(DataType);
    std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);

    DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
    DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));

    Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, DenseIndex> > gpu_in(
        d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
    Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout, DenseIndex> > gpu_out(d_out, out_shape);

    sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
    gpu_out.device(sycl_device) = gpu_in.argmin(dim);
    sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);

    VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
                    size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));

    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
      // Expect min to be in the first index of the reduced dimension
      VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
    }

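    // As in the argmax test, wait for the queued device work before the host
    // rewrites the input buffer.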
    sycl_device.synchronize();

    for (DenseIndex i = 0; i < sizeDim0; ++i) {
      for (DenseIndex j = 0; j < sizeDim1; ++j) {
        for (DenseIndex k = 0; k < sizeDim2; ++k) {
          for (DenseIndex l = 0; l < sizeDim3; ++l) {
            ix[0] = i;
            ix[1] = j;
            ix[2] = k;
            ix[3] = l;
            // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
            tensor(ix) = (ix[dim] != tensor.dimension(dim) - 1) ? 1.0 : -20.0;
          }
        }
      }
    }

    sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
    gpu_out.device(sycl_device) = gpu_in.argmin(dim);
    sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);

    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
      // Expect min to be in the last index of the reduced dimension
      VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
    }
    sycl_device.deallocate(d_in);
    sycl_device.deallocate(d_out);
  }
}

template <typename DataType, typename Device_Selector>
void sycl_argmax_test_per_device(const Device_Selector& d) {
  QueueInterface queueInterface(d);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
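  // Run each variant in both storage orders with 64-bit indices.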
  test_sycl_simple_argmax<DataType, RowMajor, int64_t>(sycl_device);
  test_sycl_simple_argmax<DataType, ColMajor, int64_t>(sycl_device);
  test_sycl_argmax_dim<DataType, ColMajor, int64_t>(sycl_device);
  test_sycl_argmax_dim<DataType, RowMajor, int64_t>(sycl_device);
  test_sycl_argmin_dim<DataType, ColMajor, int64_t>(sycl_device);
  test_sycl_argmin_dim<DataType, RowMajor, int64_t>(sycl_device);
}

EIGEN_DECLARE_TEST(cxx11_tensor_argmax_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_argmax_test_per_device<float>(device));
  }
}