1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016
5 // Mehdi Goli Codeplay Software Ltd.
6 // Ralph Potter Codeplay Software Ltd.
7 // Luke Iwanski Codeplay Software Ltd.
8 // Contact: <eigen@codeplay.com>
9 //
10 // This Source Code Form is subject to the terms of the Mozilla
11 // Public License v. 2.0. If a copy of the MPL was not distributed
12 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13
14 #define EIGEN_TEST_NO_LONGDOUBLE
15 #define EIGEN_TEST_NO_COMPLEX
16
17 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
18 #define EIGEN_USE_SYCL
19
20 #include <iostream>
21 #include <chrono>
22 #include <ctime>
23
24 #include "main.h"
25 #include <unsupported/Eigen/CXX11/Tensor>
26
27 using Eigen::array;
28 using Eigen::SyclDevice;
29 using Eigen::Tensor;
30 using Eigen::TensorMap;
31
32
33 template <typename DataType, int DataLayout, typename IndexType>
test_simple_striding(const Eigen::SyclDevice & sycl_device)34 static void test_simple_striding(const Eigen::SyclDevice& sycl_device)
35 {
36
37 Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}};
38 Eigen::array<IndexType, 4> stride_dims = {{1,1,3,3}};
39
40
41 Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims);
42 Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensor_dims);
43 Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims);
44
45
46 std::size_t tensor_bytes = tensor.size() * sizeof(DataType);
47 std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType);
48 std::size_t stride_bytes = stride.size() * sizeof(DataType);
49 DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes));
50 DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes));
51 DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes));
52
53 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims);
54 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, tensor_dims);
55 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims);
56
57
58 tensor.setRandom();
59 array<IndexType, 4> strides;
60 strides[0] = 1;
61 strides[1] = 1;
62 strides[2] = 1;
63 strides[3] = 1;
64 sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes);
65 gpu_no_stride.device(sycl_device)=gpu_tensor.stride(strides);
66 sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes);
67
68 //no_stride = tensor.stride(strides);
69
70 VERIFY_IS_EQUAL(no_stride.dimension(0), 2);
71 VERIFY_IS_EQUAL(no_stride.dimension(1), 3);
72 VERIFY_IS_EQUAL(no_stride.dimension(2), 5);
73 VERIFY_IS_EQUAL(no_stride.dimension(3), 7);
74
75 for (IndexType i = 0; i < 2; ++i) {
76 for (IndexType j = 0; j < 3; ++j) {
77 for (IndexType k = 0; k < 5; ++k) {
78 for (IndexType l = 0; l < 7; ++l) {
79 VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
80 }
81 }
82 }
83 }
84
85 strides[0] = 2;
86 strides[1] = 4;
87 strides[2] = 2;
88 strides[3] = 3;
89 //Tensor<float, 4, DataLayout> stride;
90 // stride = tensor.stride(strides);
91
92 gpu_stride.device(sycl_device)=gpu_tensor.stride(strides);
93 sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes);
94
95 VERIFY_IS_EQUAL(stride.dimension(0), 1);
96 VERIFY_IS_EQUAL(stride.dimension(1), 1);
97 VERIFY_IS_EQUAL(stride.dimension(2), 3);
98 VERIFY_IS_EQUAL(stride.dimension(3), 3);
99
100 for (IndexType i = 0; i < 1; ++i) {
101 for (IndexType j = 0; j < 1; ++j) {
102 for (IndexType k = 0; k < 3; ++k) {
103 for (IndexType l = 0; l < 3; ++l) {
104 VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l));
105 }
106 }
107 }
108 }
109
110 sycl_device.deallocate(d_tensor);
111 sycl_device.deallocate(d_no_stride);
112 sycl_device.deallocate(d_stride);
113 }
114
115 template <typename DataType, int DataLayout, typename IndexType>
test_striding_as_lvalue(const Eigen::SyclDevice & sycl_device)116 static void test_striding_as_lvalue(const Eigen::SyclDevice& sycl_device)
117 {
118
119 Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}};
120 Eigen::array<IndexType, 4> stride_dims = {{3,12,10,21}};
121
122
123 Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims);
124 Tensor<DataType, 4, DataLayout,IndexType> no_stride(stride_dims);
125 Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims);
126
127
128 std::size_t tensor_bytes = tensor.size() * sizeof(DataType);
129 std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType);
130 std::size_t stride_bytes = stride.size() * sizeof(DataType);
131
132 DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes));
133 DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes));
134 DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes));
135
136 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims);
137 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, stride_dims);
138 Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims);
139
140 //Tensor<float, 4, DataLayout> tensor(2,3,5,7);
141 tensor.setRandom();
142 array<IndexType, 4> strides;
143 strides[0] = 2;
144 strides[1] = 4;
145 strides[2] = 2;
146 strides[3] = 3;
147
148 // Tensor<float, 4, DataLayout> result(3, 12, 10, 21);
149 // result.stride(strides) = tensor;
150 sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes);
151 gpu_stride.stride(strides).device(sycl_device)=gpu_tensor;
152 sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes);
153
154 for (IndexType i = 0; i < 2; ++i) {
155 for (IndexType j = 0; j < 3; ++j) {
156 for (IndexType k = 0; k < 5; ++k) {
157 for (IndexType l = 0; l < 7; ++l) {
158 VERIFY_IS_EQUAL(tensor(i,j,k,l), stride(2*i,4*j,2*k,3*l));
159 }
160 }
161 }
162 }
163
164 array<IndexType, 4> no_strides;
165 no_strides[0] = 1;
166 no_strides[1] = 1;
167 no_strides[2] = 1;
168 no_strides[3] = 1;
169 // Tensor<float, 4, DataLayout> result2(3, 12, 10, 21);
170 // result2.stride(strides) = tensor.stride(no_strides);
171
172 gpu_no_stride.stride(strides).device(sycl_device)=gpu_tensor.stride(no_strides);
173 sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes);
174
175 for (IndexType i = 0; i < 2; ++i) {
176 for (IndexType j = 0; j < 3; ++j) {
177 for (IndexType k = 0; k < 5; ++k) {
178 for (IndexType l = 0; l < 7; ++l) {
179 VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(2*i,4*j,2*k,3*l));
180 }
181 }
182 }
183 }
184 sycl_device.deallocate(d_tensor);
185 sycl_device.deallocate(d_no_stride);
186 sycl_device.deallocate(d_stride);
187 }
188
189
tensorStridingPerDevice(Dev_selector & s)190 template <typename Dev_selector> void tensorStridingPerDevice(Dev_selector& s){
191 QueueInterface queueInterface(s);
192 auto sycl_device=Eigen::SyclDevice(&queueInterface);
193 test_simple_striding<float, ColMajor, int64_t>(sycl_device);
194 test_simple_striding<float, RowMajor, int64_t>(sycl_device);
195 test_striding_as_lvalue<float, ColMajor, int64_t>(sycl_device);
196 test_striding_as_lvalue<float, RowMajor, int64_t>(sycl_device);
197 }
198
// Test entry point: runs the striding sub-tests once for every device returned
// by Eigen::get_sycl_supported_devices().
EIGEN_DECLARE_TEST(cxx11_tensor_striding_sycl)
{
  const auto supported_devices = Eigen::get_sycl_supported_devices();
  for (const auto& device : supported_devices) {
    CALL_SUBTEST(tensorStridingPerDevice(device));
  }
}
204