// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;
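// Custom unary op: the result is twice the size of the input in each dimension.
// Input values are written at even (i, j) positions and explicit zeros are
// written at odd (i, j) positions.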
template<typename TensorType>
struct InsertZeros {
  DSizes<DenseIndex, 2> dimensions(const TensorType& input) const {
    DSizes<DenseIndex, 2> result;
    result[0] = input.dimension(0) * 2;
    result[1] = input.dimension(1) * 2;
    return result;
  }

  template <typename Output, typename Device>
  void eval(const TensorType& input, Output& output, const Device& device) const
  {
    array<DenseIndex, 2> strides;
    strides[0] = 2;
    strides[1] = 2;
    output.stride(strides).device(device) = input;

    Eigen::DSizes<DenseIndex, 2> offsets(1, 1);
    Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0) - 1, output.dimension(1) - 1);
    output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
  }
};

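// Evaluates the InsertZeros custom op on the SYCL device and checks the result
// against the expected interleaved layout on the host.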
template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_unary_op_sycl(const Eigen::SyclDevice &sycl_device)
{
  IndexType sizeDim1 = 3;
  IndexType sizeDim2 = 5;
  Eigen::array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
  Eigen::array<IndexType, 2> tensorResultRange = {{6, 10}};

  Eigen::Tensor<DataType, 2, DataLayout, IndexType> in1(tensorRange);
  Eigen::Tensor<DataType, 2, DataLayout, IndexType> out(tensorResultRange);

  DataType* gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize() * sizeof(DataType)));

  typedef Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > TensorType;
  TensorType gpu_in1(gpu_in1_data, tensorRange);
  TensorType gpu_out(gpu_out_data, tensorResultRange);

  in1.setRandom();
  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), (in1.dimensions().TotalSize()) * sizeof(DataType));
  gpu_out.device(sycl_device) = gpu_in1.customOp(InsertZeros<TensorType>());
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, (out.dimensions().TotalSize()) * sizeof(DataType));

  VERIFY_IS_EQUAL(out.dimension(0), 6);
  VERIFY_IS_EQUAL(out.dimension(1), 10);

  for (int i = 0; i < 6; i += 2) {
    for (int j = 0; j < 10; j += 2) {
      VERIFY_IS_EQUAL(out(i, j), in1(i / 2, j / 2));
    }
  }
  for (int i = 1; i < 6; i += 2) {
    for (int j = 1; j < 10; j += 2) {
      VERIFY_IS_EQUAL(out(i, j), 0);
    }
  }
  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_out_data);
}

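// Custom binary op: batched matrix multiplication. For every index i along the
// last dimension, input1.chip<2>(i) is contracted with input2.chip<2>(i).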
template<typename TensorType>
struct BatchMatMul {
  DSizes<DenseIndex, 3> dimensions(const TensorType& input1, const TensorType& input2) const {
    DSizes<DenseIndex, 3> result;
    result[0] = input1.dimension(0);
    result[1] = input2.dimension(1);
    result[2] = input2.dimension(2);
    return result;
  }

  template <typename Output, typename Device>
  void eval(const TensorType& input1, const TensorType& input2,
            Output& output, const Device& device) const
  {
    typedef typename TensorType::DimensionPair DimPair;
    array<DimPair, 1> dims;
    dims[0] = DimPair(1, 0);
    for (int64_t i = 0; i < output.dimension(2); ++i) {
      output.template chip<2>(i).device(device) = input1.template chip<2>(i).contract(input2.template chip<2>(i), dims);
    }
  }
};

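// Evaluates the BatchMatMul custom binary op on the SYCL device and compares
// each output slice against a contraction computed on the host.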
template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_binary_op_sycl(const Eigen::SyclDevice &sycl_device)
{
  Eigen::array<IndexType, 3> tensorRange1 = {{2, 3, 5}};
  Eigen::array<IndexType, 3> tensorRange2 = {{3, 7, 5}};
  Eigen::array<IndexType, 3> tensorResultRange = {{2, 7, 5}};

  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange1);
  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange2);
  Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorResultRange);

  DataType* gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize() * sizeof(DataType)));
  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize() * sizeof(DataType)));

  typedef Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > TensorType;
  TensorType gpu_in1(gpu_in1_data, tensorRange1);
  TensorType gpu_in2(gpu_in2_data, tensorRange2);
  TensorType gpu_out(gpu_out_data, tensorResultRange);

  in1.setRandom();
  in2.setRandom();

  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), (in1.dimensions().TotalSize()) * sizeof(DataType));
  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(), (in2.dimensions().TotalSize()) * sizeof(DataType));

  gpu_out.device(sycl_device) = gpu_in1.customOp(gpu_in2, BatchMatMul<TensorType>());
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, (out.dimensions().TotalSize()) * sizeof(DataType));

  for (IndexType i = 0; i < 5; ++i) {
    typedef typename Eigen::Tensor<DataType, 3, DataLayout, IndexType>::DimensionPair DimPair;
    array<DimPair, 1> dims;
    dims[0] = DimPair(1, 0);
    Eigen::Tensor<DataType, 2, DataLayout, IndexType> reference = in1.template chip<2>(i).contract(in2.template chip<2>(i), dims);
    TensorRef<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > val = out.template chip<2>(i);
    for (IndexType j = 0; j < 2; ++j) {
      for (IndexType k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(val(j, k), reference(j, k));
      }
    }
  }
  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_in2_data);
  sycl_device.deallocate(gpu_out_data);
}

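// Runs the unary and binary custom-op tests for both RowMajor and ColMajor
// layouts on the given SYCL device.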
template <typename DataType, typename Dev_selector>
void custom_op_perDevice(Dev_selector s) {
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_custom_unary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_custom_unary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_custom_binary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_custom_binary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
}

EIGEN_DECLARE_TEST(cxx11_tensor_custom_op_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(custom_op_perDevice<float>(device));
  }
}