// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;
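
// Custom unary functor used with customOp below: the output is twice the
// input size in each dimension, with the input values copied to even indices
// and zeros written to the interleaved odd indices.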
template<typename TensorType>
struct InsertZeros {
  DSizes<DenseIndex, 2> dimensions(const TensorType& input) const {
    DSizes<DenseIndex, 2> result;
    result[0] = input.dimension(0) * 2;
    result[1] = input.dimension(1) * 2;
    return result;
  }

  template <typename Output, typename Device>
  void eval(const TensorType& input, Output& output, const Device& device) const
  {
    array<DenseIndex, 2> strides;
    strides[0] = 2;
    strides[1] = 2;
    output.stride(strides).device(device) = input;

    Eigen::DSizes<DenseIndex, 2> offsets(1, 1);
    Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1);
    output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f);
  }
};
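
// Runs the unary custom op (InsertZeros) on the SYCL device for a 3x5 input
// and verifies the 6x10 result on the host.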
template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_unary_op_sycl(const Eigen::SyclDevice &sycl_device)
{
  IndexType sizeDim1 = 3;
  IndexType sizeDim2 = 5;
  Eigen::array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
  Eigen::array<IndexType, 2> tensorResultRange = {{6, 10}};

  Eigen::Tensor<DataType, 2, DataLayout, IndexType> in1(tensorRange);
  Eigen::Tensor<DataType, 2, DataLayout, IndexType> out(tensorResultRange);

  DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
  DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));

  typedef Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > TensorType;
  TensorType gpu_in1(gpu_in1_data, tensorRange);
  TensorType gpu_out(gpu_out_data, tensorResultRange);

  in1.setRandom();
  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), (in1.dimensions().TotalSize())*sizeof(DataType));
  gpu_out.device(sycl_device) = gpu_in1.customOp(InsertZeros<TensorType>());
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, (out.dimensions().TotalSize())*sizeof(DataType));

  VERIFY_IS_EQUAL(out.dimension(0), 6);
  VERIFY_IS_EQUAL(out.dimension(1), 10);

  for (int i = 0; i < 6; i+=2) {
    for (int j = 0; j < 10; j+=2) {
      VERIFY_IS_EQUAL(out(i, j), in1(i/2, j/2));
    }
  }
  for (int i = 1; i < 6; i+=2) {
    for (int j = 1; j < 10; j+=2) {
      VERIFY_IS_EQUAL(out(i, j), 0);
    }
  }
  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_out_data);
}
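
// Custom binary functor used with customOp below: batched matrix multiply.
// For each index along the third dimension, it contracts the corresponding
// 2-D slices of the two inputs.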
template<typename TensorType>
struct BatchMatMul {
  DSizes<DenseIndex, 3> dimensions(const TensorType& input1, const TensorType& input2) const {
    DSizes<DenseIndex, 3> result;
    result[0] = input1.dimension(0);
    result[1] = input2.dimension(1);
    result[2] = input2.dimension(2);
    return result;
  }

  template <typename Output, typename Device>
  void eval(const TensorType& input1, const TensorType& input2,
            Output& output, const Device& device) const
  {
    typedef typename TensorType::DimensionPair DimPair;
    array<DimPair, 1> dims;
    dims[0] = DimPair(1, 0);
    for (int64_t i = 0; i < output.dimension(2); ++i) {
      output.template chip<2>(i).device(device) = input1.template chip<2>(i).contract(input2.template chip<2>(i), dims);
    }
  }
};
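
// Runs the binary custom op (BatchMatMul) on the SYCL device and compares
// each 2x7 output slice against a host-side contraction of the inputs.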
template<typename DataType, int DataLayout, typename IndexType>
static void test_custom_binary_op_sycl(const Eigen::SyclDevice &sycl_device)
{
  Eigen::array<IndexType, 3> tensorRange1 = {{2, 3, 5}};
  Eigen::array<IndexType, 3> tensorRange2 = {{3, 7, 5}};
  Eigen::array<IndexType, 3> tensorResultRange = {{2, 7, 5}};

  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange1);
  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange2);
  Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorResultRange);

  DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
  DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType)));
  DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));

  typedef Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > TensorType;
  TensorType gpu_in1(gpu_in1_data, tensorRange1);
  TensorType gpu_in2(gpu_in2_data, tensorRange2);
  TensorType gpu_out(gpu_out_data, tensorResultRange);

  in1.setRandom();
  in2.setRandom();

  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), (in1.dimensions().TotalSize())*sizeof(DataType));
  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(), (in2.dimensions().TotalSize())*sizeof(DataType));

  gpu_out.device(sycl_device) = gpu_in1.customOp(gpu_in2, BatchMatMul<TensorType>());
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, (out.dimensions().TotalSize())*sizeof(DataType));

  for (IndexType i = 0; i < 5; ++i) {
    typedef typename Eigen::Tensor<DataType, 3, DataLayout, IndexType>::DimensionPair DimPair;
    array<DimPair, 1> dims;
    dims[0] = DimPair(1, 0);
    Eigen::Tensor<DataType, 2, DataLayout, IndexType> reference = in1.template chip<2>(i).contract(in2.template chip<2>(i), dims);
    TensorRef<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > val = out.template chip<2>(i);
    for (IndexType j = 0; j < 2; ++j) {
      for (IndexType k = 0; k < 7; ++k) {
        VERIFY_IS_APPROX(val(j, k), reference(j, k));
      }
    }
  }
  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_in2_data);
  sycl_device.deallocate(gpu_out_data);
}
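
// Instantiates a SYCL device from the given selector and runs both custom op
// tests in RowMajor and ColMajor layouts.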
template <typename DataType, typename Dev_selector> void custom_op_perDevice(Dev_selector s){
  QueueInterface queueInterface(s);
  auto sycl_device = Eigen::SyclDevice(&queueInterface);
  test_custom_unary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
  test_custom_unary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_custom_binary_op_sycl<DataType, ColMajor, int64_t>(sycl_device);
  test_custom_binary_op_sycl<DataType, RowMajor, int64_t>(sycl_device);
}

EIGEN_DECLARE_TEST(cxx11_tensor_custom_op_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(custom_op_perDevice<float>(device));
  }
}