• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_
11 #define CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_
12 
13 namespace Eigen {
14 namespace internal {
15 
16 // Accumulate the product of 2 QInt8 inputs on 32 bits to prevent
17 // overflows
18 template <>
19 struct scalar_product_traits<QInt8, QInt8> {
20   enum { Defined = 1 };
21   typedef QInt32 ReturnType;
22 };
23 
24 // Accumulate the product of 2 QInt16 inputs on 32 bits to prevent
25 // overflows
26 template <>
27 struct scalar_product_traits<QInt16, QInt16> {
28   enum { Defined = 1 };
29   typedef QInt32 ReturnType;
30 };
31 
32 // Accumulate the product of QInt8 inputs with QUint8 inputs on 32 bits
33 // to prevent overflows
34 template <>
35 struct scalar_product_traits<QInt8, QUInt8> {
36   enum { Defined = 1 };
37   typedef QInt32 ReturnType;
38 };
39 
40 // Accumulate the product of QUInt8 inputs with Qint8 inputs on 32 bits
41 // to prevent overflows
42 template <>
43 struct scalar_product_traits<QUInt8, QInt8> {
44   enum { Defined = 1 };
45   typedef QInt32 ReturnType;
46 };
47 
48 // Description of the product implementation. It's pretty simple now since
49 // nothing is vectorized yet.
// This definition tackles the case where both lhs and rhs are encoded using
// signed 8-bit integers.
52 #ifndef EIGEN_USE_OPTIMIZED_INT8_INT8_MAT_MAT_PRODUCT
53 
54 template <bool _ConjLhs, bool _ConjRhs>
55 class gebp_traits<QInt8, QInt8, _ConjLhs, _ConjRhs> {
56  public:
57   typedef QInt8 LhsScalar;
58   typedef QInt8 RhsScalar;
59   typedef QInt32 ResScalar;
60 
61   typedef typename packet_traits<LhsScalar>::type LhsPacket;
62   typedef LhsPacket LhsPacket4Packing;
63 
64   enum {
65     // register block size along the M and N directions
66     // One for the current implementation
67     nr = 1,
68     mr = 1,
69     // Progress made at each iteration of the product loop
70     // also 1 for the current implementation
71     LhsProgress = 1,
72     RhsProgress = 1
73   };
74 };
75 
76 // The signed 8bit Mat-Mat product itself.
77 template <typename Index, typename DataMapper, int mr, int nr,
78           bool ConjugateLhs, bool ConjugateRhs>
79 struct gebp_kernel<QInt8, QInt8, Index, DataMapper, mr, nr, ConjugateLhs,
80                    ConjugateRhs> {
81   EIGEN_DONT_INLINE
82   void operator()(const DataMapper& res, const QInt8* blockA,
83                   const QInt8* blockB, Index rows, Index depth, Index cols,
84                   QInt32 alpha, Index strideA = -1, Index strideB = -1,
85                   Index offsetA = 0, Index offsetB = 0);
86 };
87 
88 template <typename Index, typename DataMapper, int mr, int nr,
89           bool ConjugateLhs, bool ConjugateRhs>
90 EIGEN_DONT_INLINE void gebp_kernel<QInt8, QInt8, Index, DataMapper, mr, nr,
91                                    ConjugateLhs, ConjugateRhs>::
92 operator()(const DataMapper& res, const QInt8* blockA, const QInt8* blockB,
93            Index rows, Index depth, Index cols, QInt32 alpha, Index strideA,
94            Index strideB, Index offsetA, Index offsetB) {
95   EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
96   EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
97 
98   eigen_assert(alpha.value == 1);
99   eigen_assert(strideA == -1);
100   eigen_assert(strideB == -1);
101   eigen_assert(offsetA == 0);
102   eigen_assert(offsetB == 0);
103 
104   eigen_assert(rows > 0);
105   eigen_assert(cols > 0);
106   eigen_assert(depth > 0);
107   eigen_assert(blockA);
108   eigen_assert(blockB);
109 
110   for (Index j = 0; j < cols; ++j) {
111     Index startB = j * depth;
112 
113     for (Index i = 0; i < rows; ++i) {
114       Index startA = i * depth;
115 
116       for (Index k = 0; k < depth; ++k) {
117         res(i, j) += blockA[startA + k] * blockB[startB + k];
118       }
119     }
120   }
121 }
122 #endif
123 
// This definition tackles the case where the lhs is encoded using signed
// 8-bit integers and the rhs using unsigned 8-bit integers.
126 #ifndef EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT
127 template <bool _ConjLhs, bool _ConjRhs>
128 class gebp_traits<QInt8, QUInt8, _ConjLhs, _ConjRhs> {
129  public:
130   typedef QInt8 LhsScalar;
131   typedef QUInt8 RhsScalar;
132   typedef QInt32 ResScalar;
133 
134   typedef typename packet_traits<LhsScalar>::type LhsPacket;
135   typedef LhsPacket LhsPacket4Packing;
136 
137   enum {
138     // register block size along the M and N directions
139     // One for the current implementation
140     nr = 1,
141     mr = 1,
142     // Progress made at each iteration of the product loop
143     // also 1 for the current implementation
144     LhsProgress = 1,
145     RhsProgress = 1
146   };
147 };
148 
149 // Mat-Mat product of a signed 8bit lhs with an unsigned 8bit rhs
150 template <typename Index, typename DataMapper, int mr, int nr,
151           bool ConjugateLhs, bool ConjugateRhs>
152 struct gebp_kernel<QInt8, QUInt8, Index, DataMapper, mr, nr, ConjugateLhs,
153                    ConjugateRhs> {
154   EIGEN_DONT_INLINE
155   void operator()(const DataMapper& res, const QInt8* blockA,
156                   const QUInt8* blockB, Index rows, Index depth, Index cols,
157                   QInt32 alpha, Index strideA = -1, Index strideB = -1,
158                   Index offsetA = 0, Index offsetB = 0);
159 };
160 
161 template <typename Index, typename DataMapper, int mr, int nr,
162           bool ConjugateLhs, bool ConjugateRhs>
163 EIGEN_DONT_INLINE void gebp_kernel<QInt8, QUInt8, Index, DataMapper, mr, nr,
164                                    ConjugateLhs, ConjugateRhs>::
165 operator()(const DataMapper& res, const QInt8* blockA, const QUInt8* blockB,
166            Index rows, Index depth, Index cols, QInt32 alpha, Index strideA,
167            Index strideB, Index offsetA, Index offsetB) {
168   EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
169   EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
170 
171   eigen_assert(alpha.value == 1);
172   eigen_assert(strideA == -1);
173   eigen_assert(strideB == -1);
174   eigen_assert(offsetA == 0);
175   eigen_assert(offsetB == 0);
176 
177   eigen_assert(rows > 0);
178   eigen_assert(cols > 0);
179   eigen_assert(depth > 0);
180   eigen_assert(blockA);
181   eigen_assert(blockB);
182 
183   for (Index j = 0; j < cols; ++j) {
184     Index startB = j * depth;
185 
186     for (Index i = 0; i < rows; ++i) {
187       Index startA = i * depth;
188 
189       for (Index k = 0; k < depth; ++k) {
190         res(i, j) += blockA[startA + k] * blockB[startB + k];
191       }
192     }
193   }
194 }
195 #endif
196 
// This definition tackles the case where the lhs is encoded using unsigned
// 8-bit integers and the rhs using signed 8-bit integers.
199 #ifndef EIGEN_USE_OPTIMIZED_UINT8_INT8_MAT_MAT_PRODUCT
200 template <bool _ConjLhs, bool _ConjRhs>
201 class gebp_traits<QUInt8, QInt8, _ConjLhs, _ConjRhs> {
202  public:
203   typedef QUInt8 LhsScalar;
204   typedef QInt8 RhsScalar;
205   typedef QInt32 ResScalar;
206 
207   typedef typename packet_traits<LhsScalar>::type LhsPacket;
208   typedef LhsPacket LhsPacket4Packing;
209 
210   enum {
211     // register block size along the M and N directions
212     // One for the current implementation
213     nr = 1,
214     mr = 1,
215     // Progress made at each iteration of the product loop
216     // also 1 for the current implementation
217     LhsProgress = 1,
218     RhsProgress = 1
219   };
220 };
221 
222 // Mat-Mat product of an unsigned 8bit lhs with a signed 8bit rhs
223 template <typename Index, typename DataMapper, int mr, int nr,
224           bool ConjugateLhs, bool ConjugateRhs>
225 struct gebp_kernel<QUInt8, QInt8, Index, DataMapper, mr, nr, ConjugateLhs,
226                    ConjugateRhs> {
227   EIGEN_DONT_INLINE
228   void operator()(const DataMapper& res, const QUInt8* blockA,
229                   const QInt8* blockB, Index rows, Index depth, Index cols,
230                   QInt32 alpha, Index strideA = -1, Index strideB = -1,
231                   Index offsetA = 0, Index offsetB = 0);
232 };
233 
234 template <typename Index, typename DataMapper, int mr, int nr,
235           bool ConjugateLhs, bool ConjugateRhs>
236 EIGEN_DONT_INLINE void gebp_kernel<QUInt8, QInt8, Index, DataMapper, mr, nr,
237                                    ConjugateLhs, ConjugateRhs>::
238 operator()(const DataMapper& res, const QUInt8* blockA, const QInt8* blockB,
239            Index rows, Index depth, Index cols, QInt32 alpha, Index strideA,
240            Index strideB, Index offsetA, Index offsetB) {
241   EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
242   EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
243 
244   eigen_assert(alpha.value == 1);
245   eigen_assert(strideA == -1);
246   eigen_assert(strideB == -1);
247   eigen_assert(offsetA == 0);
248   eigen_assert(offsetB == 0);
249 
250   eigen_assert(rows > 0);
251   eigen_assert(cols > 0);
252   eigen_assert(depth > 0);
253   eigen_assert(blockA);
254   eigen_assert(blockB);
255 
256   for (Index j = 0; j < cols; ++j) {
257     Index startB = j * depth;
258 
259     for (Index i = 0; i < rows; ++i) {
260       Index startA = i * depth;
261 
262       for (Index k = 0; k < depth; ++k) {
263         res(i, j) += blockA[startA + k] * blockB[startB + k];
264       }
265     }
266   }
267 }
268 #endif
269 
270 #ifndef EIGEN_USE_OPTIMIZED_INT16_INT16_MAT_MAT_PRODUCT
271 
272 template <bool _ConjLhs, bool _ConjRhs>
273 class gebp_traits<QInt16, QInt16, _ConjLhs, _ConjRhs> {
274  public:
275   typedef QInt16 LhsScalar;
276   typedef QInt16 RhsScalar;
277   typedef QInt32 ResScalar;
278 
279   typedef typename packet_traits<LhsScalar>::type LhsPacket;
280   typedef LhsPacket LhsPacket4Packing;
281 
282   enum {
283     // register block size along the M and N directions
284     // One for the current implementation
285     nr = 1,
286     mr = 1,
287     // Progress made at each iteration of the product loop
288     // also 1 for the current implementation
289     LhsProgress = 1,
290     RhsProgress = 1
291   };
292 };
293 
294 // The signed 16bit Mat-Mat product itself.
295 template <typename Index, typename DataMapper, int mr, int nr,
296           bool ConjugateLhs, bool ConjugateRhs>
297 struct gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr, ConjugateLhs,
298                    ConjugateRhs> {
299   EIGEN_DONT_INLINE
300   void operator()(const DataMapper& res, const QInt16* blockA,
301                   const QInt16* blockB, Index rows, Index depth, Index cols,
302                   QInt32 alpha, Index strideA = -1, Index strideB = -1,
303                   Index offsetA = 0, Index offsetB = 0);
304 };
305 
306 template <typename Index, typename DataMapper, int mr, int nr,
307           bool ConjugateLhs, bool ConjugateRhs>
308 EIGEN_DONT_INLINE void gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr,
309                                    ConjugateLhs, ConjugateRhs>::
310 operator()(const DataMapper& res, const QInt16* blockA, const QInt16* blockB,
311            Index rows, Index depth, Index cols, QInt32 alpha, Index strideA,
312            Index strideB, Index offsetA, Index offsetB) {
313   EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
314   EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
315 
316   eigen_assert(alpha.value == 1);
317   eigen_assert(strideA == -1);
318   eigen_assert(strideB == -1);
319   eigen_assert(offsetA == 0);
320   eigen_assert(offsetB == 0);
321 
322   eigen_assert(rows > 0);
323   eigen_assert(cols > 0);
324   eigen_assert(depth > 0);
325   eigen_assert(blockA);
326   eigen_assert(blockB);
327 
328   for (Index j = 0; j < cols; ++j) {
329     Index startB = j * depth;
330 
331     for (Index i = 0; i < rows; ++i) {
332       Index startA = i * depth;
333 
334       for (Index k = 0; k < depth; ++k) {
335         res(i, j) += blockA[startA + k] * blockB[startB + k];
336       }
337     }
338   }
339 }
340 #endif
341 
342 }  // namespace internal
343 }  // namespace Eigen
344 
345 #endif  // CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_
346