• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  Copyright (c) 2011, Intel Corporation. All rights reserved.
3 
4  Redistribution and use in source and binary forms, with or without modification,
5  are permitted provided that the following conditions are met:
6 
7  * Redistributions of source code must retain the above copyright notice, this
8    list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright notice,
10    this list of conditions and the following disclaimer in the documentation
11    and/or other materials provided with the distribution.
12  * Neither the name of Intel Corporation nor the names of its contributors may
13    be used to endorse or promote products derived from this software without
14    specific prior written permission.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
23  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 //
27  ********************************************************************************
28  *   Content : Eigen bindings to BLAS F77
29  *   Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
30  ********************************************************************************
31 */
32 
33 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
34 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
35 
36 namespace Eigen {
37 
38 namespace internal {
39 
40 
41 /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
42 
43 #define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
44 template <typename Index, \
45           int LhsStorageOrder, bool ConjugateLhs, \
46           int RhsStorageOrder, bool ConjugateRhs> \
47 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
48 {\
49 \
50   static void run( \
51     Index rows, Index cols, \
52     const EIGTYPE* _lhs, Index lhsStride, \
53     const EIGTYPE* _rhs, Index rhsStride, \
54     EIGTYPE* res,        Index resIncr, Index resStride, \
55     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
56   { \
57     EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
58     eigen_assert(resIncr == 1); \
59     char side='L', uplo='L'; \
60     BlasIndex m, n, lda, ldb, ldc; \
61     const EIGTYPE *a, *b; \
62     EIGTYPE beta(1); \
63     MatrixX##EIGPREFIX b_tmp; \
64 \
65 /* Set transpose options */ \
66 /* Set m, n, k */ \
67     m = convert_index<BlasIndex>(rows);  \
68     n = convert_index<BlasIndex>(cols);  \
69 \
70 /* Set lda, ldb, ldc */ \
71     lda = convert_index<BlasIndex>(lhsStride); \
72     ldb = convert_index<BlasIndex>(rhsStride); \
73     ldc = convert_index<BlasIndex>(resStride); \
74 \
75 /* Set a, b, c */ \
76     if (LhsStorageOrder==RowMajor) uplo='U'; \
77     a = _lhs; \
78 \
79     if (RhsStorageOrder==RowMajor) { \
80       Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
81       b_tmp = rhs.adjoint(); \
82       b = b_tmp.data(); \
83       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
84     } else b = _rhs; \
85 \
86     BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
87 \
88   } \
89 };
90 
91 
92 #define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
93 template <typename Index, \
94           int LhsStorageOrder, bool ConjugateLhs, \
95           int RhsStorageOrder, bool ConjugateRhs> \
96 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
97 {\
98   static void run( \
99     Index rows, Index cols, \
100     const EIGTYPE* _lhs, Index lhsStride, \
101     const EIGTYPE* _rhs, Index rhsStride, \
102     EIGTYPE* res,        Index resIncr, Index resStride, \
103     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
104   { \
105     EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
106     eigen_assert(resIncr == 1); \
107     char side='L', uplo='L'; \
108     BlasIndex m, n, lda, ldb, ldc; \
109     const EIGTYPE *a, *b; \
110     EIGTYPE beta(1); \
111     MatrixX##EIGPREFIX b_tmp; \
112     Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
113 \
114 /* Set transpose options */ \
115 /* Set m, n, k */ \
116     m = convert_index<BlasIndex>(rows); \
117     n = convert_index<BlasIndex>(cols); \
118 \
119 /* Set lda, ldb, ldc */ \
120     lda = convert_index<BlasIndex>(lhsStride); \
121     ldb = convert_index<BlasIndex>(rhsStride); \
122     ldc = convert_index<BlasIndex>(resStride); \
123 \
124 /* Set a, b, c */ \
125     if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
126       Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
127       a_tmp = lhs.conjugate(); \
128       a = a_tmp.data(); \
129       lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
130     } else a = _lhs; \
131     if (LhsStorageOrder==RowMajor) uplo='U'; \
132 \
133     if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
134        b = _rhs; } \
135     else { \
136       if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
137         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
138         b_tmp = rhs.conjugate(); \
139       } else \
140       if (ConjugateRhs) { \
141         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
142         b_tmp = rhs.adjoint(); \
143       } else { \
144         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
145         b_tmp = rhs.transpose(); \
146       } \
147       b = b_tmp.data(); \
148       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
149     } \
150 \
151     BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
152 \
153   } \
154 };
155 
156 #ifdef EIGEN_USE_MKL
157 EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
158 EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
159 EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
160 EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
161 #else
162 EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
163 EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
164 EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
165 EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
166 #endif
167 
168 /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
169 
170 #define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
171 template <typename Index, \
172           int LhsStorageOrder, bool ConjugateLhs, \
173           int RhsStorageOrder, bool ConjugateRhs> \
174 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
175 {\
176 \
177   static void run( \
178     Index rows, Index cols, \
179     const EIGTYPE* _lhs, Index lhsStride, \
180     const EIGTYPE* _rhs, Index rhsStride, \
181     EIGTYPE* res,        Index resIncr, Index resStride, \
182     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
183   { \
184     EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
185     eigen_assert(resIncr == 1); \
186     char side='R', uplo='L'; \
187     BlasIndex m, n, lda, ldb, ldc; \
188     const EIGTYPE *a, *b; \
189     EIGTYPE beta(1); \
190     MatrixX##EIGPREFIX b_tmp; \
191 \
192 /* Set m, n, k */ \
193     m = convert_index<BlasIndex>(rows);  \
194     n = convert_index<BlasIndex>(cols);  \
195 \
196 /* Set lda, ldb, ldc */ \
197     lda = convert_index<BlasIndex>(rhsStride); \
198     ldb = convert_index<BlasIndex>(lhsStride); \
199     ldc = convert_index<BlasIndex>(resStride); \
200 \
201 /* Set a, b, c */ \
202     if (RhsStorageOrder==RowMajor) uplo='U'; \
203     a = _rhs; \
204 \
205     if (LhsStorageOrder==RowMajor) { \
206       Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
207       b_tmp = lhs.adjoint(); \
208       b = b_tmp.data(); \
209       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
210     } else b = _lhs; \
211 \
212     BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
213 \
214   } \
215 };
216 
217 
218 #define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
219 template <typename Index, \
220           int LhsStorageOrder, bool ConjugateLhs, \
221           int RhsStorageOrder, bool ConjugateRhs> \
222 struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
223 {\
224   static void run( \
225     Index rows, Index cols, \
226     const EIGTYPE* _lhs, Index lhsStride, \
227     const EIGTYPE* _rhs, Index rhsStride, \
228     EIGTYPE* res,        Index resIncr, Index resStride, \
229     EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
230   { \
231     EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
232     eigen_assert(resIncr == 1); \
233     char side='R', uplo='L'; \
234     BlasIndex m, n, lda, ldb, ldc; \
235     const EIGTYPE *a, *b; \
236     EIGTYPE beta(1); \
237     MatrixX##EIGPREFIX b_tmp; \
238     Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
239 \
240 /* Set m, n, k */ \
241     m = convert_index<BlasIndex>(rows); \
242     n = convert_index<BlasIndex>(cols); \
243 \
244 /* Set lda, ldb, ldc */ \
245     lda = convert_index<BlasIndex>(rhsStride); \
246     ldb = convert_index<BlasIndex>(lhsStride); \
247     ldc = convert_index<BlasIndex>(resStride); \
248 \
249 /* Set a, b, c */ \
250     if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
251       Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
252       a_tmp = rhs.conjugate(); \
253       a = a_tmp.data(); \
254       lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
255     } else a = _rhs; \
256     if (RhsStorageOrder==RowMajor) uplo='U'; \
257 \
258     if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
259        b = _lhs; } \
260     else { \
261       if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
262         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
263         b_tmp = lhs.conjugate(); \
264       } else \
265       if (ConjugateLhs) { \
266         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
267         b_tmp = lhs.adjoint(); \
268       } else { \
269         Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
270         b_tmp = lhs.transpose(); \
271       } \
272       b = b_tmp.data(); \
273       ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
274     } \
275 \
276     BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
277   } \
278 };
279 
280 #ifdef EIGEN_USE_MKL
281 EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
282 EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
283 EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
284 EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
285 #else
286 EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
287 EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
288 EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
289 EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
290 #endif
291 } // end namespace internal
292 
293 } // end namespace Eigen
294 
295 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
296