1 // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
2 // possible options:
3 // -DEIGEN_DONT_VECTORIZE
4 // -msse2
5
6 // #define EIGEN_DEFAULT_TO_ROW_MAJOR
7 #define _FLOAT
8
9 #include <iostream>
10
11 #include <Eigen/Core>
12 #include "BenchTimer.h"
13
14 // include the BLAS headers
15 extern "C" {
16 #include <cblas.h>
17 }
18 #include <string>
19
20 #ifdef _FLOAT
21 typedef float Scalar;
22 #define CBLAS_GEMM cblas_sgemm
23 #else
24 typedef double Scalar;
25 #define CBLAS_GEMM cblas_dgemm
26 #endif
27
28
29 typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
30 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
31 void check_product(int M, int N, int K);
32 void check_product(void);
33
main(int argc,char * argv[])34 int main(int argc, char *argv[])
35 {
36 // disable SSE exceptions
37 #ifdef __GNUC__
38 {
39 int aux;
40 asm(
41 "stmxcsr %[aux] \n\t"
42 "orl $32832, %[aux] \n\t"
43 "ldmxcsr %[aux] \n\t"
44 : : [aux] "m" (aux));
45 }
46 #endif
47
48 int nbtries=1, nbloops=1, M, N, K;
49
50 if (argc==2)
51 {
52 if (std::string(argv[1])=="check")
53 check_product();
54 else
55 M = N = K = atoi(argv[1]);
56 }
57 else if ((argc==3) && (std::string(argv[1])=="auto"))
58 {
59 M = N = K = atoi(argv[2]);
60 nbloops = 1000000000/(M*M*M);
61 if (nbloops<1)
62 nbloops = 1;
63 nbtries = 6;
64 }
65 else if (argc==4)
66 {
67 M = N = K = atoi(argv[1]);
68 nbloops = atoi(argv[2]);
69 nbtries = atoi(argv[3]);
70 }
71 else if (argc==6)
72 {
73 M = atoi(argv[1]);
74 N = atoi(argv[2]);
75 K = atoi(argv[3]);
76 nbloops = atoi(argv[4]);
77 nbtries = atoi(argv[5]);
78 }
79 else
80 {
81 std::cout << "Usage: " << argv[0] << " size \n";
82 std::cout << "Usage: " << argv[0] << " auto size\n";
83 std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
84 std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
85 std::cout << "Usage: " << argv[0] << " check\n";
86 std::cout << "Options:\n";
87 std::cout << " size unique size of the 2 matrices (integer)\n";
88 std::cout << " auto automatically set the number of repetitions and tries\n";
89 std::cout << " nbloops number of times the GEMM routines is executed\n";
90 std::cout << " nbtries number of times the loop is benched (return the best try)\n";
91 std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
92 std::cout << " check check eigen product using cblas as a reference\n";
93 exit(1);
94 }
95
96 double nbmad = double(M) * double(N) * double(K) * double(nbloops);
97
98 if (!(std::string(argv[1])=="auto"))
99 std::cout << M << " x " << N << " x " << K << "\n";
100
101 Scalar alpha, beta;
102 MyMatrix ma(M,K), mb(K,N), mc(M,N);
103 ma = MyMatrix::Random(M,K);
104 mb = MyMatrix::Random(K,N);
105 mc = MyMatrix::Random(M,N);
106
107 Eigen::BenchTimer timer;
108
109 // we simply compute c += a*b, so:
110 alpha = 1;
111 beta = 1;
112
113 // bench cblas
114 // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
115 if (!(std::string(argv[1])=="auto"))
116 {
117 timer.reset();
118 for (uint k=0 ; k<nbtries ; ++k)
119 {
120 timer.start();
121 for (uint j=0 ; j<nbloops ; ++j)
122 #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
123 CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
124 #else
125 CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
126 #endif
127 timer.stop();
128 }
129 if (!(std::string(argv[1])=="auto"))
130 std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
131 else
132 std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
133 }
134
135 // clear
136 ma = MyMatrix::Random(M,K);
137 mb = MyMatrix::Random(K,N);
138 mc = MyMatrix::Random(M,N);
139
140 // eigen
141 // if (!(std::string(argv[1])=="auto"))
142 {
143 timer.reset();
144 for (uint k=0 ; k<nbtries ; ++k)
145 {
146 timer.start();
147 bench_eigengemm(mc, ma, mb, nbloops);
148 timer.stop();
149 }
150 if (!(std::string(argv[1])=="auto"))
151 std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
152 else
153 std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
154 }
155
156 std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
157 std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
158
159
160 return 0;
161 }
162
163 using namespace Eigen;
164
bench_eigengemm(MyMatrix & mc,const MyMatrix & ma,const MyMatrix & mb,int nbloops)165 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
166 {
167 for (uint j=0 ; j<nbloops ; ++j)
168 mc.noalias() += ma * mb;
169 }
170
171 #define MYVERIFY(A,M) if (!(A)) { \
172 std::cout << "FAIL: " << M << "\n"; \
173 }
check_product(int M,int N,int K)174 void check_product(int M, int N, int K)
175 {
176 MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
177 ma = MyMatrix::Random(M,K);
178 mb = MyMatrix::Random(K,N);
179 maT = ma.transpose();
180 mbT = mb.transpose();
181 mc = MyMatrix::Random(M,N);
182
183 MyMatrix::Scalar eps = 1e-4;
184
185 meigen = mref = mc;
186 CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
187 meigen += ma * mb;
188 MYVERIFY(meigen.isApprox(mref, eps),". * .");
189
190 meigen = mref = mc;
191 CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
192 meigen += maT.transpose() * mb;
193 MYVERIFY(meigen.isApprox(mref, eps),"T * .");
194
195 meigen = mref = mc;
196 CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
197 meigen += (maT.transpose()) * (mbT.transpose());
198 MYVERIFY(meigen.isApprox(mref, eps),"T * T");
199
200 meigen = mref = mc;
201 CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
202 meigen += ma * mbT.transpose();
203 MYVERIFY(meigen.isApprox(mref, eps),". * T");
204 }
205
check_product(void)206 void check_product(void)
207 {
208 int M, N, K;
209 for (uint i=0; i<1000; ++i)
210 {
211 M = internal::random<int>(1,64);
212 N = internal::random<int>(1,768);
213 K = internal::random<int>(1,768);
214 M = (0 + M) * 1;
215 std::cout << M << " x " << N << " x " << K << "\n";
216 check_product(M, N, K);
217 }
218 }
219
220