• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
2 // possible options:
3 //    -DEIGEN_DONT_VECTORIZE
4 //    -msse2
5 
6 // #define EIGEN_DEFAULT_TO_ROW_MAJOR
7 #define _FLOAT
8 
9 #include <iostream>
10 
11 #include <Eigen/Core>
12 #include "BenchTimer.h"
13 
14 // include the BLAS headers
15 extern "C" {
16 #include <cblas.h>
17 }
18 #include <string>
19 
20 #ifdef _FLOAT
21 typedef float Scalar;
22 #define CBLAS_GEMM cblas_sgemm
23 #else
24 typedef double Scalar;
25 #define CBLAS_GEMM cblas_dgemm
26 #endif
27 
28 
29 typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
30 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
31 void check_product(int M, int N, int K);
32 void check_product(void);
33 
main(int argc,char * argv[])34 int main(int argc, char *argv[])
35 {
36   // disable SSE exceptions
37   #ifdef __GNUC__
38   {
39     int aux;
40     asm(
41     "stmxcsr   %[aux]           \n\t"
42     "orl       $32832, %[aux]   \n\t"
43     "ldmxcsr   %[aux]           \n\t"
44     : : [aux] "m" (aux));
45   }
46   #endif
47 
48   int nbtries=1, nbloops=1, M, N, K;
49 
50   if (argc==2)
51   {
52     if (std::string(argv[1])=="check")
53       check_product();
54     else
55       M = N = K = atoi(argv[1]);
56   }
57   else if ((argc==3) && (std::string(argv[1])=="auto"))
58   {
59     M = N = K = atoi(argv[2]);
60     nbloops = 1000000000/(M*M*M);
61     if (nbloops<1)
62       nbloops = 1;
63     nbtries = 6;
64   }
65   else if (argc==4)
66   {
67     M = N = K = atoi(argv[1]);
68     nbloops = atoi(argv[2]);
69     nbtries = atoi(argv[3]);
70   }
71   else if (argc==6)
72   {
73     M = atoi(argv[1]);
74     N = atoi(argv[2]);
75     K = atoi(argv[3]);
76     nbloops = atoi(argv[4]);
77     nbtries = atoi(argv[5]);
78   }
79   else
80   {
81     std::cout << "Usage: " << argv[0] << " size  \n";
82     std::cout << "Usage: " << argv[0] << " auto size\n";
83     std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
84     std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
85     std::cout << "Usage: " << argv[0] << " check\n";
86     std::cout << "Options:\n";
87     std::cout << "    size       unique size of the 2 matrices (integer)\n";
88     std::cout << "    auto       automatically set the number of repetitions and tries\n";
89     std::cout << "    nbloops    number of times the GEMM routines is executed\n";
90     std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
91     std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
92     std::cout << "    check      check eigen product using cblas as a reference\n";
93     exit(1);
94   }
95 
96   double nbmad = double(M) * double(N) * double(K) * double(nbloops);
97 
98   if (!(std::string(argv[1])=="auto"))
99     std::cout << M << " x " << N << " x " << K << "\n";
100 
101   Scalar alpha, beta;
102   MyMatrix ma(M,K), mb(K,N), mc(M,N);
103   ma = MyMatrix::Random(M,K);
104   mb = MyMatrix::Random(K,N);
105   mc = MyMatrix::Random(M,N);
106 
107   Eigen::BenchTimer timer;
108 
109   // we simply compute c += a*b, so:
110   alpha = 1;
111   beta = 1;
112 
113   // bench cblas
114   // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
115   if (!(std::string(argv[1])=="auto"))
116   {
117     timer.reset();
118     for (uint k=0 ; k<nbtries ; ++k)
119     {
120         timer.start();
121         for (uint j=0 ; j<nbloops ; ++j)
122               #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
123               CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
124               #else
125               CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
126               #endif
127         timer.stop();
128     }
129     if (!(std::string(argv[1])=="auto"))
130       std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
131     else
132         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
133   }
134 
135   // clear
136   ma = MyMatrix::Random(M,K);
137   mb = MyMatrix::Random(K,N);
138   mc = MyMatrix::Random(M,N);
139 
140   // eigen
141 //   if (!(std::string(argv[1])=="auto"))
142   {
143       timer.reset();
144       for (uint k=0 ; k<nbtries ; ++k)
145       {
146           timer.start();
147           bench_eigengemm(mc, ma, mb, nbloops);
148           timer.stop();
149       }
150       if (!(std::string(argv[1])=="auto"))
151         std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
152       else
153         std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
154   }
155 
156   std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
157   std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
158 
159 
160   return 0;
161 }
162 
163 using namespace Eigen;
164 
bench_eigengemm(MyMatrix & mc,const MyMatrix & ma,const MyMatrix & mb,int nbloops)165 void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
166 {
167   for (uint j=0 ; j<nbloops ; ++j)
168       mc.noalias() += ma * mb;
169 }
170 
171 #define MYVERIFY(A,M) if (!(A)) { \
172     std::cout << "FAIL: " << M << "\n"; \
173   }
check_product(int M,int N,int K)174 void check_product(int M, int N, int K)
175 {
176   MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
177   ma = MyMatrix::Random(M,K);
178   mb = MyMatrix::Random(K,N);
179   maT = ma.transpose();
180   mbT = mb.transpose();
181   mc = MyMatrix::Random(M,N);
182 
183   MyMatrix::Scalar eps = 1e-4;
184 
185   meigen = mref = mc;
186   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
187   meigen += ma * mb;
188   MYVERIFY(meigen.isApprox(mref, eps),". * .");
189 
190   meigen = mref = mc;
191   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
192   meigen += maT.transpose() * mb;
193   MYVERIFY(meigen.isApprox(mref, eps),"T * .");
194 
195   meigen = mref = mc;
196   CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
197   meigen += (maT.transpose()) * (mbT.transpose());
198   MYVERIFY(meigen.isApprox(mref, eps),"T * T");
199 
200   meigen = mref = mc;
201   CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
202   meigen += ma * mbT.transpose();
203   MYVERIFY(meigen.isApprox(mref, eps),". * T");
204 }
205 
check_product(void)206 void check_product(void)
207 {
208   int M, N, K;
209   for (uint i=0; i<1000; ++i)
210   {
211     M = internal::random<int>(1,64);
212     N = internal::random<int>(1,768);
213     K = internal::random<int>(1,768);
214     M = (0 + M) * 1;
215     std::cout << M << " x " << N << " x " << K << "\n";
216     check_product(M, N, K);
217   }
218 }
219 
220