• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Boost.Units - A C++ library for zero-overhead dimensional analysis and
2 // unit/quantity manipulation and conversion
3 //
4 // Copyright (C) 2003-2008 Matthias Christian Schabel
5 // Copyright (C) 2008 Steven Watanabe
6 //
7 // Distributed under the Boost Software License, Version 1.0. (See
8 // accompanying file LICENSE_1_0.txt or copy at
9 // http://www.boost.org/LICENSE_1_0.txt)
10 
11 /**
12 \file
13 
14 \brief performance.cpp
15 
16 \details
17 Test runtime performance.
18 
19 Output:
20 @verbatim
21 
22 multiplying ublas::matrix<double>(1000, 1000) : 25.03 seconds
23 multiplying ublas::matrix<quantity>(1000, 1000) : 24.49 seconds
24 tiled_matrix_multiply<double>(1000, 1000) : 1.12 seconds
25 tiled_matrix_multiply<quantity>(1000, 1000) : 1.16 seconds
26 solving y' = 1 - x + 4 * y with double: 1.97 seconds
27 solving y' = 1 - x + 4 * y with quantity: 1.84 seconds
28 
29 @endverbatim
30 **/
31 
32 #define _SCL_SECURE_NO_WARNINGS
33 
34 #include <cstdlib>
35 #include <ctime>
36 #include <algorithm>
37 #include <iostream>
38 #include <iomanip>
39 
40 #include <boost/config.hpp>
41 #include <boost/timer.hpp>
42 #include <boost/utility/result_of.hpp>
43 
44 #ifdef BOOST_MSVC
45 #pragma warning(push)
46 #pragma warning(disable:4267; disable:4127; disable:4244; disable:4100)
47 #endif
48 
49 #include <boost/numeric/ublas/matrix.hpp>
50 
51 #ifdef BOOST_MSVC
52 #pragma warning(pop)
53 #endif
54 
55 #include <boost/units/quantity.hpp>
56 #include <boost/units/systems/si.hpp>
57 #include <boost/units/cmath.hpp>
58 #include <boost/units/io.hpp>
59 
60 enum {
61     tile_block_size = 24
62 };
63 
64 template<class T0, class T1, class Out>
tiled_multiply_carray_inner(T0 * first,T1 * second,Out * out,int totalwidth,int width2,int height1,int common)65 void tiled_multiply_carray_inner(T0* first,
66                                  T1* second,
67                                  Out* out,
68                                  int totalwidth,
69                                  int width2,
70                                  int height1,
71                                  int common) {
72     for(int j = 0; j < height1; ++j) {
73         for(int i = 0; i < width2; ++i) {
74             Out value = out[j * totalwidth + i];
75             for(int k = 0; k < common; ++k) {
76                 value += first[k + totalwidth * j] * second[k * totalwidth + i];
77             }
78             out[j * totalwidth + i] = value;
79         }
80     }
81 }
82 
83 template<class T0, class T1, class Out>
tiled_multiply_carray_outer(T0 * first,T1 * second,Out * out,int width2,int height1,int common)84 void tiled_multiply_carray_outer(T0* first,
85                                  T1* second,
86                                  Out* out,
87                                  int width2,
88                                  int height1,
89                                  int common) {
90     std::fill_n(out, width2 * height1, Out());
91     int j = 0;
92     for(; j < height1 - tile_block_size; j += tile_block_size) {
93         int i = 0;
94         for(; i < width2 - tile_block_size; i += tile_block_size) {
95             int k = 0;
96             for(; k < common - tile_block_size; k += tile_block_size) {
97                 tiled_multiply_carray_inner(
98                     &first[k + width2 * j],
99                     &second[k * width2 + i],
100                     &out[j * width2 + i],
101                     width2,
102                     tile_block_size,
103                     tile_block_size,
104                     tile_block_size);
105             }
106             tiled_multiply_carray_inner(
107                 &first[k + width2 * j],
108                 &second[k * width2 + i],
109                 &out[j * width2 + i],
110                 width2,
111                 tile_block_size,
112                 tile_block_size,
113                 common - k);
114         }
115         int k = 0;
116         for(; k < common - tile_block_size; k += tile_block_size) {
117             tiled_multiply_carray_inner(
118                 &first[k + width2 * j],
119                 &second[k * width2 + i],
120                 &out[j * width2 + i],
121                 width2, width2 - i,
122                 tile_block_size,
123                 tile_block_size);
124         }
125         tiled_multiply_carray_inner(
126             &first[k + width2 * j],
127             &second[k * width2 + i],
128             &out[j * width2 + i],
129             width2, width2 - i,
130             tile_block_size,
131             common - k);
132     }
133     int i = 0;
134     for(; i < width2 - tile_block_size; i += tile_block_size) {
135         int k = 0;
136         for(; k < common - tile_block_size; k += tile_block_size) {
137             tiled_multiply_carray_inner(
138                 &first[k + width2 * j],
139                 &second[k * width2 + i],
140                 &out[j * width2 + i],
141                 width2,
142                 tile_block_size,
143                 height1 - j,
144                 tile_block_size);
145         }
146         tiled_multiply_carray_inner(
147             &first[k + width2 * j],
148             &second[k * width2 + i],
149             &out[j * width2 + i],
150             width2,
151             tile_block_size,
152             height1 - j,
153             common - k);
154     }
155     int k = 0;
156     for(; k < common - tile_block_size; k += tile_block_size) {
157         tiled_multiply_carray_inner(
158             &first[k + width2 * j],
159             &second[k * width2 + i],
160             &out[j * width2 + i],
161             width2,
162             width2 - i,
163             height1 - j,
164             tile_block_size);
165     }
166     tiled_multiply_carray_inner(
167         &first[k + width2 * j],
168         &second[k * width2 + i],
169         &out[j * width2 + i],
170         width2,
171         width2 - i,
172         height1 - j,
173         common - k);
174 }
175 
176 enum { max_value = 1000};
177 
178 template<class F, class T, class N, class R>
179 BOOST_CXX14_CONSTEXPR
solve_differential_equation(F f,T lower,T upper,N steps,R start)180 R solve_differential_equation(F f, T lower, T upper, N steps, R start) {
181     typedef typename F::template result<T, R>::type f_result;
182     T h = (upper - lower) / (1.0*steps);
183     for(N i = N(); i < steps; ++i) {
184         R y = start;
185         T x = lower + h * (1.0*i);
186         f_result k1 = f(x, y);
187         f_result k2 = f(x + h / 2.0, y + h * k1 / 2.0);
188         f_result k3 = f(x + h / 2.0, y + h * k2 / 2.0);
189         f_result k4 = f(x + h, y + h * k3);
190         start = y + h * (k1 + 2.0 * k2 + 2.0 * k3 + k4) / 6.0;
191     }
192     return(start);
193 }
194 
195 using namespace boost::units;
196 
197 //y' = 1 - x + 4 * y
198 struct f {
199     template<class Arg1, class Arg2> struct result;
200 
operator ()f201     BOOST_CONSTEXPR double operator()(const double& x, const double& y) const {
202         return(1.0 - x + 4.0 * y);
203     }
204 
205     boost::units::quantity<boost::units::si::velocity>
operator ()f206     BOOST_CONSTEXPR operator()(const quantity<si::time>& x,
207                                const quantity<si::length>& y) const {
208         using namespace boost::units;
209         using namespace si;
210         return(1.0 * meters / second -
211                 x * meters / pow<2>(seconds) +
212                 4.0 * y / seconds );
213     }
214 };
215 
216 template<>
217 struct f::result<double,double> {
218     typedef double type;
219 };
220 
221 template<>
222 struct f::result<quantity<si::time>, quantity<si::length> > {
223     typedef quantity<si::velocity> type;
224 };
225 
226 
227 
228 //y' = 1 - x + 4 * y
229 //y' - 4 * y = 1 - x
230 //e^(-4 * x) * (dy - 4 * y * dx) = e^(-4 * x) * (1 - x) * dx
231 //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) (1 - x) * dx
232 
233 //d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) * dx - x * e ^ (-4 * x) * dx
234 //d/dx(y * e ^ (-4 * x)) = d/dx((-3/16 + 1/4 * x) * e ^ (-4 * x))
235 //y * e ^ (-4 * x) = (-3/16 + 1/4 * x) * e ^ (-4 * x) + C
236 //y = (-3/16 + 1/4 * x) + C/e ^ (-4 * x)
237 //y = 1/4 * x - 3/16 + C * e ^ (4 * x)
238 
239 //y(0) = 1
240 //1 = - 3/16 + C
241 //C = 19/16
242 //y(x) = 1/4 * x - 3/16 + 19/16 * e ^ (4 * x)
243 
244 
245 
main()246 int main() {
247     boost::numeric::ublas::matrix<double> ublas_result;
248     {
249         boost::numeric::ublas::matrix<double> m1(max_value, max_value);
250         boost::numeric::ublas::matrix<double> m2(max_value, max_value);
251         std::srand(1492);
252         for(int i = 0; i < max_value; ++i) {
253             for(int j = 0; j < max_value; ++j) {
254                 m1(i,j) = std::rand();
255                 m2(i,j) = std::rand();
256             }
257         }
258         std::cout << "multiplying ublas::matrix<double>("
259                   << max_value << ", " << max_value << ") : ";
260         boost::timer timer;
261         ublas_result = (prod(m1, m2));
262         std::cout << timer.elapsed() << " seconds" << std::endl;
263     }
264     typedef boost::numeric::ublas::matrix<
265         boost::units::quantity<boost::units::si::dimensionless>
266     > matrix_type;
267     matrix_type ublas_resultq;
268     {
269         matrix_type m1(max_value, max_value);
270         matrix_type m2(max_value, max_value);
271         std::srand(1492);
272         for(int i = 0; i < max_value; ++i) {
273             for(int j = 0; j < max_value; ++j) {
274                 m1(i,j) = std::rand();
275                 m2(i,j) = std::rand();
276             }
277         }
278         std::cout << "multiplying ublas::matrix<quantity>("
279                   << max_value << ", " << max_value << ") : ";
280         boost::timer timer;
281         ublas_resultq = (prod(m1, m2));
282         std::cout << timer.elapsed() << " seconds" << std::endl;
283     }
284     std::vector<double> cresult(max_value * max_value);
285     {
286         std::vector<double> m1(max_value * max_value);
287         std::vector<double> m2(max_value * max_value);
288         std::srand(1492);
289         for(int i = 0; i < max_value * max_value; ++i) {
290             m1[i] = std::rand();
291             m2[i] = std::rand();
292         }
293         std::cout << "tiled_matrix_multiply<double>("
294                   << max_value << ", " << max_value << ") : ";
295         boost::timer timer;
296         tiled_multiply_carray_outer(
297             &m1[0],
298             &m2[0],
299             &cresult[0],
300             max_value,
301             max_value,
302             max_value);
303         std::cout << timer.elapsed() << " seconds" << std::endl;
304     }
305     std::vector<
306         boost::units::quantity<boost::units::si::energy>
307     >  cresultq(max_value * max_value);
308     {
309         std::vector<
310             boost::units::quantity<boost::units::si::force>
311         > m1(max_value * max_value);
312         std::vector<
313             boost::units::quantity<boost::units::si::length>
314         > m2(max_value * max_value);
315         std::srand(1492);
316         for(int i = 0; i < max_value * max_value; ++i) {
317             m1[i] = std::rand() * boost::units::si::newtons;
318             m2[i] = std::rand() * boost::units::si::meters;
319         }
320         std::cout << "tiled_matrix_multiply<quantity>("
321                   << max_value << ", " << max_value << ") : ";
322         boost::timer timer;
323         tiled_multiply_carray_outer(
324             &m1[0],
325             &m2[0],
326             &cresultq[0],
327             max_value,
328             max_value,
329             max_value);
330         std::cout << timer.elapsed() << " seconds" << std::endl;
331     }
332     for(int i = 0; i < max_value; ++i) {
333         for(int j = 0; j < max_value; ++j) {
334             const double diff =
335                 std::abs(ublas_result(i,j) - cresult[i * max_value + j]);
336             if(diff > ublas_result(i,j) /1e14) {
337                 std::cout << std::setprecision(15) << "Uh Oh. ublas_result("
338                           << i << "," << j << ") = " << ublas_result(i,j)
339                           << std::endl
340                           << "cresult[" << i << " * " << max_value << " + "
341                           << j << "] = " << cresult[i * max_value + j]
342                           << std::endl;
343                 return(EXIT_FAILURE);
344             }
345         }
346     }
347     {
348         std::vector<double> values(1000);
349         std::cout << "solving y' = 1 - x + 4 * y with double: ";
350         boost::timer timer;
351         for(int i = 0; i < 1000; ++i) {
352             const double x = .1 * i;
353             values[i] = solve_differential_equation(f(), 0.0, x, i * 100, 1.0);
354         }
355         std::cout << timer.elapsed() << " seconds" << std::endl;
356         for(int i = 0; i < 1000; ++i) {
357             const double x = .1 * i;
358             const double value = 1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x);
359             if(std::abs(values[i] - value) > value / 1e9) {
360                 std::cout << std::setprecision(15) << "i = : " << i
361                           << ", value = " << value << " approx = "  << values[i]
362                           << std::endl;
363                 return(EXIT_FAILURE);
364             }
365         }
366     }
367     {
368         using namespace boost::units;
369         using namespace si;
370         std::vector<quantity<length> > values(1000);
371         std::cout << "solving y' = 1 - x + 4 * y with quantity: ";
372         boost::timer timer;
373         for(int i = 0; i < 1000; ++i) {
374             const quantity<si::time> x = .1 * i * seconds;
375             values[i] = solve_differential_equation(
376                 f(),
377                 0.0 * seconds,
378                 x,
379                 i * 100,
380                 1.0 * meters);
381         }
382         std::cout << timer.elapsed() << " seconds" << std::endl;
383         for(int i = 0; i < 1000; ++i) {
384             const double x = .1 * i;
385             const quantity<si::length> value =
386                 (1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x)) * meters;
387             if(abs(values[i] - value) > value / 1e9) {
388                 std::cout << std::setprecision(15) << "i = : " << i
389                           << ", value = " << value << " approx = "
390                           << values[i] << std::endl;
391                 return(EXIT_FAILURE);
392             }
393         }
394     }
395 }
396