1 // Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2005.
2 // Distributed under the Boost Software License, Version 1.0.
3 // (See accompanying file LICENSE_1_0.txt or copy at
4 // http://www.boost.org/LICENSE_1_0.txt)
5
6 #include <boost/parameter/name.hpp>
7 #include <boost/config/workaround.hpp>
8 #include <boost/timer.hpp>
9 #include <iostream>
10
11 namespace test {
12
13 //
14 // This test measures the abstraction overhead of using the named
15 // parameter interface. Some actual test results have been recorded
16 // in timings.txt in this source file's directory, or
17 // http://www.boost.org/libs/parameter/test/timings.txt.
18 //
19 // Caveats:
20 //
21 // 1. This test penalizes the named parameter library slightly, by
22 // passing two arguments through the named interface, while
23 // only passing one through the plain C++ interface.
24 //
25 // 2. This test does not measure the case where an ArgumentPack is
26 // so large that it doesn't fit in the L1 cache.
27 //
28 // 3. Although we've tried to make this test as general as possible,
29 // we are targeting it at a specific application. Where that
30 // affects design decisions, we've noted it below in ***...***.
31 //
32 // 4. The first time you run this program, the time may not be
33 // representative because of disk and memory cache effects, so
34 // always run it multiple times and ignore the first
35 // measurement. This approach will also allow you to estimate
36 // the statistical error of your test by observing the
37 // variation in the valid times.
38 //
39 // 5. Try to run this program on a machine that's otherwise idle,
40 // or other processes and even device hardware interrupts may
41 // interfere by causing caches to be flushed.
42
43 // Accumulator function object with plain C++ interface
44 template <typename T>
45 struct plain_weight_running_total
46 {
plain_weight_running_totaltest::plain_weight_running_total47 plain_weight_running_total()
48 #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
49 : sum(T())
50 #else
51 : sum()
52 #endif
53 {
54 }
55
operator ()test::plain_weight_running_total56 void operator()(T w)
57 {
58 this->sum += w;
59 }
60
61 T sum;
62 };
63
64 BOOST_PARAMETER_NAME(weight)
65 BOOST_PARAMETER_NAME(value)
66
67 // Accumulator function object with named parameter interface
68 template <typename T>
69 struct named_param_weight_running_total
70 {
named_param_weight_running_totaltest::named_param_weight_running_total71 named_param_weight_running_total()
72 #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
73 : sum(T())
74 #else
75 : sum()
76 #endif
77 {
78 }
79
80 template <typename ArgumentPack>
operator ()test::named_param_weight_running_total81 void operator()(ArgumentPack const& variates)
82 {
83 this->sum += variates[test::_weight];
84 }
85
86 T sum;
87 };
88
// Sink for every partial sum the benchmark produces. The program's
// exit status is computed from it, so an optimizer performing dead
// code elimination cannot discard the accumulations being timed.
// Starts at zero (spelled out here; a namespace-scope double is
// zero-initialized in any case).
double live_code = 0.0;
93
94 // Call objects of the given Accumulator type repeatedly
95 // with x an argument.
96 template <typename Accumulator, typename Arg>
hammer(Arg const & x,long const repeats)97 void hammer(Arg const& x, long const repeats)
98 {
99 // Strategy: because the sum in an accumulator after each call
100 // depends on the previous value of the sum, the CPU's pipeline
101 // might be stalled while waiting for the previous addition to
102 // complete. Therefore, we allocate an array of accumulators,
103 // and update them in sequence, so that there's no dependency
104 // between adjacent addition operations.
105 //
106 // Additionally, if there were only one accumulator, the compiler or
107 // CPU might decide to update the value in a register rather than
108 // writing it back to memory. We want each operation to at least
109 // update the L1 cache. *** Note: This concern is specific to the
110 // particular application at which we're targeting the test. ***
111
112 // This has to be at least as large as the number of simultaneous
113 // accumulations that can be executing in the compiler pipeline. A
114 // safe number here is larger than the machine's maximum pipeline
115 // depth. If you want to test the L2 or L3 cache, or main memory,
116 // you can increase the size of this array. 1024 is an upper limit
117 // on the pipeline depth of current vector machines.
118 std::size_t const number_of_accumulators = 1024;
119
120 Accumulator a[number_of_accumulators];
121
122 for (long iteration = 0; iteration < repeats; ++iteration)
123 {
124 for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
125 {
126 (*ap)(x);
127 }
128 }
129
130 // Accumulate all the partial sums to avoid dead code elimination.
131 for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
132 {
133 test::live_code += ap->sum;
134 }
135 }
136
137 // Measure the time required to hammer accumulators of the given
138 // type with the argument x.
139 template <typename Accumulator, typename T>
measure(T const & x,long const repeats)140 double measure(T const& x, long const repeats)
141 {
142 // Hammer accumulators a couple of times to ensure the instruction
143 // cache is full of our test code, and that we don't measure the cost
144 // of a page fault for accessing the data page containing the memory
145 // where the accumulators will be allocated.
146 test::hammer<Accumulator>(x, repeats);
147 test::hammer<Accumulator>(x, repeats);
148
149 // Now start a timer.
150 boost::timer time;
151 test::hammer<Accumulator>(x, repeats); // This time, we'll measure.
152 return time.elapsed();
153 }
154 }
155
main()156 int main()
157 {
158 // First decide how many repetitions to measure.
159 long repeats = 100;
160 double measured = 0;
161
162 while (measured < 1.0 && repeats <= 10000000)
163 {
164 repeats *= 10;
165
166 boost::timer time;
167
168 test::hammer<test::plain_weight_running_total<double> >(.1, repeats);
169 test::hammer<test::named_param_weight_running_total<double> >(
170 (test::_weight = .1, test::_value = .2), repeats
171 );
172
173 measured = time.elapsed();
174 }
175
176 std::cout
177 << "plain time: "
178 << test::measure<test::plain_weight_running_total<double> >(
179 .1, repeats
180 )
181 << std::endl;
182
183 std::cout
184 << "named parameter time: "
185 << test::measure<test::named_param_weight_running_total<double> >(
186 (test::_weight = .1, test::_value = .2), repeats
187 )
188 << std::endl;
189
190 // This is ultimately responsible for preventing all the test code
191 // from being optimized away. Change this to return 0 and you
192 // unplug the whole test's life support system.
193 return test::live_code < 0.;
194 }
195
196