• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2005.
2 // Distributed under the Boost Software License, Version 1.0.
3 // (See accompanying file LICENSE_1_0.txt or copy at
4 // http://www.boost.org/LICENSE_1_0.txt)
5 
6 #include <boost/parameter/name.hpp>
7 #include <boost/config/workaround.hpp>
8 #include <boost/timer.hpp>
9 #include <iostream>
10 
11 namespace test {
12 
13     //
14     // This test measures the abstraction overhead of using the named
15     // parameter interface.  Some actual test results have been recorded
16     // in timings.txt in this source file's directory, or
17     // http://www.boost.org/libs/parameter/test/timings.txt.
18     //
19     // Caveats:
20     //
21     //   1. This test penalizes the named parameter library slightly, by
22     //      passing two arguments through the named interface, while
23     //      only passing one through the plain C++ interface.
24     //
25     //   2. This test does not measure the case where an ArgumentPack is
26     //      so large that it doesn't fit in the L1 cache.
27     //
28     //   3. Although we've tried to make this test as general as possible,
29     //      we are targeting it at a specific application.  Where that
30     //      affects design decisions, we've noted it below in ***...***.
31     //
32     //   4. The first time you run this program, the time may not be
33     //      representative because of disk and memory cache effects, so
34     //      always run it multiple times and ignore the first
35     //      measurement.  This approach will also allow you to estimate
36     //      the statistical error of your test by observing the
37     //      variation in the valid times.
38     //
39     //   5. Try to run this program on a machine that's otherwise idle,
40     //      or other processes and even device hardware interrupts may
41     //      interfere by causing caches to be flushed.
42 
43     // Accumulator function object with plain C++ interface
44     template <typename T>
45     struct plain_weight_running_total
46     {
plain_weight_running_totaltest::plain_weight_running_total47         plain_weight_running_total()
48 #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
49           : sum(T())
50 #else
51           : sum()
52 #endif
53         {
54         }
55 
operator ()test::plain_weight_running_total56         void operator()(T w)
57         {
58             this->sum += w;
59         }
60 
61         T sum;
62     };
63 
64     BOOST_PARAMETER_NAME(weight)
65     BOOST_PARAMETER_NAME(value)
66 
67     // Accumulator function object with named parameter interface
68     template <typename T>
69     struct named_param_weight_running_total
70     {
named_param_weight_running_totaltest::named_param_weight_running_total71         named_param_weight_running_total()
72 #if BOOST_WORKAROUND(BOOST_MSVC, < 1300)
73           : sum(T())
74 #else
75           : sum()
76 #endif
77         {
78         }
79 
80         template <typename ArgumentPack>
operator ()test::named_param_weight_running_total81         void operator()(ArgumentPack const& variates)
82         {
83             this->sum += variates[test::_weight];
84         }
85 
86         T sum;
87     };
88 
89     // This value is required to ensure that a smart compiler's dead code
90     // elimination doesn't optimize away anything we're testing.  We'll use it
91     // to compute the return code of the executable to make sure it's needed.
92     double live_code;
93 
94     // Call objects of the given Accumulator type repeatedly
95     // with x an argument.
96     template <typename Accumulator, typename Arg>
hammer(Arg const & x,long const repeats)97     void hammer(Arg const& x, long const repeats)
98     {
99         // Strategy: because the sum in an accumulator after each call
100         // depends on the previous value of the sum, the CPU's pipeline
101         // might be stalled while waiting for the previous addition to
102         // complete.  Therefore, we allocate an array of accumulators,
103         // and update them in sequence, so that there's no dependency
104         // between adjacent addition operations.
105         //
106         // Additionally, if there were only one accumulator, the compiler or
107         // CPU might decide to update the value in a register rather than
108         // writing it back to memory.  We want each operation to at least
109         // update the L1 cache.  *** Note: This concern is specific to the
110         // particular application at which we're targeting the test. ***
111 
112         // This has to be at least as large as the number of simultaneous
113         // accumulations that can be executing in the compiler pipeline.  A
114         // safe number here is larger than the machine's maximum pipeline
115         // depth.  If you want to test the L2 or L3 cache, or main memory,
116         // you can increase the size of this array.  1024 is an upper limit
117         // on the pipeline depth of current vector machines.
118         std::size_t const number_of_accumulators = 1024;
119 
120         Accumulator a[number_of_accumulators];
121 
122         for (long iteration = 0; iteration < repeats; ++iteration)
123         {
124             for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
125             {
126                 (*ap)(x);
127             }
128         }
129 
130         // Accumulate all the partial sums to avoid dead code elimination.
131         for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
132         {
133             test::live_code += ap->sum;
134         }
135     }
136 
137     // Measure the time required to hammer accumulators of the given
138     // type with the argument x.
139     template <typename Accumulator, typename T>
measure(T const & x,long const repeats)140     double measure(T const& x, long const repeats)
141     {
142         // Hammer accumulators a couple of times to ensure the instruction
143         // cache is full of our test code, and that we don't measure the cost
144         // of a page fault for accessing the data page containing the memory
145         // where the accumulators will be allocated.
146         test::hammer<Accumulator>(x, repeats);
147         test::hammer<Accumulator>(x, repeats);
148 
149         // Now start a timer.
150         boost::timer time;
151         test::hammer<Accumulator>(x, repeats);  // This time, we'll measure.
152         return time.elapsed();
153     }
154 }
155 
main()156 int main()
157 {
158     // First decide how many repetitions to measure.
159     long repeats = 100;
160     double measured = 0;
161 
162     while (measured < 1.0 && repeats <= 10000000)
163     {
164         repeats *= 10;
165 
166         boost::timer time;
167 
168         test::hammer<test::plain_weight_running_total<double> >(.1, repeats);
169         test::hammer<test::named_param_weight_running_total<double> >(
170             (test::_weight = .1, test::_value = .2), repeats
171         );
172 
173         measured = time.elapsed();
174     }
175 
176     std::cout
177         << "plain time:           "
178         << test::measure<test::plain_weight_running_total<double> >(
179             .1, repeats
180         )
181         << std::endl;
182 
183     std::cout
184         << "named parameter time: "
185         << test::measure<test::named_param_weight_running_total<double> >(
186             (test::_weight = .1, test::_value = .2), repeats
187         )
188         << std::endl;
189 
190     // This is ultimately responsible for preventing all the test code
191     // from being optimized away.  Change this to return 0 and you
192     // unplug the whole test's life support system.
193     return test::live_code < 0.;
194 }
195 
196