• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright David Abrahams, Matthias Troyer, Michael Gauckler
2 // 2005. Distributed under the Boost Software License, Version
3 // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
4 // http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(LIVE_CODE_TYPE)
7 # define LIVE_CODE_TYPE int
8 #endif
9 
10 #include <boost/timer.hpp>
11 
12 namespace test
13 {
14   // This value is required to ensure that a smart compiler's dead
15   // code elimination doesn't optimize away anything we're testing.
16   // We'll use it to compute the return code of the executable to make
17   // sure it's needed.
18   LIVE_CODE_TYPE live_code;
19 
20   // Call objects of the given Accumulator type repeatedly with x as
21   // an argument.
22   template <class Accumulator, class Arg>
hammer(Arg const & x,long const repeats)23   void hammer(Arg const& x, long const repeats)
24   {
25       // Strategy: because the sum in an accumulator after each call
26       // depends on the previous value of the sum, the CPU's pipeline
27       // might be stalled while waiting for the previous addition to
28       // complete.  Therefore, we allocate an array of accumulators,
29       // and update them in sequence, so that there's no dependency
30       // between adjacent addition operations.
31       //
32       // Additionally, if there were only one accumulator, the
33       // compiler or CPU might decide to update the value in a
34       // register rather that writing it back to memory.  we want each
35       // operation to at least update the L1 cache.  *** Note: This
36       // concern is specific to the particular application at which
37       // we're targeting the test. ***
38 
39       // This has to be at least as large as the number of
40       // simultaneous accumulations that can be executing in the
41       // compiler pipeline.  A safe number here is larger than the
42       // machine's maximum pipeline depth. If you want to test the L2
43       // or L3 cache, or main memory, you can increase the size of
44       // this array.  1024 is an upper limit on the pipeline depth of
45       // current vector machines.
46       const std::size_t number_of_accumulators = 1024;
47       live_code = 0; // reset to zero
48 
49       Accumulator a[number_of_accumulators];
50 
51       for (long iteration = 0; iteration < repeats; ++iteration)
52       {
53           for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
54           {
55               (*ap)(x);
56           }
57       }
58 
59       // Accumulate all the partial sums to avoid dead code
60       // elimination.
61       for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
62       {
63           live_code += ap->sum;
64       }
65   }
66 
67   // Measure the time required to hammer accumulators of the given
68   // type with the argument x.
69   template <class Accumulator, class T>
measure(T const & x,long const repeats)70   double measure(T const& x, long const repeats)
71   {
72       // Hammer accumulators a couple of times to ensure the
73       // instruction cache is full of our test code, and that we don't
74       // measure the cost of a page fault for accessing the data page
75       // containing the memory where the accumulators will be
76       // allocated
77       hammer<Accumulator>(x, repeats);
78       hammer<Accumulator>(x, repeats);
79 
80       // Now start a timer
81       boost::timer time;
82       hammer<Accumulator>(x, repeats);  // This time, we'll measure
83       return time.elapsed() / repeats;  // return the time of one iteration
84   }
85 }
86