1 // Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2 // 2005. Distributed under the Boost Software License, Version 3 // 1.0. (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(LIVE_CODE_TYPE) 7 # define LIVE_CODE_TYPE int 8 #endif 9 10 #include <boost/timer.hpp> 11 12 namespace test 13 { 14 // This value is required to ensure that a smart compiler's dead 15 // code elimination doesn't optimize away anything we're testing. 16 // We'll use it to compute the return code of the executable to make 17 // sure it's needed. 18 LIVE_CODE_TYPE live_code; 19 20 // Call objects of the given Accumulator type repeatedly with x as 21 // an argument. 22 template <class Accumulator, class Arg> hammer(Arg const & x,long const repeats)23 void hammer(Arg const& x, long const repeats) 24 { 25 // Strategy: because the sum in an accumulator after each call 26 // depends on the previous value of the sum, the CPU's pipeline 27 // might be stalled while waiting for the previous addition to 28 // complete. Therefore, we allocate an array of accumulators, 29 // and update them in sequence, so that there's no dependency 30 // between adjacent addition operations. 31 // 32 // Additionally, if there were only one accumulator, the 33 // compiler or CPU might decide to update the value in a 34 // register rather that writing it back to memory. we want each 35 // operation to at least update the L1 cache. *** Note: This 36 // concern is specific to the particular application at which 37 // we're targeting the test. *** 38 39 // This has to be at least as large as the number of 40 // simultaneous accumulations that can be executing in the 41 // compiler pipeline. A safe number here is larger than the 42 // machine's maximum pipeline depth. If you want to test the L2 43 // or L3 cache, or main memory, you can increase the size of 44 // this array. 1024 is an upper limit on the pipeline depth of 45 // current vector machines. 46 const std::size_t number_of_accumulators = 1024; 47 live_code = 0; // reset to zero 48 49 Accumulator a[number_of_accumulators]; 50 51 for (long iteration = 0; iteration < repeats; ++iteration) 52 { 53 for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) 54 { 55 (*ap)(x); 56 } 57 } 58 59 // Accumulate all the partial sums to avoid dead code 60 // elimination. 61 for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) 62 { 63 live_code += ap->sum; 64 } 65 } 66 67 // Measure the time required to hammer accumulators of the given 68 // type with the argument x. 69 template <class Accumulator, class T> measure(T const & x,long const repeats)70 double measure(T const& x, long const repeats) 71 { 72 // Hammer accumulators a couple of times to ensure the 73 // instruction cache is full of our test code, and that we don't 74 // measure the cost of a page fault for accessing the data page 75 // containing the memory where the accumulators will be 76 // allocated 77 hammer<Accumulator>(x, repeats); 78 hammer<Accumulator>(x, repeats); 79 80 // Now start a timer 81 boost::timer time; 82 hammer<Accumulator>(x, repeats); // This time, we'll measure 83 return time.elapsed() / repeats; // return the time of one iteration 84 } 85 } 86