• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "SpeedTest.h"
2 
3 #include "Random.h"
4 
5 #include <stdio.h>   // for printf
6 #include <memory.h>  // for memset
7 #include <math.h>    // for sqrt
8 #include <algorithm> // for sort
9 
10 //-----------------------------------------------------------------------------
11 // We view our timing values as a series of random variables V that has been
12 // contaminated with occasional outliers due to cache misses, thread
13 // preemption, etcetera. To filter out the outliers, we search for the largest
14 // subset of V such that all its values are within three standard deviations
15 // of the mean.
16 
CalcMean(std::vector<double> & v)17 double CalcMean ( std::vector<double> & v )
18 {
19   double mean = 0;
20 
21   for(int i = 0; i < (int)v.size(); i++)
22   {
23     mean += v[i];
24   }
25 
26   mean /= double(v.size());
27 
28   return mean;
29 }
30 
CalcMean(std::vector<double> & v,int a,int b)31 double CalcMean ( std::vector<double> & v, int a, int b )
32 {
33   double mean = 0;
34 
35   for(int i = a; i <= b; i++)
36   {
37     mean += v[i];
38   }
39 
40   mean /= (b-a+1);
41 
42   return mean;
43 }
44 
CalcStdv(std::vector<double> & v,int a,int b)45 double CalcStdv ( std::vector<double> & v, int a, int b )
46 {
47   double mean = CalcMean(v,a,b);
48 
49   double stdv = 0;
50 
51   for(int i = a; i <= b; i++)
52   {
53     double x = v[i] - mean;
54 
55     stdv += x*x;
56   }
57 
58   stdv = sqrt(stdv / (b-a+1));
59 
60   return stdv;
61 }
62 
63 // Return true if the largest value in v[0,len) is more than three
64 // standard deviations from the mean
65 
ContainsOutlier(std::vector<double> & v,size_t len)66 bool ContainsOutlier ( std::vector<double> & v, size_t len )
67 {
68   double mean = 0;
69 
70   for(size_t i = 0; i < len; i++)
71   {
72     mean += v[i];
73   }
74 
75   mean /= double(len);
76 
77   double stdv = 0;
78 
79   for(size_t i = 0; i < len; i++)
80   {
81     double x = v[i] - mean;
82     stdv += x*x;
83   }
84 
85   stdv = sqrt(stdv / double(len));
86 
87   double cutoff = mean + stdv*3;
88 
89   return v[len-1] > cutoff;
90 }
91 
92 // Do a binary search to find the largest subset of v that does not contain
93 // outliers.
94 
FilterOutliers(std::vector<double> & v)95 void FilterOutliers ( std::vector<double> & v )
96 {
97   std::sort(v.begin(),v.end());
98 
99   size_t len = 0;
100 
101   for(size_t x = 0x40000000; x; x = x >> 1 )
102   {
103     if((len | x) >= v.size()) continue;
104 
105     if(!ContainsOutlier(v,len | x))
106     {
107       len |= x;
108     }
109   }
110 
111   v.resize(len);
112 }
113 
114 // Iteratively tighten the set to find a subset that does not contain
115 // outliers. I'm not positive this works correctly in all cases.
116 
FilterOutliers2(std::vector<double> & v)117 void FilterOutliers2 ( std::vector<double> & v )
118 {
119   std::sort(v.begin(),v.end());
120 
121   int a = 0;
122   int b = (int)(v.size() - 1);
123 
124   for(int i = 0; i < 10; i++)
125   {
126     //printf("%d %d\n",a,b);
127 
128     double mean = CalcMean(v,a,b);
129     double stdv = CalcStdv(v,a,b);
130 
131     double cutA = mean - stdv*3;
132     double cutB = mean + stdv*3;
133 
134     while((a < b) && (v[a] < cutA)) a++;
135     while((b > a) && (v[b] > cutB)) b--;
136   }
137 
138   std::vector<double> v2;
139 
140   v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);
141 
142   v.swap(v2);
143 }
144 
145 //-----------------------------------------------------------------------------
146 // We really want the rdtsc() calls to bracket the function call as tightly
147 // as possible, but that's hard to do portably. We'll try and get as close as
148 // possible by marking the function as NEVER_INLINE (to keep the optimizer from
149 // moving it) and marking the timing variables as "volatile register".
150 
timehash(pfHash hash,const void * key,int len,int seed)151 NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
152 {
153   volatile register int64_t begin,end;
154 
155   uint32_t temp[16];
156 
157   begin = rdtsc();
158 
159   hash(key,len,seed,temp);
160 
161   end = rdtsc();
162 
163   return end-begin;
164 }
165 
166 //-----------------------------------------------------------------------------
167 
SpeedTest(pfHash hash,uint32_t seed,const int trials,const int blocksize,const int align)168 double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
169 {
170   Rand r(seed);
171 
172   uint8_t * buf = new uint8_t[blocksize + 512];
173 
174   uint64_t t1 = reinterpret_cast<uint64_t>(buf);
175 
176   t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
177   t1 += align;
178 
179   uint8_t * block = reinterpret_cast<uint8_t*>(t1);
180 
181   r.rand_p(block,blocksize);
182 
183   //----------
184 
185   std::vector<double> times;
186   times.reserve(trials);
187 
188   for(int itrial = 0; itrial < trials; itrial++)
189   {
190     r.rand_p(block,blocksize);
191 
192     double t = (double)timehash(hash,block,blocksize,itrial);
193 
194     if(t > 0) times.push_back(t);
195   }
196 
197   //----------
198 
199   std::sort(times.begin(),times.end());
200 
201   FilterOutliers(times);
202 
203   delete [] buf;
204 
205   return CalcMean(times);
206 }
207 
208 //-----------------------------------------------------------------------------
209 // 256k blocks seem to give the best results.
210 
BulkSpeedTest(pfHash hash,uint32_t seed)211 void BulkSpeedTest ( pfHash hash, uint32_t seed )
212 {
213   const int trials = 2999;
214   const int blocksize = 256 * 1024;
215 
216   printf("Bulk speed test - %d-byte keys\n",blocksize);
217 
218   for(int align = 0; align < 8; align++)
219   {
220     double cycles = SpeedTest(hash,seed,trials,blocksize,align);
221 
222     double bestbpc = double(blocksize)/cycles;
223 
224     double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
225     printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
226   }
227 }
228 
229 //-----------------------------------------------------------------------------
230 
TinySpeedTest(pfHash hash,int hashsize,int keysize,uint32_t seed,bool verbose,double &)231 void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )
232 {
233   const int trials = 999999;
234 
235   if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);
236 
237   double cycles = SpeedTest(hash,seed,trials,keysize,0);
238 
239   printf("%8.2f cycles/hash\n",cycles);
240 }
241 
242 //-----------------------------------------------------------------------------
243