1 //-----------------------------------------------------------------------------
2 // Keyset tests generate various sorts of difficult-to-hash keysets and compare
3 // the distribution and collision frequency of the hash results against an
4 // ideal random distribution
5
6 // The sanity checks are also in this cpp/h
7
8 #pragma once
9
10 #include "Types.h"
11 #include "Stats.h"
12 #include "Random.h" // for rand_p
13
14 #include <algorithm> // for std::swap
15 #include <assert.h>
16
17 //-----------------------------------------------------------------------------
18 // Sanity tests
19
20 bool VerificationTest ( pfHash hash, const int hashbits, uint32_t expected, bool verbose );
21 bool SanityTest ( pfHash hash, const int hashbits );
22 void AppendedZeroesTest ( pfHash hash, const int hashbits );
23
24 //-----------------------------------------------------------------------------
25 // Keyset 'Combination' - all possible combinations of input blocks
26
27 template< typename hashtype >
CombinationKeygenRecurse(uint32_t * key,int len,int maxlen,uint32_t * blocks,int blockcount,pfHash hash,std::vector<hashtype> & hashes)28 void CombinationKeygenRecurse ( uint32_t * key, int len, int maxlen,
29 uint32_t * blocks, int blockcount,
30 pfHash hash, std::vector<hashtype> & hashes )
31 {
32 if(len == maxlen) return;
33
34 for(int i = 0; i < blockcount; i++)
35 {
36 key[len] = blocks[i];
37
38 //if(len == maxlen-1)
39 {
40 hashtype h;
41 hash(key,(len+1) * sizeof(uint32_t),0,&h);
42 hashes.push_back(h);
43 }
44
45 //else
46 {
47 CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,hash,hashes);
48 }
49 }
50 }
51
52 template< typename hashtype >
CombinationKeyTest(hashfunc<hashtype> hash,int maxlen,uint32_t * blocks,int blockcount,bool testColl,bool testDist,bool drawDiagram)53 bool CombinationKeyTest ( hashfunc<hashtype> hash, int maxlen, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
54 {
55 printf("Keyset 'Combination' - up to %d blocks from a set of %d - ",maxlen,blockcount);
56
57 //----------
58
59 std::vector<hashtype> hashes;
60
61 uint32_t * key = new uint32_t[maxlen];
62
63 CombinationKeygenRecurse<hashtype>(key,0,maxlen,blocks,blockcount,hash,hashes);
64
65 delete [] key;
66
67 printf("%d keys\n",(int)hashes.size());
68
69 //----------
70
71 bool result = true;
72
73 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
74
75 printf("\n");
76
77 return result;
78 }
79
80 //----------------------------------------------------------------------------
81 // Keyset 'Permutation' - given a set of 32-bit blocks, generate keys
82 // consisting of all possible permutations of those blocks
83
84 template< typename hashtype >
PermutationKeygenRecurse(pfHash hash,uint32_t * blocks,int blockcount,int k,std::vector<hashtype> & hashes)85 void PermutationKeygenRecurse ( pfHash hash, uint32_t * blocks, int blockcount, int k, std::vector<hashtype> & hashes )
86 {
87 if(k == blockcount-1)
88 {
89 hashtype h;
90
91 hash(blocks,blockcount * sizeof(uint32_t),0,&h);
92
93 hashes.push_back(h);
94
95 return;
96 }
97
98 for(int i = k; i < blockcount; i++)
99 {
100 std::swap(blocks[k],blocks[i]);
101
102 PermutationKeygenRecurse(hash,blocks,blockcount,k+1,hashes);
103
104 std::swap(blocks[k],blocks[i]);
105 }
106 }
107
108 template< typename hashtype >
PermutationKeyTest(hashfunc<hashtype> hash,uint32_t * blocks,int blockcount,bool testColl,bool testDist,bool drawDiagram)109 bool PermutationKeyTest ( hashfunc<hashtype> hash, uint32_t * blocks, int blockcount, bool testColl, bool testDist, bool drawDiagram )
110 {
111 printf("Keyset 'Permutation' - %d blocks - ",blockcount);
112
113 //----------
114
115 std::vector<hashtype> hashes;
116
117 PermutationKeygenRecurse<hashtype>(hash,blocks,blockcount,0,hashes);
118
119 printf("%d keys\n",(int)hashes.size());
120
121 //----------
122
123 bool result = true;
124
125 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
126
127 printf("\n");
128
129 return result;
130 }
131
132 //-----------------------------------------------------------------------------
133 // Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set
134
135 template < typename keytype, typename hashtype >
SparseKeygenRecurse(pfHash hash,int start,int bitsleft,bool inclusive,keytype & k,std::vector<hashtype> & hashes)136 void SparseKeygenRecurse ( pfHash hash, int start, int bitsleft, bool inclusive, keytype & k, std::vector<hashtype> & hashes )
137 {
138 const int nbytes = sizeof(keytype);
139 const int nbits = nbytes * 8;
140
141 hashtype h;
142
143 for(int i = start; i < nbits; i++)
144 {
145 flipbit(&k,nbytes,i);
146
147 if(inclusive || (bitsleft == 1))
148 {
149 hash(&k,sizeof(keytype),0,&h);
150 hashes.push_back(h);
151 }
152
153 if(bitsleft > 1)
154 {
155 SparseKeygenRecurse(hash,i+1,bitsleft-1,inclusive,k,hashes);
156 }
157
158 flipbit(&k,nbytes,i);
159 }
160 }
161
162 //----------
163
164 template < int keybits, typename hashtype >
SparseKeyTest(hashfunc<hashtype> hash,const int setbits,bool inclusive,bool testColl,bool testDist,bool drawDiagram)165 bool SparseKeyTest ( hashfunc<hashtype> hash, const int setbits, bool inclusive, bool testColl, bool testDist, bool drawDiagram )
166 {
167 printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, inclusive ? "up to" : "exactly", setbits);
168
169 typedef Blob<keybits> keytype;
170
171 std::vector<hashtype> hashes;
172
173 keytype k;
174 memset(&k,0,sizeof(k));
175
176 if(inclusive)
177 {
178 hashtype h;
179
180 hash(&k,sizeof(keytype),0,&h);
181
182 hashes.push_back(h);
183 }
184
185 SparseKeygenRecurse(hash,0,setbits,inclusive,k,hashes);
186
187 printf("%d keys\n",(int)hashes.size());
188
189 bool result = true;
190
191 result &= TestHashList<hashtype>(hashes,testColl,testDist,drawDiagram);
192
193 printf("\n");
194
195 return result;
196 }
197
198 //-----------------------------------------------------------------------------
199 // Keyset 'Windows' - for all possible N-bit windows of a K-bit key, generate
200 // all possible keys with bits set in that window
201
202 template < typename keytype, typename hashtype >
WindowedKeyTest(hashfunc<hashtype> hash,const int windowbits,bool testCollision,bool testDistribution,bool drawDiagram)203 bool WindowedKeyTest ( hashfunc<hashtype> hash, const int windowbits, bool testCollision, bool testDistribution, bool drawDiagram )
204 {
205 const int keybits = sizeof(keytype) * 8;
206 const int keycount = 1 << windowbits;
207
208 std::vector<hashtype> hashes;
209 hashes.resize(keycount);
210
211 bool result = true;
212
213 int testcount = keybits;
214
215 printf("Keyset 'Windowed' - %3d-bit key, %3d-bit window - %d tests, %d keys per test\n",keybits,windowbits,testcount,keycount);
216
217 for(int j = 0; j <= testcount; j++)
218 {
219 int minbit = j;
220
221 keytype key;
222
223 for(int i = 0; i < keycount; i++)
224 {
225 key = i;
226 //key = key << minbit;
227
228 lrot(&key,sizeof(keytype),minbit);
229
230 hash(&key,sizeof(keytype),0,&hashes[i]);
231 }
232
233 printf("Window at %3d - ",j);
234
235 result &= TestHashList(hashes,testCollision,testDistribution,drawDiagram);
236
237 //printf("\n");
238 }
239
240 return result;
241 }
242
243 //-----------------------------------------------------------------------------
244 // Keyset 'Cyclic' - generate keys that consist solely of N repetitions of M
245 // bytes.
246
247 // (This keyset type is designed to make MurmurHash2 fail)
248
249 template < typename hashtype >
CyclicKeyTest(pfHash hash,int cycleLen,int cycleReps,const int keycount,bool drawDiagram)250 bool CyclicKeyTest ( pfHash hash, int cycleLen, int cycleReps, const int keycount, bool drawDiagram )
251 {
252 printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount);
253
254 Rand r(483723);
255
256 std::vector<hashtype> hashes;
257 hashes.resize(keycount);
258
259 int keyLen = cycleLen * cycleReps;
260
261 uint8_t * cycle = new uint8_t[cycleLen + 16];
262 uint8_t * key = new uint8_t[keyLen];
263
264 //----------
265
266 for(int i = 0; i < keycount; i++)
267 {
268 r.rand_p(cycle,cycleLen);
269
270 *(uint32_t*)cycle = f3mix(i ^ 0x746a94f1);
271
272 for(int j = 0; j < keyLen; j++)
273 {
274 key[j] = cycle[j % cycleLen];
275 }
276
277 hash(key,keyLen,0,&hashes[i]);
278 }
279
280 //----------
281
282 bool result = true;
283
284 result &= TestHashList(hashes,true,true,drawDiagram);
285 printf("\n");
286
287 delete [] cycle;
288 delete [] key;
289
290 return result;
291 }
292
293 //-----------------------------------------------------------------------------
294 // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes
295
296 void TwoBytesKeygen ( int maxlen, KeyCallback & c );
297
298 template < typename hashtype >
TwoBytesTest2(pfHash hash,int maxlen,bool drawDiagram)299 bool TwoBytesTest2 ( pfHash hash, int maxlen, bool drawDiagram )
300 {
301 std::vector<hashtype> hashes;
302
303 HashCallback<hashtype> c(hash,hashes);
304
305 TwoBytesKeygen(maxlen,c);
306
307 bool result = true;
308
309 result &= TestHashList(hashes,true,true,drawDiagram);
310 printf("\n");
311
312 return result;
313 }
314
315 //-----------------------------------------------------------------------------
316 // Keyset 'Text' - generate all keys of the form "prefix"+"core"+"suffix",
317 // where "core" consists of all possible combinations of the given character
318 // set of length N.
319
320 template < typename hashtype >
TextKeyTest(hashfunc<hashtype> hash,const char * prefix,const char * coreset,const int corelen,const char * suffix,bool drawDiagram)321 bool TextKeyTest ( hashfunc<hashtype> hash, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram )
322 {
323 const int prefixlen = (int)strlen(prefix);
324 const int suffixlen = (int)strlen(suffix);
325 const int corecount = (int)strlen(coreset);
326
327 const int keybytes = prefixlen + corelen + suffixlen;
328 const int keycount = (int)pow(double(corecount),double(corelen));
329
330 printf("Keyset 'Text' - keys of form \"%s[",prefix);
331 for(int i = 0; i < corelen; i++) printf("X");
332 printf("]%s\" - %d keys\n",suffix,keycount);
333
334 uint8_t * key = new uint8_t[keybytes+1];
335
336 key[keybytes] = 0;
337
338 memcpy(key,prefix,prefixlen);
339 memcpy(key+prefixlen+corelen,suffix,suffixlen);
340
341 //----------
342
343 std::vector<hashtype> hashes;
344 hashes.resize(keycount);
345
346 for(int i = 0; i < keycount; i++)
347 {
348 int t = i;
349
350 for(int j = 0; j < corelen; j++)
351 {
352 key[prefixlen+j] = coreset[t % corecount]; t /= corecount;
353 }
354
355 hash(key,keybytes,0,&hashes[i]);
356 }
357
358 //----------
359
360 bool result = true;
361
362 result &= TestHashList(hashes,true,true,drawDiagram);
363
364 printf("\n");
365
366 delete [] key;
367
368 return result;
369 }
370
371 //-----------------------------------------------------------------------------
372 // Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length
373
374 // We reuse one block of empty bytes, otherwise the RAM cost is enormous.
375
376 template < typename hashtype >
ZeroKeyTest(pfHash hash,bool drawDiagram)377 bool ZeroKeyTest ( pfHash hash, bool drawDiagram )
378 {
379 int keycount = 64*1024;
380
381 printf("Keyset 'Zeroes' - %d keys\n",keycount);
382
383 unsigned char * nullblock = new unsigned char[keycount];
384 memset(nullblock,0,keycount);
385
386 //----------
387
388 std::vector<hashtype> hashes;
389
390 hashes.resize(keycount);
391
392 for(int i = 0; i < keycount; i++)
393 {
394 hash(nullblock,i,0,&hashes[i]);
395 }
396
397 bool result = true;
398
399 result &= TestHashList(hashes,true,true,drawDiagram);
400
401 printf("\n");
402
403 delete [] nullblock;
404
405 return result;
406 }
407
408 //-----------------------------------------------------------------------------
409 // Keyset 'Seed' - hash "the quick brown fox..." using different seeds
410
411 template < typename hashtype >
SeedTest(pfHash hash,int keycount,bool drawDiagram)412 bool SeedTest ( pfHash hash, int keycount, bool drawDiagram )
413 {
414 printf("Keyset 'Seed' - %d keys\n",keycount);
415
416 const char * text = "The quick brown fox jumps over the lazy dog";
417 const int len = (int)strlen(text);
418
419 //----------
420
421 std::vector<hashtype> hashes;
422
423 hashes.resize(keycount);
424
425 for(int i = 0; i < keycount; i++)
426 {
427 hash(text,len,i,&hashes[i]);
428 }
429
430 bool result = true;
431
432 result &= TestHashList(hashes,true,true,drawDiagram);
433
434 printf("\n");
435
436 return result;
437 }
438
439 //-----------------------------------------------------------------------------
440