• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //-----------------------------------------------------------------------------
2 // MurmurHash2 was written by Austin Appleby, and is placed in the public
3 // domain. The author hereby disclaims copyright to this source code.
4 
5 // Note - This code makes a few assumptions about how your machine behaves -
6 
7 // 1. We can read a 4-byte value from any address without crashing
8 // 2. sizeof(int) == 4
9 
10 // And it has a few limitations -
11 
12 // 1. It will not work incrementally.
13 // 2. It will not produce the same results on little-endian and big-endian
14 //    machines.
15 
16 #include "MurmurHash2.h"
17 
18 //-----------------------------------------------------------------------------
19 // Platform-specific functions and macros
20 
21 // Microsoft Visual Studio
22 
23 #if defined(_MSC_VER)
24 
25 #define BIG_CONSTANT(x) (x)
26 
27 // Other compilers
28 
29 #else	// defined(_MSC_VER)
30 
31 #define BIG_CONSTANT(x) (x##LLU)
32 
33 #endif // !defined(_MSC_VER)
34 
35 //-----------------------------------------------------------------------------
36 
MurmurHash2(const void * key,int len,uint32_t seed)37 uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
38 {
39   // 'm' and 'r' are mixing constants generated offline.
40   // They're not really 'magic', they just happen to work well.
41 
42   const uint32_t m = 0x5bd1e995;
43   const int r = 24;
44 
45   // Initialize the hash to a 'random' value
46 
47   uint32_t h = seed ^ len;
48 
49   // Mix 4 bytes at a time into the hash
50 
51   const unsigned char * data = (const unsigned char *)key;
52 
53   while(len >= 4)
54   {
55     uint32_t k = *(uint32_t*)data;
56 
57     k *= m;
58     k ^= k >> r;
59     k *= m;
60 
61     h *= m;
62     h ^= k;
63 
64     data += 4;
65     len -= 4;
66   }
67 
68   // Handle the last few bytes of the input array
69 
70   switch(len)
71   {
72   case 3: h ^= data[2] << 16;
73   case 2: h ^= data[1] << 8;
74   case 1: h ^= data[0];
75       h *= m;
76   };
77 
78   // Do a few final mixes of the hash to ensure the last few
79   // bytes are well-incorporated.
80 
81   h ^= h >> 13;
82   h *= m;
83   h ^= h >> 15;
84 
85   return h;
86 }
87 
88 //-----------------------------------------------------------------------------
89 // MurmurHash2, 64-bit versions, by Austin Appleby
90 
91 // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
92 // and endian-ness issues if used across multiple platforms.
93 
94 // 64-bit hash for 64-bit platforms
95 
MurmurHash64A(const void * key,int len,uint64_t seed)96 uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
97 {
98   const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
99   const int r = 47;
100 
101   uint64_t h = seed ^ (len * m);
102 
103   const uint64_t * data = (const uint64_t *)key;
104   const uint64_t * end = data + (len/8);
105 
106   while(data != end)
107   {
108     uint64_t k = *data++;
109 
110     k *= m;
111     k ^= k >> r;
112     k *= m;
113 
114     h ^= k;
115     h *= m;
116   }
117 
118   const unsigned char * data2 = (const unsigned char*)data;
119 
120   switch(len & 7)
121   {
122   case 7: h ^= uint64_t(data2[6]) << 48;
123   case 6: h ^= uint64_t(data2[5]) << 40;
124   case 5: h ^= uint64_t(data2[4]) << 32;
125   case 4: h ^= uint64_t(data2[3]) << 24;
126   case 3: h ^= uint64_t(data2[2]) << 16;
127   case 2: h ^= uint64_t(data2[1]) << 8;
128   case 1: h ^= uint64_t(data2[0]);
129           h *= m;
130   };
131 
132   h ^= h >> r;
133   h *= m;
134   h ^= h >> r;
135 
136   return h;
137 }
138 
139 
140 // 64-bit hash for 32-bit platforms
141 
MurmurHash64B(const void * key,int len,uint64_t seed)142 uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
143 {
144   const uint32_t m = 0x5bd1e995;
145   const int r = 24;
146 
147   uint32_t h1 = uint32_t(seed) ^ len;
148   uint32_t h2 = uint32_t(seed >> 32);
149 
150   const uint32_t * data = (const uint32_t *)key;
151 
152   while(len >= 8)
153   {
154     uint32_t k1 = *data++;
155     k1 *= m; k1 ^= k1 >> r; k1 *= m;
156     h1 *= m; h1 ^= k1;
157     len -= 4;
158 
159     uint32_t k2 = *data++;
160     k2 *= m; k2 ^= k2 >> r; k2 *= m;
161     h2 *= m; h2 ^= k2;
162     len -= 4;
163   }
164 
165   if(len >= 4)
166   {
167     uint32_t k1 = *data++;
168     k1 *= m; k1 ^= k1 >> r; k1 *= m;
169     h1 *= m; h1 ^= k1;
170     len -= 4;
171   }
172 
173   switch(len)
174   {
175   case 3: h2 ^= ((unsigned char*)data)[2] << 16;
176   case 2: h2 ^= ((unsigned char*)data)[1] << 8;
177   case 1: h2 ^= ((unsigned char*)data)[0];
178       h2 *= m;
179   };
180 
181   h1 ^= h2 >> 18; h1 *= m;
182   h2 ^= h1 >> 22; h2 *= m;
183   h1 ^= h2 >> 17; h1 *= m;
184   h2 ^= h1 >> 19; h2 *= m;
185 
186   uint64_t h = h1;
187 
188   h = (h << 32) | h2;
189 
190   return h;
191 }
192 
193 //-----------------------------------------------------------------------------
194 // MurmurHash2A, by Austin Appleby
195 
196 // This is a variant of MurmurHash2 modified to use the Merkle-Damgard
197 // construction. Bulk speed should be identical to Murmur2, small-key speed
198 // will be 10%-20% slower due to the added overhead at the end of the hash.
199 
200 // This variant fixes a minor issue where null keys were more likely to
201 // collide with each other than expected, and also makes the function
202 // more amenable to incremental implementations.
203 
204 #define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
205 
MurmurHash2A(const void * key,int len,uint32_t seed)206 uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
207 {
208   const uint32_t m = 0x5bd1e995;
209   const int r = 24;
210   uint32_t l = len;
211 
212   const unsigned char * data = (const unsigned char *)key;
213 
214   uint32_t h = seed;
215 
216   while(len >= 4)
217   {
218     uint32_t k = *(uint32_t*)data;
219 
220     mmix(h,k);
221 
222     data += 4;
223     len -= 4;
224   }
225 
226   uint32_t t = 0;
227 
228   switch(len)
229   {
230   case 3: t ^= data[2] << 16;
231   case 2: t ^= data[1] << 8;
232   case 1: t ^= data[0];
233   };
234 
235   mmix(h,t);
236   mmix(h,l);
237 
238   h ^= h >> 13;
239   h *= m;
240   h ^= h >> 15;
241 
242   return h;
243 }
244 
245 //-----------------------------------------------------------------------------
246 // CMurmurHash2A, by Austin Appleby
247 
248 // This is a sample implementation of MurmurHash2A designed to work
249 // incrementally.
250 
251 // Usage -
252 
253 // CMurmurHash2A hasher
254 // hasher.Begin(seed);
255 // hasher.Add(data1,size1);
256 // hasher.Add(data2,size2);
257 // ...
258 // hasher.Add(dataN,sizeN);
259 // uint32_t hash = hasher.End()
260 
261 class CMurmurHash2A
262 {
263 public:
264 
Begin(uint32_t seed=0)265   void Begin ( uint32_t seed = 0 )
266   {
267     m_hash  = seed;
268     m_tail  = 0;
269     m_count = 0;
270     m_size  = 0;
271   }
272 
Add(const unsigned char * data,int len)273   void Add ( const unsigned char * data, int len )
274   {
275     m_size += len;
276 
277     MixTail(data,len);
278 
279     while(len >= 4)
280     {
281       uint32_t k = *(uint32_t*)data;
282 
283       mmix(m_hash,k);
284 
285       data += 4;
286       len -= 4;
287     }
288 
289     MixTail(data,len);
290   }
291 
End(void)292   uint32_t End ( void )
293   {
294     mmix(m_hash,m_tail);
295     mmix(m_hash,m_size);
296 
297     m_hash ^= m_hash >> 13;
298     m_hash *= m;
299     m_hash ^= m_hash >> 15;
300 
301     return m_hash;
302   }
303 
304 private:
305 
306   static const uint32_t m = 0x5bd1e995;
307   static const int r = 24;
308 
MixTail(const unsigned char * & data,int & len)309   void MixTail ( const unsigned char * & data, int & len )
310   {
311     while( len && ((len<4) || m_count) )
312     {
313       m_tail |= (*data++) << (m_count * 8);
314 
315       m_count++;
316       len--;
317 
318       if(m_count == 4)
319       {
320         mmix(m_hash,m_tail);
321         m_tail = 0;
322         m_count = 0;
323       }
324     }
325   }
326 
327   uint32_t m_hash;
328   uint32_t m_tail;
329   uint32_t m_count;
330   uint32_t m_size;
331 };
332 
333 //-----------------------------------------------------------------------------
334 // MurmurHashNeutral2, by Austin Appleby
335 
336 // Same as MurmurHash2, but endian- and alignment-neutral.
337 // Half the speed though, alas.
338 
MurmurHashNeutral2(const void * key,int len,uint32_t seed)339 uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
340 {
341   const uint32_t m = 0x5bd1e995;
342   const int r = 24;
343 
344   uint32_t h = seed ^ len;
345 
346   const unsigned char * data = (const unsigned char *)key;
347 
348   while(len >= 4)
349   {
350     uint32_t k;
351 
352     k  = data[0];
353     k |= data[1] << 8;
354     k |= data[2] << 16;
355     k |= data[3] << 24;
356 
357     k *= m;
358     k ^= k >> r;
359     k *= m;
360 
361     h *= m;
362     h ^= k;
363 
364     data += 4;
365     len -= 4;
366   }
367 
368   switch(len)
369   {
370   case 3: h ^= data[2] << 16;
371   case 2: h ^= data[1] << 8;
372   case 1: h ^= data[0];
373           h *= m;
374   };
375 
376   h ^= h >> 13;
377   h *= m;
378   h ^= h >> 15;
379 
380   return h;
381 }
382 
383 //-----------------------------------------------------------------------------
384 // MurmurHashAligned2, by Austin Appleby
385 
386 // Same algorithm as MurmurHash2, but only does aligned reads - should be safer
387 // on certain platforms.
388 
389 // Performance will be lower than MurmurHash2
390 
391 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
392 
393 
MurmurHashAligned2(const void * key,int len,uint32_t seed)394 uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
395 {
396   const uint32_t m = 0x5bd1e995;
397   const int r = 24;
398 
399   const unsigned char * data = (const unsigned char *)key;
400 
401   uint32_t h = seed ^ len;
402 
403   int align = (uint64_t)data & 3;
404 
405   if(align && (len >= 4))
406   {
407     // Pre-load the temp registers
408 
409     uint32_t t = 0, d = 0;
410 
411     switch(align)
412     {
413       case 1: t |= data[2] << 16;
414       case 2: t |= data[1] << 8;
415       case 3: t |= data[0];
416     }
417 
418     t <<= (8 * align);
419 
420     data += 4-align;
421     len -= 4-align;
422 
423     int sl = 8 * (4-align);
424     int sr = 8 * align;
425 
426     // Mix
427 
428     while(len >= 4)
429     {
430       d = *(uint32_t *)data;
431       t = (t >> sr) | (d << sl);
432 
433       uint32_t k = t;
434 
435       MIX(h,k,m);
436 
437       t = d;
438 
439       data += 4;
440       len -= 4;
441     }
442 
443     // Handle leftover data in temp registers
444 
445     d = 0;
446 
447     if(len >= align)
448     {
449       switch(align)
450       {
451       case 3: d |= data[2] << 16;
452       case 2: d |= data[1] << 8;
453       case 1: d |= data[0];
454       }
455 
456       uint32_t k = (t >> sr) | (d << sl);
457       MIX(h,k,m);
458 
459       data += align;
460       len -= align;
461 
462       //----------
463       // Handle tail bytes
464 
465       switch(len)
466       {
467       case 3: h ^= data[2] << 16;
468       case 2: h ^= data[1] << 8;
469       case 1: h ^= data[0];
470           h *= m;
471       };
472     }
473     else
474     {
475       switch(len)
476       {
477       case 3: d |= data[2] << 16;
478       case 2: d |= data[1] << 8;
479       case 1: d |= data[0];
480       case 0: h ^= (t >> sr) | (d << sl);
481           h *= m;
482       }
483     }
484 
485     h ^= h >> 13;
486     h *= m;
487     h ^= h >> 15;
488 
489     return h;
490   }
491   else
492   {
493     while(len >= 4)
494     {
495       uint32_t k = *(uint32_t *)data;
496 
497       MIX(h,k,m);
498 
499       data += 4;
500       len -= 4;
501     }
502 
503     //----------
504     // Handle tail bytes
505 
506     switch(len)
507     {
508     case 3: h ^= data[2] << 16;
509     case 2: h ^= data[1] << 8;
510     case 1: h ^= data[0];
511         h *= m;
512     };
513 
514     h ^= h >> 13;
515     h *= m;
516     h ^= h >> 15;
517 
518     return h;
519   }
520 }
521 
522 //-----------------------------------------------------------------------------
523 
524