• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 
3 package com.google.common.hash;
4 
5 import static com.google.common.base.Preconditions.checkPositionIndexes;
6 import static com.google.common.hash.LittleEndianByteArray.load64;
7 import static com.google.common.hash.LittleEndianByteArray.load64Safely;
8 import static java.lang.Long.rotateRight;
9 
10 import com.google.common.annotations.VisibleForTesting;
11 
12 /**
13  * Implementation of Geoff Pike's fingerprint2011 hash function. See {@link Hashing#fingerprint2011}
14  * for information on the behaviour of the algorithm.
15  *
16  * <p>On Intel Core2 2.66, on 1000 bytes, fingerprint2011 takes 0.9 microseconds compared to
17  * fingerprint at 4.0 microseconds and md5 at 4.5 microseconds.
18  *
19  * <p>Note to maintainers: This implementation relies on signed arithmetic being bit-wise equivalent
20  * to unsigned arithmetic in all cases except:
21  *
22  * <ul>
23  *   <li>comparisons (signed values can be negative)
24  *   <li>division (avoided here)
25  *   <li>shifting (right shift must be unsigned)
26  * </ul>
27  *
28  * @author kylemaddison@google.com (Kyle Maddison)
29  * @author gpike@google.com (Geoff Pike)
30  */
31 @ElementTypesAreNonnullByDefault
32 final class Fingerprint2011 extends AbstractNonStreamingHashFunction {
33   static final HashFunction FINGERPRINT_2011 = new Fingerprint2011();
34 
35   // Some primes between 2^63 and 2^64 for various uses.
36   private static final long K0 = 0xa5b85c5e198ed849L;
37   private static final long K1 = 0x8d58ac26afe12e47L;
38   private static final long K2 = 0xc47b6e9e3a970ed3L;
39   private static final long K3 = 0xc6a4a7935bd1e995L;
40 
41   @Override
hashBytes(byte[] input, int off, int len)42   public HashCode hashBytes(byte[] input, int off, int len) {
43     checkPositionIndexes(off, off + len, input.length);
44     return HashCode.fromLong(fingerprint(input, off, len));
45   }
46 
47   @Override
bits()48   public int bits() {
49     return 64;
50   }
51 
52   @Override
toString()53   public String toString() {
54     return "Hashing.fingerprint2011()";
55   }
56 
57   // End of public functions.
58 
59   @VisibleForTesting
fingerprint(byte[] bytes, int offset, int length)60   static long fingerprint(byte[] bytes, int offset, int length) {
61     long result;
62 
63     if (length <= 32) {
64       result = murmurHash64WithSeed(bytes, offset, length, K0 ^ K1 ^ K2);
65     } else if (length <= 64) {
66       result = hashLength33To64(bytes, offset, length);
67     } else {
68       result = fullFingerprint(bytes, offset, length);
69     }
70 
71     long u = length >= 8 ? load64(bytes, offset) : K0;
72     long v = length >= 9 ? load64(bytes, offset + length - 8) : K0;
73     result = hash128to64(result + v, u);
74     return result == 0 || result == 1 ? result + ~1 : result;
75   }
76 
shiftMix(long val)77   private static long shiftMix(long val) {
78     return val ^ (val >>> 47);
79   }
80 
81   /** Implementation of Hash128to64 from util/hash/hash128to64.h */
82   @VisibleForTesting
hash128to64(long high, long low)83   static long hash128to64(long high, long low) {
84     long a = (low ^ high) * K3;
85     a ^= (a >>> 47);
86     long b = (high ^ a) * K3;
87     b ^= (b >>> 47);
88     b *= K3;
89     return b;
90   }
91 
92   /**
93    * Computes intermediate hash of 32 bytes of byte array from the given offset. Results are
94    * returned in the output array - this is 12% faster than allocating new arrays every time.
95    */
weakHashLength32WithSeeds( byte[] bytes, int offset, long seedA, long seedB, long[] output)96   private static void weakHashLength32WithSeeds(
97       byte[] bytes, int offset, long seedA, long seedB, long[] output) {
98     long part1 = load64(bytes, offset);
99     long part2 = load64(bytes, offset + 8);
100     long part3 = load64(bytes, offset + 16);
101     long part4 = load64(bytes, offset + 24);
102 
103     seedA += part1;
104     seedB = rotateRight(seedB + seedA + part4, 51);
105     long c = seedA;
106     seedA += part2;
107     seedA += part3;
108     seedB += rotateRight(seedA, 23);
109     output[0] = seedA + part4;
110     output[1] = seedB + c;
111   }
112 
113   /*
114    * Compute an 8-byte hash of a byte array of length greater than 64 bytes.
115    */
fullFingerprint(byte[] bytes, int offset, int length)116   private static long fullFingerprint(byte[] bytes, int offset, int length) {
117     // For lengths over 64 bytes we hash the end first, and then as we
118     // loop we keep 56 bytes of state: v, w, x, y, and z.
119     long x = load64(bytes, offset);
120     long y = load64(bytes, offset + length - 16) ^ K1;
121     long z = load64(bytes, offset + length - 56) ^ K0;
122     long[] v = new long[2];
123     long[] w = new long[2];
124     weakHashLength32WithSeeds(bytes, offset + length - 64, length, y, v);
125     weakHashLength32WithSeeds(bytes, offset + length - 32, length * K1, K0, w);
126     z += shiftMix(v[1]) * K1;
127     x = rotateRight(z + x, 39) * K1;
128     y = rotateRight(y, 33) * K1;
129 
130     // Decrease length to the nearest multiple of 64, and operate on 64-byte chunks.
131     length = (length - 1) & ~63;
132     do {
133       x = rotateRight(x + y + v[0] + load64(bytes, offset + 16), 37) * K1;
134       y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * K1;
135       x ^= w[1];
136       y ^= v[0];
137       z = rotateRight(z ^ w[0], 33);
138       weakHashLength32WithSeeds(bytes, offset, v[1] * K1, x + w[0], v);
139       weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y, w);
140       long tmp = z;
141       z = x;
142       x = tmp;
143       offset += 64;
144       length -= 64;
145     } while (length != 0);
146     return hash128to64(hash128to64(v[0], w[0]) + shiftMix(y) * K1 + z, hash128to64(v[1], w[1]) + x);
147   }
148 
hashLength33To64(byte[] bytes, int offset, int length)149   private static long hashLength33To64(byte[] bytes, int offset, int length) {
150     long z = load64(bytes, offset + 24);
151     long a = load64(bytes, offset) + (length + load64(bytes, offset + length - 16)) * K0;
152     long b = rotateRight(a + z, 52);
153     long c = rotateRight(a, 37);
154     a += load64(bytes, offset + 8);
155     c += rotateRight(a, 7);
156     a += load64(bytes, offset + 16);
157     long vf = a + z;
158     long vs = b + rotateRight(a, 31) + c;
159     a = load64(bytes, offset + 16) + load64(bytes, offset + length - 32);
160     z = load64(bytes, offset + length - 8);
161     b = rotateRight(a + z, 52);
162     c = rotateRight(a, 37);
163     a += load64(bytes, offset + length - 24);
164     c += rotateRight(a, 7);
165     a += load64(bytes, offset + length - 16);
166     long wf = a + z;
167     long ws = b + rotateRight(a, 31) + c;
168     long r = shiftMix((vf + ws) * K2 + (wf + vs) * K0);
169     return shiftMix(r * K0 + vs) * K2;
170   }
171 
172   @VisibleForTesting
murmurHash64WithSeed(byte[] bytes, int offset, int length, long seed)173   static long murmurHash64WithSeed(byte[] bytes, int offset, int length, long seed) {
174     long mul = K3;
175     int topBit = 0x7;
176 
177     int lengthAligned = length & ~topBit;
178     int lengthRemainder = length & topBit;
179     long hash = seed ^ (length * mul);
180 
181     for (int i = 0; i < lengthAligned; i += 8) {
182       long loaded = load64(bytes, offset + i);
183       long data = shiftMix(loaded * mul) * mul;
184       hash ^= data;
185       hash *= mul;
186     }
187 
188     if (lengthRemainder != 0) {
189       long data = load64Safely(bytes, offset + lengthAligned, lengthRemainder);
190       hash ^= data;
191       hash *= mul;
192     }
193 
194     hash = shiftMix(hash) * mul;
195     hash = shiftMix(hash);
196     return hash;
197   }
198 }
199