1 // Copyright 2011 Google Inc. All Rights Reserved. 2 3 package com.google.common.hash; 4 5 import static com.google.common.base.Preconditions.checkPositionIndexes; 6 import static com.google.common.hash.LittleEndianByteArray.load64; 7 import static com.google.common.hash.LittleEndianByteArray.load64Safely; 8 import static java.lang.Long.rotateRight; 9 10 import com.google.common.annotations.VisibleForTesting; 11 12 /** 13 * Implementation of Geoff Pike's fingerprint2011 hash function. See {@link Hashing#fingerprint2011} 14 * for information on the behaviour of the algorithm. 15 * 16 * <p>On Intel Core2 2.66, on 1000 bytes, fingerprint2011 takes 0.9 microseconds compared to 17 * fingerprint at 4.0 microseconds and md5 at 4.5 microseconds. 18 * 19 * <p>Note to maintainers: This implementation relies on signed arithmetic being bit-wise equivalent 20 * to unsigned arithmetic in all cases except: 21 * 22 * <ul> 23 * <li>comparisons (signed values can be negative) 24 * <li>division (avoided here) 25 * <li>shifting (right shift must be unsigned) 26 * </ul> 27 * 28 * @author kylemaddison@google.com (Kyle Maddison) 29 * @author gpike@google.com (Geoff Pike) 30 */ 31 @ElementTypesAreNonnullByDefault 32 final class Fingerprint2011 extends AbstractNonStreamingHashFunction { 33 static final HashFunction FINGERPRINT_2011 = new Fingerprint2011(); 34 35 // Some primes between 2^63 and 2^64 for various uses. 36 private static final long K0 = 0xa5b85c5e198ed849L; 37 private static final long K1 = 0x8d58ac26afe12e47L; 38 private static final long K2 = 0xc47b6e9e3a970ed3L; 39 private static final long K3 = 0xc6a4a7935bd1e995L; 40 41 @Override hashBytes(byte[] input, int off, int len)42 public HashCode hashBytes(byte[] input, int off, int len) { 43 checkPositionIndexes(off, off + len, input.length); 44 return HashCode.fromLong(fingerprint(input, off, len)); 45 } 46 47 @Override bits()48 public int bits() { 49 return 64; 50 } 51 52 @Override toString()53 public String toString() { 54 return "Hashing.fingerprint2011()"; 55 } 56 57 // End of public functions. 58 59 @VisibleForTesting fingerprint(byte[] bytes, int offset, int length)60 static long fingerprint(byte[] bytes, int offset, int length) { 61 long result; 62 63 if (length <= 32) { 64 result = murmurHash64WithSeed(bytes, offset, length, K0 ^ K1 ^ K2); 65 } else if (length <= 64) { 66 result = hashLength33To64(bytes, offset, length); 67 } else { 68 result = fullFingerprint(bytes, offset, length); 69 } 70 71 long u = length >= 8 ? load64(bytes, offset) : K0; 72 long v = length >= 9 ? load64(bytes, offset + length - 8) : K0; 73 result = hash128to64(result + v, u); 74 return result == 0 || result == 1 ? result + ~1 : result; 75 } 76 shiftMix(long val)77 private static long shiftMix(long val) { 78 return val ^ (val >>> 47); 79 } 80 81 /** Implementation of Hash128to64 from util/hash/hash128to64.h */ 82 @VisibleForTesting hash128to64(long high, long low)83 static long hash128to64(long high, long low) { 84 long a = (low ^ high) * K3; 85 a ^= (a >>> 47); 86 long b = (high ^ a) * K3; 87 b ^= (b >>> 47); 88 b *= K3; 89 return b; 90 } 91 92 /** 93 * Computes intermediate hash of 32 bytes of byte array from the given offset. Results are 94 * returned in the output array - this is 12% faster than allocating new arrays every time. 95 */ weakHashLength32WithSeeds( byte[] bytes, int offset, long seedA, long seedB, long[] output)96 private static void weakHashLength32WithSeeds( 97 byte[] bytes, int offset, long seedA, long seedB, long[] output) { 98 long part1 = load64(bytes, offset); 99 long part2 = load64(bytes, offset + 8); 100 long part3 = load64(bytes, offset + 16); 101 long part4 = load64(bytes, offset + 24); 102 103 seedA += part1; 104 seedB = rotateRight(seedB + seedA + part4, 51); 105 long c = seedA; 106 seedA += part2; 107 seedA += part3; 108 seedB += rotateRight(seedA, 23); 109 output[0] = seedA + part4; 110 output[1] = seedB + c; 111 } 112 113 /* 114 * Compute an 8-byte hash of a byte array of length greater than 64 bytes. 115 */ fullFingerprint(byte[] bytes, int offset, int length)116 private static long fullFingerprint(byte[] bytes, int offset, int length) { 117 // For lengths over 64 bytes we hash the end first, and then as we 118 // loop we keep 56 bytes of state: v, w, x, y, and z. 119 long x = load64(bytes, offset); 120 long y = load64(bytes, offset + length - 16) ^ K1; 121 long z = load64(bytes, offset + length - 56) ^ K0; 122 long[] v = new long[2]; 123 long[] w = new long[2]; 124 weakHashLength32WithSeeds(bytes, offset + length - 64, length, y, v); 125 weakHashLength32WithSeeds(bytes, offset + length - 32, length * K1, K0, w); 126 z += shiftMix(v[1]) * K1; 127 x = rotateRight(z + x, 39) * K1; 128 y = rotateRight(y, 33) * K1; 129 130 // Decrease length to the nearest multiple of 64, and operate on 64-byte chunks. 131 length = (length - 1) & ~63; 132 do { 133 x = rotateRight(x + y + v[0] + load64(bytes, offset + 16), 37) * K1; 134 y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * K1; 135 x ^= w[1]; 136 y ^= v[0]; 137 z = rotateRight(z ^ w[0], 33); 138 weakHashLength32WithSeeds(bytes, offset, v[1] * K1, x + w[0], v); 139 weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y, w); 140 long tmp = z; 141 z = x; 142 x = tmp; 143 offset += 64; 144 length -= 64; 145 } while (length != 0); 146 return hash128to64(hash128to64(v[0], w[0]) + shiftMix(y) * K1 + z, hash128to64(v[1], w[1]) + x); 147 } 148 hashLength33To64(byte[] bytes, int offset, int length)149 private static long hashLength33To64(byte[] bytes, int offset, int length) { 150 long z = load64(bytes, offset + 24); 151 long a = load64(bytes, offset) + (length + load64(bytes, offset + length - 16)) * K0; 152 long b = rotateRight(a + z, 52); 153 long c = rotateRight(a, 37); 154 a += load64(bytes, offset + 8); 155 c += rotateRight(a, 7); 156 a += load64(bytes, offset + 16); 157 long vf = a + z; 158 long vs = b + rotateRight(a, 31) + c; 159 a = load64(bytes, offset + 16) + load64(bytes, offset + length - 32); 160 z = load64(bytes, offset + length - 8); 161 b = rotateRight(a + z, 52); 162 c = rotateRight(a, 37); 163 a += load64(bytes, offset + length - 24); 164 c += rotateRight(a, 7); 165 a += load64(bytes, offset + length - 16); 166 long wf = a + z; 167 long ws = b + rotateRight(a, 31) + c; 168 long r = shiftMix((vf + ws) * K2 + (wf + vs) * K0); 169 return shiftMix(r * K0 + vs) * K2; 170 } 171 172 @VisibleForTesting murmurHash64WithSeed(byte[] bytes, int offset, int length, long seed)173 static long murmurHash64WithSeed(byte[] bytes, int offset, int length, long seed) { 174 long mul = K3; 175 int topBit = 0x7; 176 177 int lengthAligned = length & ~topBit; 178 int lengthRemainder = length & topBit; 179 long hash = seed ^ (length * mul); 180 181 for (int i = 0; i < lengthAligned; i += 8) { 182 long loaded = load64(bytes, offset + i); 183 long data = shiftMix(loaded * mul) * mul; 184 hash ^= data; 185 hash *= mul; 186 } 187 188 if (lengthRemainder != 0) { 189 long data = load64Safely(bytes, offset + lengthAligned, lengthRemainder); 190 hash ^= data; 191 hash *= mul; 192 } 193 194 hash = shiftMix(hash) * mul; 195 hash = shiftMix(hash); 196 return hash; 197 } 198 } 199