• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.hash;
16 
17 import static com.google.common.base.Preconditions.checkPositionIndexes;
18 import static com.google.common.hash.LittleEndianByteArray.load32;
19 import static com.google.common.hash.LittleEndianByteArray.load64;
20 import static java.lang.Long.rotateRight;
21 
22 import com.google.common.annotations.VisibleForTesting;
23 
24 /**
25  * Implementation of FarmHash Fingerprint64, an open-source fingerprinting algorithm for strings.
26  *
27  * <p>Its speed is comparable to CityHash64, and its quality of hashing is at least as good.
28  *
29  * <p>Note to maintainers: This implementation relies on signed arithmetic being bit-wise equivalent
30  * to unsigned arithmetic in all cases except:
31  *
32  * <ul>
33  *   <li>comparisons (signed values can be negative)
34  *   <li>division (avoided here)
35  *   <li>shifting (right shift must be unsigned)
36  * </ul>
37  *
38  * @author Kyle Maddison
39  * @author Geoff Pike
40  */
41 @ElementTypesAreNonnullByDefault
42 final class FarmHashFingerprint64 extends AbstractNonStreamingHashFunction {
43   static final HashFunction FARMHASH_FINGERPRINT_64 = new FarmHashFingerprint64();
44 
45   // Some primes between 2^63 and 2^64 for various uses.
46   private static final long K0 = 0xc3a5c85c97cb3127L;
47   private static final long K1 = 0xb492b66fbe98f273L;
48   private static final long K2 = 0x9ae16a3b2f90404fL;
49 
50   @Override
hashBytes(byte[] input, int off, int len)51   public HashCode hashBytes(byte[] input, int off, int len) {
52     checkPositionIndexes(off, off + len, input.length);
53     return HashCode.fromLong(fingerprint(input, off, len));
54   }
55 
56   @Override
bits()57   public int bits() {
58     return 64;
59   }
60 
61   @Override
toString()62   public String toString() {
63     return "Hashing.farmHashFingerprint64()";
64   }
65 
66   // End of public functions.
67 
68   @VisibleForTesting
fingerprint(byte[] bytes, int offset, int length)69   static long fingerprint(byte[] bytes, int offset, int length) {
70     if (length <= 32) {
71       if (length <= 16) {
72         return hashLength0to16(bytes, offset, length);
73       } else {
74         return hashLength17to32(bytes, offset, length);
75       }
76     } else if (length <= 64) {
77       return hashLength33To64(bytes, offset, length);
78     } else {
79       return hashLength65Plus(bytes, offset, length);
80     }
81   }
82 
shiftMix(long val)83   private static long shiftMix(long val) {
84     return val ^ (val >>> 47);
85   }
86 
hashLength16(long u, long v, long mul)87   private static long hashLength16(long u, long v, long mul) {
88     long a = (u ^ v) * mul;
89     a ^= (a >>> 47);
90     long b = (v ^ a) * mul;
91     b ^= (b >>> 47);
92     b *= mul;
93     return b;
94   }
95 
96   /**
97    * Computes intermediate hash of 32 bytes of byte array from the given offset. Results are
98    * returned in the output array because when we last measured, this was 12% faster than allocating
99    * new arrays every time.
100    */
weakHashLength32WithSeeds( byte[] bytes, int offset, long seedA, long seedB, long[] output)101   private static void weakHashLength32WithSeeds(
102       byte[] bytes, int offset, long seedA, long seedB, long[] output) {
103     long part1 = load64(bytes, offset);
104     long part2 = load64(bytes, offset + 8);
105     long part3 = load64(bytes, offset + 16);
106     long part4 = load64(bytes, offset + 24);
107 
108     seedA += part1;
109     seedB = rotateRight(seedB + seedA + part4, 21);
110     long c = seedA;
111     seedA += part2;
112     seedA += part3;
113     seedB += rotateRight(seedA, 44);
114     output[0] = seedA + part4;
115     output[1] = seedB + c;
116   }
117 
hashLength0to16(byte[] bytes, int offset, int length)118   private static long hashLength0to16(byte[] bytes, int offset, int length) {
119     if (length >= 8) {
120       long mul = K2 + length * 2L;
121       long a = load64(bytes, offset) + K2;
122       long b = load64(bytes, offset + length - 8);
123       long c = rotateRight(b, 37) * mul + a;
124       long d = (rotateRight(a, 25) + b) * mul;
125       return hashLength16(c, d, mul);
126     }
127     if (length >= 4) {
128       long mul = K2 + length * 2;
129       long a = load32(bytes, offset) & 0xFFFFFFFFL;
130       return hashLength16(length + (a << 3), load32(bytes, offset + length - 4) & 0xFFFFFFFFL, mul);
131     }
132     if (length > 0) {
133       byte a = bytes[offset];
134       byte b = bytes[offset + (length >> 1)];
135       byte c = bytes[offset + (length - 1)];
136       int y = (a & 0xFF) + ((b & 0xFF) << 8);
137       int z = length + ((c & 0xFF) << 2);
138       return shiftMix(y * K2 ^ z * K0) * K2;
139     }
140     return K2;
141   }
142 
hashLength17to32(byte[] bytes, int offset, int length)143   private static long hashLength17to32(byte[] bytes, int offset, int length) {
144     long mul = K2 + length * 2L;
145     long a = load64(bytes, offset) * K1;
146     long b = load64(bytes, offset + 8);
147     long c = load64(bytes, offset + length - 8) * mul;
148     long d = load64(bytes, offset + length - 16) * K2;
149     return hashLength16(
150         rotateRight(a + b, 43) + rotateRight(c, 30) + d, a + rotateRight(b + K2, 18) + c, mul);
151   }
152 
hashLength33To64(byte[] bytes, int offset, int length)153   private static long hashLength33To64(byte[] bytes, int offset, int length) {
154     long mul = K2 + length * 2L;
155     long a = load64(bytes, offset) * K2;
156     long b = load64(bytes, offset + 8);
157     long c = load64(bytes, offset + length - 8) * mul;
158     long d = load64(bytes, offset + length - 16) * K2;
159     long y = rotateRight(a + b, 43) + rotateRight(c, 30) + d;
160     long z = hashLength16(y, a + rotateRight(b + K2, 18) + c, mul);
161     long e = load64(bytes, offset + 16) * mul;
162     long f = load64(bytes, offset + 24);
163     long g = (y + load64(bytes, offset + length - 32)) * mul;
164     long h = (z + load64(bytes, offset + length - 24)) * mul;
165     return hashLength16(
166         rotateRight(e + f, 43) + rotateRight(g, 30) + h, e + rotateRight(f + a, 18) + g, mul);
167   }
168 
169   /*
170    * Compute an 8-byte hash of a byte array of length greater than 64 bytes.
171    */
hashLength65Plus(byte[] bytes, int offset, int length)172   private static long hashLength65Plus(byte[] bytes, int offset, int length) {
173     int seed = 81;
174     // For strings over 64 bytes we loop. Internal state consists of 56 bytes: v, w, x, y, and z.
175     long x = seed;
176     @SuppressWarnings("ConstantOverflow")
177     long y = seed * K1 + 113;
178     long z = shiftMix(y * K2 + 113) * K2;
179     long[] v = new long[2];
180     long[] w = new long[2];
181     x = x * K2 + load64(bytes, offset);
182 
183     // Set end so that after the loop we have 1 to 64 bytes left to process.
184     int end = offset + ((length - 1) / 64) * 64;
185     int last64offset = end + ((length - 1) & 63) - 63;
186     do {
187       x = rotateRight(x + y + v[0] + load64(bytes, offset + 8), 37) * K1;
188       y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * K1;
189       x ^= w[1];
190       y += v[0] + load64(bytes, offset + 40);
191       z = rotateRight(z + w[0], 33) * K1;
192       weakHashLength32WithSeeds(bytes, offset, v[1] * K1, x + w[0], v);
193       weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y + load64(bytes, offset + 16), w);
194       long tmp = x;
195       x = z;
196       z = tmp;
197       offset += 64;
198     } while (offset != end);
199     long mul = K1 + ((z & 0xFF) << 1);
200     // Operate on the last 64 bytes of input.
201     offset = last64offset;
202     w[0] += ((length - 1) & 63);
203     v[0] += w[0];
204     w[0] += v[0];
205     x = rotateRight(x + y + v[0] + load64(bytes, offset + 8), 37) * mul;
206     y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * mul;
207     x ^= w[1] * 9;
208     y += v[0] * 9 + load64(bytes, offset + 40);
209     z = rotateRight(z + w[0], 33) * mul;
210     weakHashLength32WithSeeds(bytes, offset, v[1] * mul, x + w[0], v);
211     weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y + load64(bytes, offset + 16), w);
212     return hashLength16(
213         hashLength16(v[0], w[0], mul) + shiftMix(y) * K0 + x,
214         hashLength16(v[1], w[1], mul) + z,
215         mul);
216   }
217 }
218