• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.hash;
16 
17 import static com.google.common.base.Preconditions.checkArgument;
18 
19 import com.google.common.annotations.Beta;
20 import com.google.common.annotations.VisibleForTesting;
21 import com.google.common.base.Supplier;
22 
23 import java.security.MessageDigest;
24 import java.util.Iterator;
25 import java.util.zip.Adler32;
26 import java.util.zip.CRC32;
27 import java.util.zip.Checksum;
28 
29 import javax.annotation.Nullable;
30 
31 /**
32  * Static methods to obtain {@link HashFunction} instances, and other static hashing-related
33  * utilities.
34  *
35  * <p>A comparison of the various hash functions can be found
36  * <a href="http://goo.gl/jS7HH">here</a>.
37  *
38  * @author Kevin Bourrillion
39  * @author Dimitris Andreou
40  * @author Kurt Alfred Kluever
41  * @since 11.0
42  */
43 @Beta
44 public final class Hashing {
45   /**
46    * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm
47    * the returned function implements is unspecified and subject to change without notice.
48    *
49    * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code
50    * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current
51    * process in any way, for example being sent over RPC, or saved to disk.
52    *
53    * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value
54    * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances.
55    *
56    * @param minimumBits a positive integer (can be arbitrarily large)
57    * @return a hash function, described above, that produces hash codes of length {@code
58    *     minimumBits} or greater
59    */
goodFastHash(int minimumBits)60   public static HashFunction goodFastHash(int minimumBits) {
61     int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
62 
63     if (bits == 32) {
64       return Murmur3_32Holder.GOOD_FAST_HASH_FUNCTION_32;
65     }
66     if (bits <= 128) {
67       return Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
68     }
69 
70     // Otherwise, join together some 128-bit murmur3s
71     int hashFunctionsNeeded = (bits + 127) / 128;
72     HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
73     hashFunctions[0] = Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128;
74     int seed = GOOD_FAST_HASH_SEED;
75     for (int i = 1; i < hashFunctionsNeeded; i++) {
76       seed += 1500450271; // a prime; shouldn't matter
77       hashFunctions[i] = murmur3_128(seed);
78     }
79     return new ConcatenatedHashFunction(hashFunctions);
80   }
81 
82   /**
83    * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything
84    * dependent on the hash codes they produce will fail sooner.
85    */
86   private static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis();
87 
88   /**
89    * Returns a hash function implementing the
90    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
91    * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant),
92    * using the given seed value.
93    *
94    * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
95    */
murmur3_32(int seed)96   public static HashFunction murmur3_32(int seed) {
97     return new Murmur3_32HashFunction(seed);
98   }
99 
100   /**
101    * Returns a hash function implementing the
102    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
103    * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant),
104    * using a seed value of zero.
105    *
106    * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
107    */
murmur3_32()108   public static HashFunction murmur3_32() {
109     return Murmur3_32Holder.MURMUR3_32;
110   }
111 
112   private static class Murmur3_32Holder {
113     static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
114 
115     /** Returned by {@link #goodFastHash} when {@code minimumBits <= 32}. */
116     static final HashFunction GOOD_FAST_HASH_FUNCTION_32 = murmur3_32(GOOD_FAST_HASH_SEED);
117   }
118 
119   /**
120    * Returns a hash function implementing the
121    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
122    * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant),
123    * using the given seed value.
124    *
125    * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
126    */
murmur3_128(int seed)127   public static HashFunction murmur3_128(int seed) {
128     return new Murmur3_128HashFunction(seed);
129   }
130 
131   /**
132    * Returns a hash function implementing the
133    * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
134    * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant),
135    * using a seed value of zero.
136    *
137    * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
138    */
murmur3_128()139   public static HashFunction murmur3_128() {
140     return Murmur3_128Holder.MURMUR3_128;
141   }
142 
143   private static class Murmur3_128Holder {
144     static final HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
145 
146     /** Returned by {@link #goodFastHash} when {@code 32 < minimumBits <= 128}. */
147     static final HashFunction GOOD_FAST_HASH_FUNCTION_128 = murmur3_128(GOOD_FAST_HASH_SEED);
148   }
149 
150   /**
151    * Returns a hash function implementing the
152    * <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a>
153    * using a seed value of {@code k = 00 01 02 ...}.
154    *
155    * @since 15.0
156    */
sipHash24()157   public static HashFunction sipHash24() {
158     return SipHash24Holder.SIP_HASH_24;
159   }
160 
161   private static class SipHash24Holder {
162     static final HashFunction SIP_HASH_24 =
163         new SipHashFunction(2, 4, 0x0706050403020100L, 0x0f0e0d0c0b0a0908L);
164   }
165 
166   /**
167    * Returns a hash function implementing the
168    * <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a>
169    * using the given seed.
170    *
171    * @since 15.0
172    */
sipHash24(long k0, long k1)173   public static HashFunction sipHash24(long k0, long k1) {
174     return new SipHashFunction(2, 4, k0, k1);
175   }
176 
177   /**
178    * Returns a hash function implementing the MD5 hash algorithm (128 hash bits) by delegating to
179    * the MD5 {@link MessageDigest}.
180    */
md5()181   public static HashFunction md5() {
182     return Md5Holder.MD5;
183   }
184 
185   private static class Md5Holder {
186     static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()");
187   }
188 
189   /**
190    * Returns a hash function implementing the SHA-1 algorithm (160 hash bits) by delegating to the
191    * SHA-1 {@link MessageDigest}.
192    */
sha1()193   public static HashFunction sha1() {
194     return Sha1Holder.SHA_1;
195   }
196 
197   private static class Sha1Holder {
198     static final HashFunction SHA_1 =
199         new MessageDigestHashFunction("SHA-1", "Hashing.sha1()");
200   }
201 
202   /**
203    * Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by delegating to
204    * the SHA-256 {@link MessageDigest}.
205    */
sha256()206   public static HashFunction sha256() {
207     return Sha256Holder.SHA_256;
208   }
209 
210   private static class Sha256Holder {
211     static final HashFunction SHA_256 =
212         new MessageDigestHashFunction("SHA-256", "Hashing.sha256()");
213   }
214 
215   /**
216    * Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by delegating to the
217    * SHA-512 {@link MessageDigest}.
218    */
sha512()219   public static HashFunction sha512() {
220     return Sha512Holder.SHA_512;
221   }
222 
223   private static class Sha512Holder {
224     static final HashFunction SHA_512 =
225         new MessageDigestHashFunction("SHA-512", "Hashing.sha512()");
226   }
227 
228   /**
229    * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described
230    * by RFC 3720, Section 12.1.
231    *
232    * @since 18.0
233    */
crc32c()234   public static HashFunction crc32c() {
235     return Crc32cHolder.CRC_32_C;
236   }
237 
238   private static final class Crc32cHolder {
239     static final HashFunction CRC_32_C = new Crc32cHashFunction();
240   }
241 
242   /**
243    * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits) by delegating
244    * to the {@link CRC32} {@link Checksum}.
245    *
246    * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
247    * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
248    *
249    * @since 14.0
250    */
crc32()251   public static HashFunction crc32() {
252     return Crc32Holder.CRC_32;
253   }
254 
255   private static class Crc32Holder {
256     static final HashFunction CRC_32 =
257         checksumHashFunction(ChecksumType.CRC_32, "Hashing.crc32()");
258   }
259 
260   /**
261    * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits) by
262    * delegating to the {@link Adler32} {@link Checksum}.
263    *
264    * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
265    * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
266    *
267    * @since 14.0
268    */
adler32()269   public static HashFunction adler32() {
270     return Adler32Holder.ADLER_32;
271   }
272 
273   private static class Adler32Holder {
274     static final HashFunction ADLER_32 =
275         checksumHashFunction(ChecksumType.ADLER_32, "Hashing.adler32()");
276   }
277 
checksumHashFunction(ChecksumType type, String toString)278   private static HashFunction checksumHashFunction(ChecksumType type, String toString) {
279     return new ChecksumHashFunction(type, type.bits, toString);
280   }
281 
282   enum ChecksumType implements Supplier<Checksum> {
283     CRC_32(32) {
284       @Override
get()285       public Checksum get() {
286         return new CRC32();
287       }
288     },
289     ADLER_32(32) {
290       @Override
get()291       public Checksum get() {
292         return new Adler32();
293       }
294     };
295 
296     private final int bits;
297 
ChecksumType(int bits)298     ChecksumType(int bits) {
299       this.bits = bits;
300     }
301 
302     @Override
get()303     public abstract Checksum get();
304   }
305 
306   /**
307    * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform
308    * manner that minimizes the need for remapping as {@code buckets} grows. That is,
309    * {@code consistentHash(h, n)} equals:
310    *
311    * <ul>
312    * <li>{@code n - 1}, with approximate probability {@code 1/n}
313    * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
314    * </ul>
315    *
316    * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
317    * article on consistent hashing</a> for more information.
318    */
consistentHash(HashCode hashCode, int buckets)319   public static int consistentHash(HashCode hashCode, int buckets) {
320     return consistentHash(hashCode.padToLong(), buckets);
321   }
322 
323   /**
324    * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform
325    * manner that minimizes the need for remapping as {@code buckets} grows. That is,
326    * {@code consistentHash(h, n)} equals:
327    *
328    * <ul>
329    * <li>{@code n - 1}, with approximate probability {@code 1/n}
330    * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
331    * </ul>
332    *
333    * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
334    * article on consistent hashing</a> for more information.
335    */
consistentHash(long input, int buckets)336   public static int consistentHash(long input, int buckets) {
337     checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
338     LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input);
339     int candidate = 0;
340     int next;
341 
342     // Jump from bucket to bucket until we go out of range
343     while (true) {
344       next = (int) ((candidate + 1) / generator.nextDouble());
345       if (next >= 0 && next < buckets) {
346         candidate = next;
347       } else {
348         return candidate;
349       }
350     }
351   }
352 
353   /**
354    * Returns a hash code, having the same bit length as each of the input hash codes,
355    * that combines the information of these hash codes in an ordered fashion. That
356    * is, whenever two equal hash codes are produced by two calls to this method, it
357    * is <i>as likely as possible</i> that each was computed from the <i>same</i>
358    * input hash codes in the <i>same</i> order.
359    *
360    * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
361    *     do not all have the same bit length
362    */
combineOrdered(Iterable<HashCode> hashCodes)363   public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
364     Iterator<HashCode> iterator = hashCodes.iterator();
365     checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
366     int bits = iterator.next().bits();
367     byte[] resultBytes = new byte[bits / 8];
368     for (HashCode hashCode : hashCodes) {
369       byte[] nextBytes = hashCode.asBytes();
370       checkArgument(nextBytes.length == resultBytes.length,
371           "All hashcodes must have the same bit length.");
372       for (int i = 0; i < nextBytes.length; i++) {
373         resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
374       }
375     }
376     return HashCode.fromBytesNoCopy(resultBytes);
377   }
378 
379   /**
380    * Returns a hash code, having the same bit length as each of the input hash codes,
381    * that combines the information of these hash codes in an unordered fashion. That
382    * is, whenever two equal hash codes are produced by two calls to this method, it
383    * is <i>as likely as possible</i> that each was computed from the <i>same</i>
384    * input hash codes in <i>some</i> order.
385    *
386    * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
387    *     do not all have the same bit length
388    */
combineUnordered(Iterable<HashCode> hashCodes)389   public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
390     Iterator<HashCode> iterator = hashCodes.iterator();
391     checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
392     byte[] resultBytes = new byte[iterator.next().bits() / 8];
393     for (HashCode hashCode : hashCodes) {
394       byte[] nextBytes = hashCode.asBytes();
395       checkArgument(nextBytes.length == resultBytes.length,
396           "All hashcodes must have the same bit length.");
397       for (int i = 0; i < nextBytes.length; i++) {
398         resultBytes[i] += nextBytes[i];
399       }
400     }
401     return HashCode.fromBytesNoCopy(resultBytes);
402   }
403 
404   /**
405    * Checks that the passed argument is positive, and ceils it to a multiple of 32.
406    */
checkPositiveAndMakeMultipleOf32(int bits)407   static int checkPositiveAndMakeMultipleOf32(int bits) {
408     checkArgument(bits > 0, "Number of bits must be positive");
409     return (bits + 31) & ~31;
410   }
411 
412   // TODO(kevinb): Maybe expose this class via a static Hashing method?
413   @VisibleForTesting
414   static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
415     private final int bits;
416 
ConcatenatedHashFunction(HashFunction... functions)417     ConcatenatedHashFunction(HashFunction... functions) {
418       super(functions);
419       int bitSum = 0;
420       for (HashFunction function : functions) {
421         bitSum += function.bits();
422       }
423       this.bits = bitSum;
424     }
425 
426     @Override
makeHash(Hasher[] hashers)427     HashCode makeHash(Hasher[] hashers) {
428       byte[] bytes = new byte[bits / 8];
429       int i = 0;
430       for (Hasher hasher : hashers) {
431         HashCode newHash = hasher.hash();
432         i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8);
433       }
434       return HashCode.fromBytesNoCopy(bytes);
435     }
436 
437     @Override
bits()438     public int bits() {
439       return bits;
440     }
441 
442     @Override
equals(@ullable Object object)443     public boolean equals(@Nullable Object object) {
444       if (object instanceof ConcatenatedHashFunction) {
445         ConcatenatedHashFunction other = (ConcatenatedHashFunction) object;
446         if (bits != other.bits || functions.length != other.functions.length) {
447           return false;
448         }
449         for (int i = 0; i < functions.length; i++) {
450           if (!functions[i].equals(other.functions[i])) {
451             return false;
452           }
453         }
454         return true;
455       }
456       return false;
457     }
458 
459     @Override
hashCode()460     public int hashCode() {
461       int hash = bits;
462       for (HashFunction function : functions) {
463         hash ^= function.hashCode();
464       }
465       return hash;
466     }
467   }
468 
469   /**
470    * Linear CongruentialGenerator to use for consistent hashing.
471    * See http://en.wikipedia.org/wiki/Linear_congruential_generator
472    */
473   private static final class LinearCongruentialGenerator {
474     private long state;
475 
LinearCongruentialGenerator(long seed)476     public LinearCongruentialGenerator(long seed) {
477       this.state = seed;
478     }
479 
nextDouble()480     public double nextDouble() {
481       state = 2862933555777941757L * state + 1;
482       return ((double) ((int) (state >>> 33) + 1)) / (0x1.0p31);
483     }
484   }
485 
Hashing()486   private Hashing() {}
487 }
488