1 /* 2 * Copyright (C) 2011 The Guava Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.common.hash; 16 17 import static com.google.common.base.Preconditions.checkArgument; 18 19 import com.google.common.math.LongMath; 20 import com.google.common.primitives.Ints; 21 import com.google.common.primitives.Longs; 22 23 import java.math.RoundingMode; 24 import java.util.Arrays; 25 26 /** 27 * Collections of strategies of generating the k * log(M) bits required for an element to 28 * be mapped to a BloomFilter of M bits and k hash functions. These 29 * strategies are part of the serialized form of the Bloom filters that use them, thus they must be 30 * preserved as is (no updates allowed, only introduction of new versions). 31 * 32 * Important: the order of the constants cannot change, and they cannot be deleted - we depend 33 * on their ordinal for BloomFilter serialization. 34 * 35 * @author Dimitris Andreou 36 * @author Kurt Alfred Kluever 37 */ 38 enum BloomFilterStrategies implements BloomFilter.Strategy { 39 /** 40 * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and 41 * Michael Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the 42 * performance of a Bloom filter (yet only needs two 32bit hash functions). 43 */ MURMUR128_MITZ_32()44 MURMUR128_MITZ_32() { 45 @Override public <T> boolean put(T object, Funnel<? super T> funnel, 46 int numHashFunctions, BitArray bits) { 47 long bitSize = bits.bitSize(); 48 long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); 49 int hash1 = (int) hash64; 50 int hash2 = (int) (hash64 >>> 32); 51 52 boolean bitsChanged = false; 53 for (int i = 1; i <= numHashFunctions; i++) { 54 int combinedHash = hash1 + (i * hash2); 55 // Flip all the bits if it's negative (guaranteed positive number) 56 if (combinedHash < 0) { 57 combinedHash = ~combinedHash; 58 } 59 bitsChanged |= bits.set(combinedHash % bitSize); 60 } 61 return bitsChanged; 62 } 63 64 @Override public <T> boolean mightContain(T object, Funnel<? super T> funnel, 65 int numHashFunctions, BitArray bits) { 66 long bitSize = bits.bitSize(); 67 long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); 68 int hash1 = (int) hash64; 69 int hash2 = (int) (hash64 >>> 32); 70 71 for (int i = 1; i <= numHashFunctions; i++) { 72 int combinedHash = hash1 + (i * hash2); 73 // Flip all the bits if it's negative (guaranteed positive number) 74 if (combinedHash < 0) { 75 combinedHash = ~combinedHash; 76 } 77 if (!bits.get(combinedHash % bitSize)) { 78 return false; 79 } 80 } 81 return true; 82 } 83 }, 84 /** 85 * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks 86 * different than the implementation in MURMUR128_MITZ_32 because we're avoiding the 87 * multiplication in the loop and doing a (much simpler) += hash2. We're also changing the 88 * index to a positive number by AND'ing with Long.MAX_VALUE instead of flipping the bits. 89 */ MURMUR128_MITZ_64()90 MURMUR128_MITZ_64() { 91 @Override 92 public <T> boolean put(T object, Funnel<? super T> funnel, 93 int numHashFunctions, BitArray bits) { 94 long bitSize = bits.bitSize(); 95 byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); 96 long hash1 = lowerEight(bytes); 97 long hash2 = upperEight(bytes); 98 99 boolean bitsChanged = false; 100 long combinedHash = hash1; 101 for (int i = 0; i < numHashFunctions; i++) { 102 // Make the combined hash positive and indexable 103 bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize); 104 combinedHash += hash2; 105 } 106 return bitsChanged; 107 } 108 109 @Override 110 public <T> boolean mightContain(T object, Funnel<? super T> funnel, 111 int numHashFunctions, BitArray bits) { 112 long bitSize = bits.bitSize(); 113 byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); 114 long hash1 = lowerEight(bytes); 115 long hash2 = upperEight(bytes); 116 117 long combinedHash = hash1; 118 for (int i = 0; i < numHashFunctions; i++) { 119 // Make the combined hash positive and indexable 120 if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) { 121 return false; 122 } 123 combinedHash += hash2; 124 } 125 return true; 126 } 127 128 private /* static */ long lowerEight(byte[] bytes) { 129 return Longs.fromBytes( 130 bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); 131 } 132 133 private /* static */ long upperEight(byte[] bytes) { 134 return Longs.fromBytes( 135 bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]); 136 } 137 }; 138 139 // Note: We use this instead of java.util.BitSet because we need access to the long[] data field 140 static final class BitArray { 141 final long[] data; 142 long bitCount; 143 BitArray(long bits)144 BitArray(long bits) { 145 this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]); 146 } 147 148 // Used by serialization BitArray(long[] data)149 BitArray(long[] data) { 150 checkArgument(data.length > 0, "data length is zero!"); 151 this.data = data; 152 long bitCount = 0; 153 for (long value : data) { 154 bitCount += Long.bitCount(value); 155 } 156 this.bitCount = bitCount; 157 } 158 159 /** Returns true if the bit changed value. */ set(long index)160 boolean set(long index) { 161 if (!get(index)) { 162 data[(int) (index >>> 6)] |= (1L << index); 163 bitCount++; 164 return true; 165 } 166 return false; 167 } 168 get(long index)169 boolean get(long index) { 170 return (data[(int) (index >>> 6)] & (1L << index)) != 0; 171 } 172 173 /** Number of bits */ bitSize()174 long bitSize() { 175 return (long) data.length * Long.SIZE; 176 } 177 178 /** Number of set bits (1s) */ bitCount()179 long bitCount() { 180 return bitCount; 181 } 182 copy()183 BitArray copy() { 184 return new BitArray(data.clone()); 185 } 186 187 /** Combines the two BitArrays using bitwise OR. */ putAll(BitArray array)188 void putAll(BitArray array) { 189 checkArgument(data.length == array.data.length, 190 "BitArrays must be of equal length (%s != %s)", data.length, array.data.length); 191 bitCount = 0; 192 for (int i = 0; i < data.length; i++) { 193 data[i] |= array.data[i]; 194 bitCount += Long.bitCount(data[i]); 195 } 196 } 197 equals(Object o)198 @Override public boolean equals(Object o) { 199 if (o instanceof BitArray) { 200 BitArray bitArray = (BitArray) o; 201 return Arrays.equals(data, bitArray.data); 202 } 203 return false; 204 } 205 hashCode()206 @Override public int hashCode() { 207 return Arrays.hashCode(data); 208 } 209 } 210 } 211