• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Guava Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.common.hash;
16 
17 import static com.google.common.base.Preconditions.checkArgument;
18 
19 import com.google.common.math.LongMath;
20 import com.google.common.primitives.Ints;
21 import com.google.common.primitives.Longs;
22 
23 import java.math.RoundingMode;
24 import java.util.Arrays;
25 
26 /**
27  * Collections of strategies of generating the k * log(M) bits required for an element to
28  * be mapped to a BloomFilter of M bits and k hash functions. These
29  * strategies are part of the serialized form of the Bloom filters that use them, thus they must be
30  * preserved as is (no updates allowed, only introduction of new versions).
31  *
32  * Important: the order of the constants cannot change, and they cannot be deleted - we depend
33  * on their ordinal for BloomFilter serialization.
34  *
35  * @author Dimitris Andreou
36  * @author Kurt Alfred Kluever
37  */
38 enum BloomFilterStrategies implements BloomFilter.Strategy {
39   /**
40    * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and
41    * Michael Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the
42    * performance of a Bloom filter (yet only needs two 32bit hash functions).
43    */
MURMUR128_MITZ_32()44   MURMUR128_MITZ_32() {
45     @Override public <T> boolean put(T object, Funnel<? super T> funnel,
46         int numHashFunctions, BitArray bits) {
47       long bitSize = bits.bitSize();
48       long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
49       int hash1 = (int) hash64;
50       int hash2 = (int) (hash64 >>> 32);
51 
52       boolean bitsChanged = false;
53       for (int i = 1; i <= numHashFunctions; i++) {
54         int combinedHash = hash1 + (i * hash2);
55         // Flip all the bits if it's negative (guaranteed positive number)
56         if (combinedHash < 0) {
57           combinedHash = ~combinedHash;
58         }
59         bitsChanged |= bits.set(combinedHash % bitSize);
60       }
61       return bitsChanged;
62     }
63 
64     @Override public <T> boolean mightContain(T object, Funnel<? super T> funnel,
65         int numHashFunctions, BitArray bits) {
66       long bitSize = bits.bitSize();
67       long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong();
68       int hash1 = (int) hash64;
69       int hash2 = (int) (hash64 >>> 32);
70 
71       for (int i = 1; i <= numHashFunctions; i++) {
72         int combinedHash = hash1 + (i * hash2);
73         // Flip all the bits if it's negative (guaranteed positive number)
74         if (combinedHash < 0) {
75           combinedHash = ~combinedHash;
76         }
77         if (!bits.get(combinedHash % bitSize)) {
78           return false;
79         }
80       }
81       return true;
82     }
83   },
84   /**
85    * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks
86    * different than the implementation in MURMUR128_MITZ_32 because we're avoiding the
87    * multiplication in the loop and doing a (much simpler) += hash2. We're also changing the
88    * index to a positive number by AND'ing with Long.MAX_VALUE instead of flipping the bits.
89    */
MURMUR128_MITZ_64()90   MURMUR128_MITZ_64() {
91     @Override
92     public <T> boolean put(T object, Funnel<? super T> funnel,
93         int numHashFunctions, BitArray bits) {
94       long bitSize = bits.bitSize();
95       byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
96       long hash1 = lowerEight(bytes);
97       long hash2 = upperEight(bytes);
98 
99       boolean bitsChanged = false;
100       long combinedHash = hash1;
101       for (int i = 0; i < numHashFunctions; i++) {
102         // Make the combined hash positive and indexable
103         bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize);
104         combinedHash += hash2;
105       }
106       return bitsChanged;
107     }
108 
109     @Override
110     public <T> boolean mightContain(T object, Funnel<? super T> funnel,
111         int numHashFunctions, BitArray bits) {
112       long bitSize = bits.bitSize();
113       byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal();
114       long hash1 = lowerEight(bytes);
115       long hash2 = upperEight(bytes);
116 
117       long combinedHash = hash1;
118       for (int i = 0; i < numHashFunctions; i++) {
119         // Make the combined hash positive and indexable
120         if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) {
121           return false;
122         }
123         combinedHash += hash2;
124       }
125       return true;
126     }
127 
128     private /* static */ long lowerEight(byte[] bytes) {
129       return Longs.fromBytes(
130           bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
131     }
132 
133     private /* static */ long upperEight(byte[] bytes) {
134       return Longs.fromBytes(
135           bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]);
136     }
137   };
138 
139   // Note: We use this instead of java.util.BitSet because we need access to the long[] data field
140   static final class BitArray {
141     final long[] data;
142     long bitCount;
143 
BitArray(long bits)144     BitArray(long bits) {
145       this(new long[Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))]);
146     }
147 
148     // Used by serialization
BitArray(long[] data)149     BitArray(long[] data) {
150       checkArgument(data.length > 0, "data length is zero!");
151       this.data = data;
152       long bitCount = 0;
153       for (long value : data) {
154         bitCount += Long.bitCount(value);
155       }
156       this.bitCount = bitCount;
157     }
158 
159     /** Returns true if the bit changed value. */
set(long index)160     boolean set(long index) {
161       if (!get(index)) {
162         data[(int) (index >>> 6)] |= (1L << index);
163         bitCount++;
164         return true;
165       }
166       return false;
167     }
168 
get(long index)169     boolean get(long index) {
170       return (data[(int) (index >>> 6)] & (1L << index)) != 0;
171     }
172 
173     /** Number of bits */
bitSize()174     long bitSize() {
175       return (long) data.length * Long.SIZE;
176     }
177 
178     /** Number of set bits (1s) */
bitCount()179     long bitCount() {
180       return bitCount;
181     }
182 
copy()183     BitArray copy() {
184       return new BitArray(data.clone());
185     }
186 
187     /** Combines the two BitArrays using bitwise OR. */
putAll(BitArray array)188     void putAll(BitArray array) {
189       checkArgument(data.length == array.data.length,
190           "BitArrays must be of equal length (%s != %s)", data.length, array.data.length);
191       bitCount = 0;
192       for (int i = 0; i < data.length; i++) {
193         data[i] |= array.data[i];
194         bitCount += Long.bitCount(data[i]);
195       }
196     }
197 
equals(Object o)198     @Override public boolean equals(Object o) {
199       if (o instanceof BitArray) {
200         BitArray bitArray = (BitArray) o;
201         return Arrays.equals(data, bitArray.data);
202       }
203       return false;
204     }
205 
hashCode()206     @Override public int hashCode() {
207       return Arrays.hashCode(data);
208     }
209   }
210 }
211