• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23  * THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef BloomFilter_h
27 #define BloomFilter_h
28 
29 #include "wtf/Compiler.h"
30 #include "wtf/text/AtomicString.h"
31 
32 namespace WTF {
33 
34 // Counting bloom filter with k=2 and 8 bit counters. Uses 2^keyBits bytes of memory.
35 // False positive rate is approximately (1-e^(-2n/m))^2, where n is the number of unique
36 // keys and m is the table size (==2^keyBits).
37 template <unsigned keyBits>
38 class BloomFilter {
39 public:
40     COMPILE_ASSERT(keyBits <= 16, bloom_filter_key_size);
41 
42     static const size_t tableSize = 1 << keyBits;
43     static const unsigned keyMask = (1 << keyBits) - 1;
maximumCount()44     static uint8_t maximumCount() { return std::numeric_limits<uint8_t>::max(); }
45 
BloomFilter()46     BloomFilter() { clear(); }
47 
48     void add(unsigned hash);
49     void remove(unsigned hash);
50 
51     // The filter may give false positives (claim it may contain a key it doesn't)
52     // but never false negatives (claim it doesn't contain a key it does).
mayContain(unsigned hash)53     bool mayContain(unsigned hash) const { return firstSlot(hash) && secondSlot(hash); }
54 
55     // The filter must be cleared before reuse even if all keys are removed.
56     // Otherwise overflowed keys will stick around.
57     void clear();
58 
add(const AtomicString & string)59     void add(const AtomicString& string) { add(string.impl()->existingHash()); }
add(const String & string)60     void add(const String& string) { add(string.impl()->hash()); }
remove(const AtomicString & string)61     void remove(const AtomicString& string) { remove(string.impl()->existingHash()); }
remove(const String & string)62     void remove(const String& string) { remove(string.impl()->hash()); }
63 
mayContain(const AtomicString & string)64     bool mayContain(const AtomicString& string) const { return mayContain(string.impl()->existingHash()); }
mayContain(const String & string)65     bool mayContain(const String& string) const { return mayContain(string.impl()->hash()); }
66 
67 #if ASSERT_ENABLED
68     // Slow.
69     bool likelyEmpty() const;
70     bool isClear() const;
71 #endif
72 
73 private:
firstSlot(unsigned hash)74     uint8_t& firstSlot(unsigned hash) { return m_table[hash & keyMask]; }
secondSlot(unsigned hash)75     uint8_t& secondSlot(unsigned hash) { return m_table[(hash >> 16) & keyMask]; }
firstSlot(unsigned hash)76     const uint8_t& firstSlot(unsigned hash) const { return m_table[hash & keyMask]; }
secondSlot(unsigned hash)77     const uint8_t& secondSlot(unsigned hash) const { return m_table[(hash >> 16) & keyMask]; }
78 
79     uint8_t m_table[tableSize];
80 };
81 
82 template <unsigned keyBits>
add(unsigned hash)83 inline void BloomFilter<keyBits>::add(unsigned hash)
84 {
85     uint8_t& first = firstSlot(hash);
86     uint8_t& second = secondSlot(hash);
87     if (LIKELY(first < maximumCount()))
88         ++first;
89     if (LIKELY(second < maximumCount()))
90         ++second;
91 }
92 
93 template <unsigned keyBits>
remove(unsigned hash)94 inline void BloomFilter<keyBits>::remove(unsigned hash)
95 {
96     uint8_t& first = firstSlot(hash);
97     uint8_t& second = secondSlot(hash);
98     ASSERT(first);
99     ASSERT(second);
100     // In case of an overflow, the slot sticks in the table until clear().
101     if (LIKELY(first < maximumCount()))
102         --first;
103     if (LIKELY(second < maximumCount()))
104         --second;
105 }
106 
107 template <unsigned keyBits>
clear()108 inline void BloomFilter<keyBits>::clear()
109 {
110     memset(m_table, 0, tableSize);
111 }
112 
113 #if ASSERT_ENABLED
114 template <unsigned keyBits>
likelyEmpty()115 bool BloomFilter<keyBits>::likelyEmpty() const
116 {
117     for (size_t n = 0; n < tableSize; ++n) {
118         if (m_table[n] && m_table[n] != maximumCount())
119             return false;
120     }
121     return true;
122 }
123 
124 template <unsigned keyBits>
isClear()125 bool BloomFilter<keyBits>::isClear() const
126 {
127     for (size_t n = 0; n < tableSize; ++n) {
128         if (m_table[n])
129             return false;
130     }
131     return true;
132 }
133 #endif
134 
135 }
136 
137 using WTF::BloomFilter;
138 
139 #endif
140