• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 
5 #include "leveldb/filter_policy.h"
6 
7 #include "leveldb/slice.h"
8 #include "util/hash.h"
9 
10 namespace leveldb {
11 
12 namespace {
BloomHash(const Slice & key)13 static uint32_t BloomHash(const Slice& key) {
14   return Hash(key.data(), key.size(), 0xbc9f1d34);
15 }
16 
17 class BloomFilterPolicy : public FilterPolicy {
18  private:
19   size_t bits_per_key_;
20   size_t k_;
21 
22  public:
BloomFilterPolicy(int bits_per_key)23   explicit BloomFilterPolicy(int bits_per_key)
24       : bits_per_key_(bits_per_key) {
25     // We intentionally round down to reduce probing cost a little bit
26     k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
27     if (k_ < 1) k_ = 1;
28     if (k_ > 30) k_ = 30;
29   }
30 
Name() const31   virtual const char* Name() const {
32     return "leveldb.BuiltinBloomFilter";
33   }
34 
CreateFilter(const Slice * keys,int n,std::string * dst) const35   virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
36     // Compute bloom filter size (in both bits and bytes)
37     size_t bits = n * bits_per_key_;
38 
39     // For small n, we can see a very high false positive rate.  Fix it
40     // by enforcing a minimum bloom filter length.
41     if (bits < 64) bits = 64;
42 
43     size_t bytes = (bits + 7) / 8;
44     bits = bytes * 8;
45 
46     const size_t init_size = dst->size();
47     dst->resize(init_size + bytes, 0);
48     dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
49     char* array = &(*dst)[init_size];
50     for (size_t i = 0; i < n; i++) {
51       // Use double-hashing to generate a sequence of hash values.
52       // See analysis in [Kirsch,Mitzenmacher 2006].
53       uint32_t h = BloomHash(keys[i]);
54       const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
55       for (size_t j = 0; j < k_; j++) {
56         const uint32_t bitpos = h % bits;
57         array[bitpos/8] |= (1 << (bitpos % 8));
58         h += delta;
59       }
60     }
61   }
62 
KeyMayMatch(const Slice & key,const Slice & bloom_filter) const63   virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
64     const size_t len = bloom_filter.size();
65     if (len < 2) return false;
66 
67     const char* array = bloom_filter.data();
68     const size_t bits = (len - 1) * 8;
69 
70     // Use the encoded k so that we can read filters generated by
71     // bloom filters created using different parameters.
72     const size_t k = array[len-1];
73     if (k > 30) {
74       // Reserved for potentially new encodings for short bloom filters.
75       // Consider it a match.
76       return true;
77     }
78 
79     uint32_t h = BloomHash(key);
80     const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
81     for (size_t j = 0; j < k; j++) {
82       const uint32_t bitpos = h % bits;
83       if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
84       h += delta;
85     }
86     return true;
87   }
88 };
89 }
90 
NewBloomFilterPolicy(int bits_per_key)91 const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
92   return new BloomFilterPolicy(bits_per_key);
93 }
94 
95 }  // namespace leveldb
96