• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 // Licensed under the Apache License, Version 2.0 (the "License");
3 // you may not use this file except in compliance with the License.
4 // You may obtain a copy of the License at
5 //
6 //     http://www.apache.org/licenses/LICENSE-2.0
7 //
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
14 // Copyright 2005-2010 Google, Inc.
15 // Author: Jeffrey Soresnen (sorenj@google.com)
16 
17 #include <fst/extensions/ngram/bitmap-index.h>
18 
19 #include <algorithm>
20 #include <iterator>
21 
22 #include <fst/extensions/ngram/nthbit.h>
23 
24 namespace fst {
25 
26 // These two internal classes implemented inverted views of the
27 // primary and secondary indexes.  That is, they provide iterators
28 // that have operator*'s that return the number zeros rather than
29 // the number of ones.
30 
31 class primary_index_inverted : public vector<uint32>::const_iterator {
32  public:
primary_index_inverted()33   primary_index_inverted() {}
primary_index_inverted(vector<uint32>::const_iterator loc,vector<uint32>::const_iterator begin)34   primary_index_inverted(vector<uint32>::const_iterator loc,
35                          vector<uint32>::const_iterator begin) :
36     vector<uint32>::const_iterator(loc), begin_(begin) {}
operator *()37   uint32 operator*() {
38     return BitmapIndex::kStorageBitSize * BitmapIndex::kSecondaryBlockSize *
39            (1 + std::distance<vector<uint32>::const_iterator>(begin_, *this)) -
40            vector<uint32>::const_iterator::operator*();
41   }
42  private:
43   vector<uint32>::const_iterator begin_;
44 };
45 
46 class secondary_index_inverted : public vector<uint16>::const_iterator {
47  public:
secondary_index_inverted()48   secondary_index_inverted() : vector<uint16>::const_iterator() {}
secondary_index_inverted(vector<uint16>::const_iterator loc,vector<uint16>::const_iterator block_begin)49   secondary_index_inverted(vector<uint16>::const_iterator loc,
50                            vector<uint16>::const_iterator block_begin) :
51     vector<uint16>::const_iterator(loc), block_begin_(block_begin) {}
operator *()52   uint16 operator*() {
53     return ((1 + std::distance<vector<uint16>::const_iterator>(
54         block_begin_, *this)) << BitmapIndex::kStorageLogBitSize) -
55         vector<uint16>::const_iterator::operator*();
56   }
57  private:
58   vector<uint16>::const_iterator block_begin_;
59 };
60 
Rank1(size_t end) const61 size_t BitmapIndex::Rank1(size_t end) const {
62   if (end == 0) return 0;
63   CHECK_LE(end, Bits());
64   const uint32 end_word = (end - 1) >> BitmapIndex::kStorageLogBitSize;
65   const uint32 sum = get_index_ones_count(end_word);
66   const uint64 zero = 0;
67   const uint64 ones = ~zero;
68   return sum + __builtin_popcountll(bits_[end_word] &
69       (ones >> (kStorageBitSize - (end & kStorageBlockMask))));
70 }
71 
Select1(size_t bit_index) const72 size_t BitmapIndex::Select1(size_t bit_index) const {
73   if (bit_index >= GetOnesCount()) return Bits();
74   // search primary index for the relevant block
75   uint32 rembits = bit_index + 1;
76   const uint32 block = find_primary_block(bit_index + 1);
77   uint32 offset = 0;
78   if (block > 0) {
79     rembits -= primary_index_[block - 1];
80     offset += block * kSecondaryBlockSize;
81   }
82   // search the secondary index
83   uint32 word = find_secondary_block(offset, rembits);
84   if (word > 0) {
85     rembits -= secondary_index_[offset + word - 1];
86     offset += word;
87   }
88   int nth = nth_bit(bits_[offset], rembits);
89   return (offset << BitmapIndex::kStorageLogBitSize) + nth;
90 }
91 
Select0(size_t bit_index) const92 size_t BitmapIndex::Select0(size_t bit_index) const {
93   if (bit_index >= Bits() - GetOnesCount()) return Bits();
94   // search inverted primary index for relevant block
95   uint32 remzeros = bit_index + 1;
96   uint32 offset = 0;
97   const uint32 block = find_inverted_primary_block(bit_index + 1);
98   if (block > 0) {
99     remzeros -= *primary_index_inverted(primary_index_.begin() + block - 1,
100                                         primary_index_.begin());
101     offset += block * kSecondaryBlockSize;
102   }
103   // search the inverted secondary index
104   uint32 word = find_inverted_secondary_block(offset, remzeros);
105   if (word > 0) {
106     vector<uint16>::const_iterator block_begin =
107         secondary_index_.begin() + offset;
108     remzeros -= *secondary_index_inverted(block_begin + word - 1, block_begin);
109     offset += word;
110   }
111   int nth = nth_bit(~bits_[offset], remzeros);
112   return (offset << BitmapIndex::kStorageLogBitSize) + nth;
113 }
114 
get_index_ones_count(size_t array_index) const115 size_t BitmapIndex::get_index_ones_count(size_t array_index) const {
116   uint32 sum = 0;
117   if (array_index > 0) {
118     sum += secondary_index_[array_index-1];
119     uint32 end_block = (array_index - 1) / kSecondaryBlockSize;
120     if (end_block > 0) sum += primary_index_[end_block-1];
121   }
122   return sum;
123 }
124 
BuildIndex(const uint64 * bits,size_t size)125 void BitmapIndex::BuildIndex(const uint64 *bits, size_t size) {
126   bits_ = bits;
127   size_ = size;
128   secondary_index_.clear();
129   secondary_index_.reserve(ArraySize());
130   primary_index_.clear();
131   primary_index_.reserve(primary_index_size());
132   const uint64 zero = 0;
133   const uint64 ones = ~zero;
134   uint32 popcount = 0;
135   for (uint32 block_begin = 0; block_begin < ArraySize();
136        block_begin += kSecondaryBlockSize) {
137     uint32 block_popcount = 0;
138     uint32 block_end = block_begin + kSecondaryBlockSize;
139     if (block_end > ArraySize()) block_end = ArraySize();
140     for (uint32 j = block_begin; j < block_end; ++j) {
141       uint64 mask = ones;
142       if (j == ArraySize() - 1) {
143         mask = ones >> (-size_ & BitmapIndex::kStorageBlockMask);
144       }
145       block_popcount += __builtin_popcountll(bits_[j] & mask);
146       secondary_index_.push_back(block_popcount);
147     }
148     popcount += block_popcount;
149     primary_index_.push_back(popcount);
150   }
151 }
152 
find_secondary_block(size_t block_begin,size_t rem_bit_index) const153 size_t BitmapIndex::find_secondary_block(
154     size_t block_begin, size_t rem_bit_index) const {
155   size_t block_end = block_begin + kSecondaryBlockSize;
156   if (block_end > secondary_index_.size()) block_end = secondary_index_.size();
157   return std::distance(secondary_index_.begin() + block_begin,
158                        std::lower_bound(secondary_index_.begin() + block_begin,
159                                         secondary_index_.begin() + block_end,
160                                         rem_bit_index));
161 }
162 
find_inverted_secondary_block(size_t block_begin,size_t rem_bit_index) const163 size_t BitmapIndex::find_inverted_secondary_block(
164     size_t block_begin, size_t rem_bit_index) const {
165   size_t block_end = block_begin + kSecondaryBlockSize;
166   if (block_end > secondary_index_.size()) block_end = secondary_index_.size();
167   secondary_index_inverted start(secondary_index_.begin() + block_begin,
168                                  secondary_index_.begin() + block_begin);
169   secondary_index_inverted end(secondary_index_.begin() + block_end,
170                                secondary_index_.begin() + block_begin);
171   return std::distance(start,
172                        std::lower_bound(start, end, rem_bit_index));
173 }
174 
find_primary_block(size_t bit_index) const175 inline size_t BitmapIndex::find_primary_block(size_t bit_index) const {
176   return std::distance(primary_index_.begin(),
177                        std::lower_bound(primary_index_.begin(),
178                                         primary_index_.end(), bit_index));
179 }
180 
find_inverted_primary_block(size_t bit_index) const181 size_t BitmapIndex::find_inverted_primary_block(size_t bit_index) const {
182   primary_index_inverted start(primary_index_.begin(), primary_index_.begin());
183   primary_index_inverted end(primary_index_.end(), primary_index_.begin());
184   return std::distance(start, std::lower_bound(start, end, bit_index));
185 }
186 
187 }  // end namespace fst
188