• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
11 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
12 
13 #include "llvm/ADT/SparseBitVector.h"
14 #include "llvm/ADT/iterator.h"
15 #include "llvm/DebugInfo/PDB/Native/RawError.h"
16 #include "llvm/Support/BinaryStreamReader.h"
17 #include "llvm/Support/BinaryStreamWriter.h"
18 #include "llvm/Support/Endian.h"
19 #include "llvm/Support/Error.h"
20 #include <cstdint>
21 #include <iterator>
22 #include <utility>
23 #include <vector>
24 
25 namespace llvm {
26 
27 class BinaryStreamReader;
28 class BinaryStreamWriter;
29 
30 namespace pdb {
31 
32 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
33 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
34 
35 template <typename ValueT, typename TraitsT> class HashTable;
36 
37 template <typename ValueT, typename TraitsT>
38 class HashTableIterator
39     : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
40                                   std::forward_iterator_tag,
41                                   std::pair<uint32_t, ValueT>> {
42   friend HashTable<ValueT, TraitsT>;
43 
HashTableIterator(const HashTable<ValueT,TraitsT> & Map,uint32_t Index,bool IsEnd)44   HashTableIterator(const HashTable<ValueT, TraitsT> &Map, uint32_t Index,
45                     bool IsEnd)
46       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
47 
48 public:
HashTableIterator(const HashTable<ValueT,TraitsT> & Map)49   HashTableIterator(const HashTable<ValueT, TraitsT> &Map) : Map(&Map) {
50     int I = Map.Present.find_first();
51     if (I == -1) {
52       Index = 0;
53       IsEnd = true;
54     } else {
55       Index = static_cast<uint32_t>(I);
56       IsEnd = false;
57     }
58   }
59 
60   HashTableIterator &operator=(const HashTableIterator &R) {
61     Map = R.Map;
62     return *this;
63   }
64   bool operator==(const HashTableIterator &R) const {
65     if (IsEnd && R.IsEnd)
66       return true;
67     if (IsEnd != R.IsEnd)
68       return false;
69 
70     return (Map == R.Map) && (Index == R.Index);
71   }
72   const std::pair<uint32_t, ValueT> &operator*() const {
73     assert(Map->Present.test(Index));
74     return Map->Buckets[Index];
75   }
76   HashTableIterator &operator++() {
77     while (Index < Map->Buckets.size()) {
78       ++Index;
79       if (Map->Present.test(Index))
80         return *this;
81     }
82 
83     IsEnd = true;
84     return *this;
85   }
86 
87 private:
isEnd()88   bool isEnd() const { return IsEnd; }
index()89   uint32_t index() const { return Index; }
90 
91   const HashTable<ValueT, TraitsT> *Map;
92   uint32_t Index;
93   bool IsEnd;
94 };
95 
96 template <typename T> struct PdbHashTraits {};
97 
98 template <> struct PdbHashTraits<uint32_t> {
99   uint32_t hashLookupKey(uint32_t N) const { return N; }
100   uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
101   uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
102 };
103 
104 template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
105 class HashTable {
106   using iterator = HashTableIterator<ValueT, TraitsT>;
107   friend iterator;
108 
109   struct Header {
110     support::ulittle32_t Size;
111     support::ulittle32_t Capacity;
112   };
113 
114   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
115 
116 public:
117   HashTable() { Buckets.resize(8); }
118 
119   explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
120   HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
121     Buckets.resize(Capacity);
122   }
123 
124   Error load(BinaryStreamReader &Stream) {
125     const Header *H;
126     if (auto EC = Stream.readObject(H))
127       return EC;
128     if (H->Capacity == 0)
129       return make_error<RawError>(raw_error_code::corrupt_file,
130                                   "Invalid Hash Table Capacity");
131     if (H->Size > maxLoad(H->Capacity))
132       return make_error<RawError>(raw_error_code::corrupt_file,
133                                   "Invalid Hash Table Size");
134 
135     Buckets.resize(H->Capacity);
136 
137     if (auto EC = readSparseBitVector(Stream, Present))
138       return EC;
139     if (Present.count() != H->Size)
140       return make_error<RawError>(raw_error_code::corrupt_file,
141                                   "Present bit vector does not match size!");
142 
143     if (auto EC = readSparseBitVector(Stream, Deleted))
144       return EC;
145     if (Present.intersects(Deleted))
146       return make_error<RawError>(raw_error_code::corrupt_file,
147                                   "Present bit vector interesects deleted!");
148 
149     for (uint32_t P : Present) {
150       if (auto EC = Stream.readInteger(Buckets[P].first))
151         return EC;
152       const ValueT *Value;
153       if (auto EC = Stream.readObject(Value))
154         return EC;
155       Buckets[P].second = *Value;
156     }
157 
158     return Error::success();
159   }
160 
161   uint32_t calculateSerializedLength() const {
162     uint32_t Size = sizeof(Header);
163 
164     constexpr int BitsPerWord = 8 * sizeof(uint32_t);
165 
166     int NumBitsP = Present.find_last() + 1;
167     int NumBitsD = Deleted.find_last() + 1;
168 
169     uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
170     uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
171 
172     // Present bit set number of words (4 bytes), followed by that many actual
173     // words (4 bytes each).
174     Size += sizeof(uint32_t);
175     Size += NumWordsP * sizeof(uint32_t);
176 
177     // Deleted bit set number of words (4 bytes), followed by that many actual
178     // words (4 bytes each).
179     Size += sizeof(uint32_t);
180     Size += NumWordsD * sizeof(uint32_t);
181 
182     // One (Key, ValueT) pair for each entry Present.
183     Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
184 
185     return Size;
186   }
187 
188   Error commit(BinaryStreamWriter &Writer) const {
189     Header H;
190     H.Size = size();
191     H.Capacity = capacity();
192     if (auto EC = Writer.writeObject(H))
193       return EC;
194 
195     if (auto EC = writeSparseBitVector(Writer, Present))
196       return EC;
197 
198     if (auto EC = writeSparseBitVector(Writer, Deleted))
199       return EC;
200 
201     for (const auto &Entry : *this) {
202       if (auto EC = Writer.writeInteger(Entry.first))
203         return EC;
204       if (auto EC = Writer.writeObject(Entry.second))
205         return EC;
206     }
207     return Error::success();
208   }
209 
210   void clear() {
211     Buckets.resize(8);
212     Present.clear();
213     Deleted.clear();
214   }
215 
216   bool empty() const { return size() == 0; }
217   uint32_t capacity() const { return Buckets.size(); }
218   uint32_t size() const { return Present.count(); }
219 
220   iterator begin() const { return iterator(*this); }
221   iterator end() const { return iterator(*this, 0, true); }
222 
223   /// Find the entry whose key has the specified hash value, using the specified
224   /// traits defining hash function and equality.
225   template <typename Key> iterator find_as(const Key &K) const {
226     uint32_t H = Traits.hashLookupKey(K) % capacity();
227     uint32_t I = H;
228     Optional<uint32_t> FirstUnused;
229     do {
230       if (isPresent(I)) {
231         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
232           return iterator(*this, I, false);
233       } else {
234         if (!FirstUnused)
235           FirstUnused = I;
236         // Insertion occurs via linear probing from the slot hint, and will be
237         // inserted at the first empty / deleted location.  Therefore, if we are
238         // probing and find a location that is neither present nor deleted, then
239         // nothing must have EVER been inserted at this location, and thus it is
240         // not possible for a matching value to occur later.
241         if (!isDeleted(I))
242           break;
243       }
244       I = (I + 1) % capacity();
245     } while (I != H);
246 
247     // The only way FirstUnused would not be set is if every single entry in the
248     // table were Present.  But this would violate the load factor constraints
249     // that we impose, so it should never happen.
250     assert(FirstUnused);
251     return iterator(*this, *FirstUnused, true);
252   }
253 
254   /// Set the entry using a key type that the specified Traits can convert
255   /// from a real key to an internal key.
256   template <typename Key> bool set_as(const Key &K, ValueT V) {
257     return set_as_internal(K, std::move(V), None);
258   }
259 
260   template <typename Key> ValueT get(const Key &K) const {
261     auto Iter = find_as(K);
262     assert(Iter != end());
263     return (*Iter).second;
264   }
265 
266 protected:
267   bool isPresent(uint32_t K) const { return Present.test(K); }
268   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
269 
270   TraitsT Traits;
271   BucketList Buckets;
272   mutable SparseBitVector<> Present;
273   mutable SparseBitVector<> Deleted;
274 
275 private:
276   /// Set the entry using a key type that the specified Traits can convert
277   /// from a real key to an internal key.
278   template <typename Key>
279   bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
280     auto Entry = find_as(K);
281     if (Entry != end()) {
282       assert(isPresent(Entry.index()));
283       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
284       // We're updating, no need to do anything special.
285       Buckets[Entry.index()].second = V;
286       return false;
287     }
288 
289     auto &B = Buckets[Entry.index()];
290     assert(!isPresent(Entry.index()));
291     assert(Entry.isEnd());
292     B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
293     B.second = V;
294     Present.set(Entry.index());
295     Deleted.reset(Entry.index());
296 
297     grow();
298 
299     assert((find_as(K)) != end());
300     return true;
301   }
302 
303   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
304 
305   void grow() {
306     uint32_t S = size();
307     uint32_t MaxLoad = maxLoad(capacity());
308     if (S < maxLoad(capacity()))
309       return;
310     assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
311 
312     uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
313 
314     // Growing requires rebuilding the table and re-hashing every item.  Make a
315     // copy with a larger capacity, insert everything into the copy, then swap
316     // it in.
317     HashTable NewMap(NewCapacity, Traits);
318     for (auto I : Present) {
319       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
320       NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
321     }
322 
323     Buckets.swap(NewMap.Buckets);
324     std::swap(Present, NewMap.Present);
325     std::swap(Deleted, NewMap.Deleted);
326     assert(capacity() == NewCapacity);
327     assert(size() == S);
328   }
329 };
330 
331 } // end namespace pdb
332 
333 } // end namespace llvm
334 
335 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
336