1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 11 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 12 13 #include "llvm/ADT/SparseBitVector.h" 14 #include "llvm/ADT/iterator.h" 15 #include "llvm/DebugInfo/PDB/Native/RawError.h" 16 #include "llvm/Support/BinaryStreamReader.h" 17 #include "llvm/Support/BinaryStreamWriter.h" 18 #include "llvm/Support/Endian.h" 19 #include "llvm/Support/Error.h" 20 #include <cstdint> 21 #include <iterator> 22 #include <utility> 23 #include <vector> 24 25 namespace llvm { 26 27 class BinaryStreamReader; 28 class BinaryStreamWriter; 29 30 namespace pdb { 31 32 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); 33 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); 34 35 template <typename ValueT, typename TraitsT> class HashTable; 36 37 template <typename ValueT, typename TraitsT> 38 class HashTableIterator 39 : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>, 40 std::forward_iterator_tag, 41 std::pair<uint32_t, ValueT>> { 42 friend HashTable<ValueT, TraitsT>; 43 HashTableIterator(const HashTable<ValueT,TraitsT> & Map,uint32_t Index,bool IsEnd)44 HashTableIterator(const HashTable<ValueT, TraitsT> &Map, uint32_t Index, 45 bool IsEnd) 46 : Map(&Map), Index(Index), IsEnd(IsEnd) {} 47 48 public: HashTableIterator(const HashTable<ValueT,TraitsT> & Map)49 HashTableIterator(const HashTable<ValueT, TraitsT> &Map) : Map(&Map) { 50 int I = Map.Present.find_first(); 51 if (I == -1) { 52 Index = 0; 53 IsEnd = true; 54 } else { 55 Index = static_cast<uint32_t>(I); 56 IsEnd = false; 57 } 58 } 59 60 HashTableIterator &operator=(const HashTableIterator &R) { 61 Map = R.Map; 62 return *this; 63 } 64 bool operator==(const HashTableIterator &R) const { 65 if (IsEnd && R.IsEnd) 66 return true; 67 if (IsEnd != R.IsEnd) 68 return false; 69 70 return (Map == R.Map) && (Index == R.Index); 71 } 72 const std::pair<uint32_t, ValueT> &operator*() const { 73 assert(Map->Present.test(Index)); 74 return Map->Buckets[Index]; 75 } 76 HashTableIterator &operator++() { 77 while (Index < Map->Buckets.size()) { 78 ++Index; 79 if (Map->Present.test(Index)) 80 return *this; 81 } 82 83 IsEnd = true; 84 return *this; 85 } 86 87 private: isEnd()88 bool isEnd() const { return IsEnd; } index()89 uint32_t index() const { return Index; } 90 91 const HashTable<ValueT, TraitsT> *Map; 92 uint32_t Index; 93 bool IsEnd; 94 }; 95 96 template <typename T> struct PdbHashTraits {}; 97 98 template <> struct PdbHashTraits<uint32_t> { 99 uint32_t hashLookupKey(uint32_t N) const { return N; } 100 uint32_t storageKeyToLookupKey(uint32_t N) const { return N; } 101 uint32_t lookupKeyToStorageKey(uint32_t N) { return N; } 102 }; 103 104 template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>> 105 class HashTable { 106 using iterator = HashTableIterator<ValueT, TraitsT>; 107 friend iterator; 108 109 struct Header { 110 support::ulittle32_t Size; 111 support::ulittle32_t Capacity; 112 }; 113 114 using BucketList = std::vector<std::pair<uint32_t, ValueT>>; 115 116 public: 117 HashTable() { Buckets.resize(8); } 118 119 explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {} 120 HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) { 121 Buckets.resize(Capacity); 122 } 123 124 Error load(BinaryStreamReader &Stream) { 125 const Header *H; 126 if (auto EC = Stream.readObject(H)) 127 return EC; 128 if (H->Capacity == 0) 129 return make_error<RawError>(raw_error_code::corrupt_file, 130 "Invalid Hash Table Capacity"); 131 if (H->Size > maxLoad(H->Capacity)) 132 return make_error<RawError>(raw_error_code::corrupt_file, 133 "Invalid Hash Table Size"); 134 135 Buckets.resize(H->Capacity); 136 137 if (auto EC = readSparseBitVector(Stream, Present)) 138 return EC; 139 if (Present.count() != H->Size) 140 return make_error<RawError>(raw_error_code::corrupt_file, 141 "Present bit vector does not match size!"); 142 143 if (auto EC = readSparseBitVector(Stream, Deleted)) 144 return EC; 145 if (Present.intersects(Deleted)) 146 return make_error<RawError>(raw_error_code::corrupt_file, 147 "Present bit vector interesects deleted!"); 148 149 for (uint32_t P : Present) { 150 if (auto EC = Stream.readInteger(Buckets[P].first)) 151 return EC; 152 const ValueT *Value; 153 if (auto EC = Stream.readObject(Value)) 154 return EC; 155 Buckets[P].second = *Value; 156 } 157 158 return Error::success(); 159 } 160 161 uint32_t calculateSerializedLength() const { 162 uint32_t Size = sizeof(Header); 163 164 constexpr int BitsPerWord = 8 * sizeof(uint32_t); 165 166 int NumBitsP = Present.find_last() + 1; 167 int NumBitsD = Deleted.find_last() + 1; 168 169 uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord; 170 uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord; 171 172 // Present bit set number of words (4 bytes), followed by that many actual 173 // words (4 bytes each). 174 Size += sizeof(uint32_t); 175 Size += NumWordsP * sizeof(uint32_t); 176 177 // Deleted bit set number of words (4 bytes), followed by that many actual 178 // words (4 bytes each). 179 Size += sizeof(uint32_t); 180 Size += NumWordsD * sizeof(uint32_t); 181 182 // One (Key, ValueT) pair for each entry Present. 183 Size += (sizeof(uint32_t) + sizeof(ValueT)) * size(); 184 185 return Size; 186 } 187 188 Error commit(BinaryStreamWriter &Writer) const { 189 Header H; 190 H.Size = size(); 191 H.Capacity = capacity(); 192 if (auto EC = Writer.writeObject(H)) 193 return EC; 194 195 if (auto EC = writeSparseBitVector(Writer, Present)) 196 return EC; 197 198 if (auto EC = writeSparseBitVector(Writer, Deleted)) 199 return EC; 200 201 for (const auto &Entry : *this) { 202 if (auto EC = Writer.writeInteger(Entry.first)) 203 return EC; 204 if (auto EC = Writer.writeObject(Entry.second)) 205 return EC; 206 } 207 return Error::success(); 208 } 209 210 void clear() { 211 Buckets.resize(8); 212 Present.clear(); 213 Deleted.clear(); 214 } 215 216 bool empty() const { return size() == 0; } 217 uint32_t capacity() const { return Buckets.size(); } 218 uint32_t size() const { return Present.count(); } 219 220 iterator begin() const { return iterator(*this); } 221 iterator end() const { return iterator(*this, 0, true); } 222 223 /// Find the entry whose key has the specified hash value, using the specified 224 /// traits defining hash function and equality. 225 template <typename Key> iterator find_as(const Key &K) const { 226 uint32_t H = Traits.hashLookupKey(K) % capacity(); 227 uint32_t I = H; 228 Optional<uint32_t> FirstUnused; 229 do { 230 if (isPresent(I)) { 231 if (Traits.storageKeyToLookupKey(Buckets[I].first) == K) 232 return iterator(*this, I, false); 233 } else { 234 if (!FirstUnused) 235 FirstUnused = I; 236 // Insertion occurs via linear probing from the slot hint, and will be 237 // inserted at the first empty / deleted location. Therefore, if we are 238 // probing and find a location that is neither present nor deleted, then 239 // nothing must have EVER been inserted at this location, and thus it is 240 // not possible for a matching value to occur later. 241 if (!isDeleted(I)) 242 break; 243 } 244 I = (I + 1) % capacity(); 245 } while (I != H); 246 247 // The only way FirstUnused would not be set is if every single entry in the 248 // table were Present. But this would violate the load factor constraints 249 // that we impose, so it should never happen. 250 assert(FirstUnused); 251 return iterator(*this, *FirstUnused, true); 252 } 253 254 /// Set the entry using a key type that the specified Traits can convert 255 /// from a real key to an internal key. 256 template <typename Key> bool set_as(const Key &K, ValueT V) { 257 return set_as_internal(K, std::move(V), None); 258 } 259 260 template <typename Key> ValueT get(const Key &K) const { 261 auto Iter = find_as(K); 262 assert(Iter != end()); 263 return (*Iter).second; 264 } 265 266 protected: 267 bool isPresent(uint32_t K) const { return Present.test(K); } 268 bool isDeleted(uint32_t K) const { return Deleted.test(K); } 269 270 TraitsT Traits; 271 BucketList Buckets; 272 mutable SparseBitVector<> Present; 273 mutable SparseBitVector<> Deleted; 274 275 private: 276 /// Set the entry using a key type that the specified Traits can convert 277 /// from a real key to an internal key. 278 template <typename Key> 279 bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) { 280 auto Entry = find_as(K); 281 if (Entry != end()) { 282 assert(isPresent(Entry.index())); 283 assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); 284 // We're updating, no need to do anything special. 285 Buckets[Entry.index()].second = V; 286 return false; 287 } 288 289 auto &B = Buckets[Entry.index()]; 290 assert(!isPresent(Entry.index())); 291 assert(Entry.isEnd()); 292 B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K); 293 B.second = V; 294 Present.set(Entry.index()); 295 Deleted.reset(Entry.index()); 296 297 grow(); 298 299 assert((find_as(K)) != end()); 300 return true; 301 } 302 303 static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 304 305 void grow() { 306 uint32_t S = size(); 307 uint32_t MaxLoad = maxLoad(capacity()); 308 if (S < maxLoad(capacity())) 309 return; 310 assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 311 312 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX; 313 314 // Growing requires rebuilding the table and re-hashing every item. Make a 315 // copy with a larger capacity, insert everything into the copy, then swap 316 // it in. 317 HashTable NewMap(NewCapacity, Traits); 318 for (auto I : Present) { 319 auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); 320 NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first); 321 } 322 323 Buckets.swap(NewMap.Buckets); 324 std::swap(Present, NewMap.Present); 325 std::swap(Deleted, NewMap.Deleted); 326 assert(capacity() == NewCapacity); 327 assert(size() == S); 328 } 329 }; 330 331 } // end namespace pdb 332 333 } // end namespace llvm 334 335 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H 336