1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ART_LIBARTBASE_BASE_HASH_SET_H_
18 #define ART_LIBARTBASE_BASE_HASH_SET_H_
19
20 #include <stdint.h>
21
22 #include <functional>
23 #include <iterator>
24 #include <memory>
25 #include <string>
26 #include <type_traits>
27 #include <utility>
28
29 #include <android-base/logging.h>
30
31 #include "base/data_hash.h"
32 #include "bit_utils.h"
33 #include "macros.h"
34
35 namespace art {
36
// Forward iterator over the occupied slots of a HashSet.
// Provides the standard iterator member typedefs directly instead of
// deriving from std::iterator: std::iterator is deprecated in C++17, and the
// original (implicitly private) inheritance hid its typedefs from
// std::iterator_traits anyway.
template <class Elem, class HashSetType>
class HashSetIterator {
 public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = Elem;
  using difference_type = std::ptrdiff_t;
  using pointer = Elem*;
  using reference = Elem&;

  HashSetIterator(const HashSetIterator&) = default;
  HashSetIterator(HashSetIterator&&) = default;
  HashSetIterator(HashSetType* hash_set, size_t index) : index_(index), hash_set_(hash_set) {}

  // Conversion from iterator to const_iterator.
  template <class OtherElem,
            class OtherHashSetType,
            typename = typename std::enable_if<
                std::is_same<Elem, const OtherElem>::value &&
                std::is_same<HashSetType, const OtherHashSetType>::value>::type>
  HashSetIterator(const HashSetIterator<OtherElem, OtherHashSetType>& other)
      : index_(other.index_), hash_set_(other.hash_set_) {}

  HashSetIterator& operator=(const HashSetIterator&) = default;
  HashSetIterator& operator=(HashSetIterator&&) = default;

  // Iterators compare equal when they point at the same slot of the same set.
  bool operator==(const HashSetIterator& other) const {
    return hash_set_ == other.hash_set_ && this->index_ == other.index_;
  }

  bool operator!=(const HashSetIterator& other) const {
    return !(*this == other);
  }

  // Pre-increment: advance to the next non-empty slot; returns *this.
  HashSetIterator& operator++() {
    this->index_ = hash_set_->NextNonEmptySlot(index_);
    return *this;
  }

  // Post-increment: advances, but returns the value before modification.
  HashSetIterator operator++(int) {
    HashSetIterator temp = *this;
    ++*this;
    return temp;
  }

  // Dereference; the iterator must not point at a free slot (e.g. end()).
  Elem& operator*() const {
    DCHECK(!hash_set_->IsFreeSlot(this->index_));
    return hash_set_->ElementForIndex(this->index_);
  }

  Elem* operator->() const {
    return &**this;
  }

 private:
  size_t index_;           // Current slot index within the hash set's bucket array.
  HashSetType* hash_set_;  // The set being iterated over; not owned.

  template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
  friend bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
                         const HashSetIterator<Elem2, HashSetType2>& rhs);
  template <class T, class EmptyFn, class HashFn, class Pred, class Alloc> friend class HashSet;
  template <class OtherElem, class OtherHashSetType> friend class HashSetIterator;
};
94
95 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
96 bool operator==(const HashSetIterator<Elem1, HashSetType1>& lhs,
97 const HashSetIterator<Elem2, HashSetType2>& rhs) {
98 static_assert(
99 std::is_convertible<HashSetIterator<Elem1, HashSetType1>,
100 HashSetIterator<Elem2, HashSetType2>>::value ||
101 std::is_convertible<HashSetIterator<Elem2, HashSetType2>,
102 HashSetIterator<Elem1, HashSetType1>>::value, "Bad iterator types.");
103 DCHECK_EQ(lhs.hash_set_, rhs.hash_set_);
104 return lhs.index_ == rhs.index_;
105 }
106
107 template <class Elem1, class HashSetType1, class Elem2, class HashSetType2>
108 bool operator!=(const HashSetIterator<Elem1, HashSetType1>& lhs,
109 const HashSetIterator<Elem2, HashSetType2>& rhs) {
110 return !(lhs == rhs);
111 }
112
// Default empty-slot policy: a slot is free when its element compares equal
// to a value-initialized T. Requires T to be default-constructible and
// equality-comparable; the value-initialized T must not be a valid element.
template <class T>
class DefaultEmptyFn {
 public:
  // Overwrite the slot with the "empty" sentinel value.
  void MakeEmpty(T& item) const {
    item = T();
  }

  // Returns true if the slot currently holds the "empty" sentinel value.
  bool IsEmpty(const T& item) const {
    const bool is_sentinel = (item == T());
    return is_sentinel;
  }
};
124
125 template <class T>
126 class DefaultEmptyFn<T*> {
127 public:
MakeEmpty(T * & item)128 void MakeEmpty(T*& item) const {
129 item = nullptr;
130 }
IsEmpty(T * const & item)131 bool IsEmpty(T* const& item) const {
132 return item == nullptr;
133 }
134 };
135
// Hash functor selector: std::string keys are hashed with DataHash (which
// hashes the character data and supports heterogeneous string-like keys);
// every other type falls back to std::hash<T>.
template <class T>
using DefaultHashFn = typename std::conditional<std::is_same<T, std::string>::value,
                                                DataHash,
                                                std::hash<T>>::type;
140
// Equality predicate for std::string elements. The right-hand side is a
// template parameter so any type comparable to std::string works as a lookup
// key (for example std::string_view or const char*), avoiding a temporary
// std::string allocation during find().
struct DefaultStringEquals {
  template <typename T>
  bool operator()(const std::string& lhs, const T& rhs) const {
    const bool equal = (lhs == rhs);
    return equal;
  }
};
149
// Equality-functor selector: std::string keys use DefaultStringEquals (which
// permits heterogeneous lookup with string-like types); every other type
// falls back to std::equal_to<T>.
template <class T>
using DefaultPred = typename std::conditional<std::is_same<T, std::string>::value,
                                              DefaultStringEquals,
                                              std::equal_to<T>>::type;
154
155 // Low memory version of a hash set, uses less memory than std::unordered_multiset since elements
156 // aren't boxed. Uses linear probing to resolve collisions.
157 // EmptyFn needs to implement two functions MakeEmpty(T& item) and IsEmpty(const T& item).
158 // TODO: We could get rid of this requirement by using a bitmap, though maybe this would be slower
159 // and more complicated.
160 template <class T,
161 class EmptyFn = DefaultEmptyFn<T>,
162 class HashFn = DefaultHashFn<T>,
163 class Pred = DefaultPred<T>,
164 class Alloc = std::allocator<T>>
165 class HashSet {
166 public:
167 using value_type = T;
168 using allocator_type = Alloc;
169 using reference = T&;
170 using const_reference = const T&;
171 using pointer = T*;
172 using const_pointer = const T*;
173 using iterator = HashSetIterator<T, HashSet>;
174 using const_iterator = HashSetIterator<const T, const HashSet>;
175 using size_type = size_t;
176 using difference_type = ptrdiff_t;
177
178 static constexpr double kDefaultMinLoadFactor = 0.4;
179 static constexpr double kDefaultMaxLoadFactor = 0.7;
180 static constexpr size_t kMinBuckets = 1000;
181
182 // If we don't own the data, this will create a new array which owns the data.
clear()183 void clear() {
184 DeallocateStorage();
185 num_elements_ = 0;
186 elements_until_expand_ = 0;
187 }
188
HashSet()189 HashSet() : HashSet(kDefaultMinLoadFactor, kDefaultMaxLoadFactor) {}
190
HashSet(double min_load_factor,double max_load_factor)191 HashSet(double min_load_factor, double max_load_factor) noexcept
192 : num_elements_(0u),
193 num_buckets_(0u),
194 elements_until_expand_(0u),
195 owns_data_(false),
196 data_(nullptr),
197 min_load_factor_(min_load_factor),
198 max_load_factor_(max_load_factor) {
199 DCHECK_GT(min_load_factor, 0.0);
200 DCHECK_LT(max_load_factor, 1.0);
201 }
202
HashSet(const allocator_type & alloc)203 explicit HashSet(const allocator_type& alloc) noexcept
204 : allocfn_(alloc),
205 hashfn_(),
206 emptyfn_(),
207 pred_(),
208 num_elements_(0u),
209 num_buckets_(0u),
210 elements_until_expand_(0u),
211 owns_data_(false),
212 data_(nullptr),
213 min_load_factor_(kDefaultMinLoadFactor),
214 max_load_factor_(kDefaultMaxLoadFactor) {
215 }
216
HashSet(const HashSet & other)217 HashSet(const HashSet& other) noexcept
218 : allocfn_(other.allocfn_),
219 hashfn_(other.hashfn_),
220 emptyfn_(other.emptyfn_),
221 pred_(other.pred_),
222 num_elements_(other.num_elements_),
223 num_buckets_(0),
224 elements_until_expand_(other.elements_until_expand_),
225 owns_data_(false),
226 data_(nullptr),
227 min_load_factor_(other.min_load_factor_),
228 max_load_factor_(other.max_load_factor_) {
229 AllocateStorage(other.NumBuckets());
230 for (size_t i = 0; i < num_buckets_; ++i) {
231 ElementForIndex(i) = other.data_[i];
232 }
233 }
234
235 // noexcept required so that the move constructor is used instead of copy constructor.
236 // b/27860101
HashSet(HashSet && other)237 HashSet(HashSet&& other) noexcept
238 : allocfn_(std::move(other.allocfn_)),
239 hashfn_(std::move(other.hashfn_)),
240 emptyfn_(std::move(other.emptyfn_)),
241 pred_(std::move(other.pred_)),
242 num_elements_(other.num_elements_),
243 num_buckets_(other.num_buckets_),
244 elements_until_expand_(other.elements_until_expand_),
245 owns_data_(other.owns_data_),
246 data_(other.data_),
247 min_load_factor_(other.min_load_factor_),
248 max_load_factor_(other.max_load_factor_) {
249 other.num_elements_ = 0u;
250 other.num_buckets_ = 0u;
251 other.elements_until_expand_ = 0u;
252 other.owns_data_ = false;
253 other.data_ = nullptr;
254 }
255
256 // Construct from existing data.
257 // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the
258 // passed in ptr_.
HashSet(const uint8_t * ptr,bool make_copy_of_data,size_t * read_count)259 HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) noexcept {
260 uint64_t temp;
261 size_t offset = 0;
262 offset = ReadFromBytes(ptr, offset, &temp);
263 num_elements_ = static_cast<uint64_t>(temp);
264 offset = ReadFromBytes(ptr, offset, &temp);
265 num_buckets_ = static_cast<uint64_t>(temp);
266 CHECK_LE(num_elements_, num_buckets_);
267 offset = ReadFromBytes(ptr, offset, &temp);
268 elements_until_expand_ = static_cast<uint64_t>(temp);
269 offset = ReadFromBytes(ptr, offset, &min_load_factor_);
270 offset = ReadFromBytes(ptr, offset, &max_load_factor_);
271 if (!make_copy_of_data) {
272 owns_data_ = false;
273 data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
274 offset += sizeof(*data_) * num_buckets_;
275 } else {
276 AllocateStorage(num_buckets_);
277 // Write elements, not that this may not be safe for cross compilation if the elements are
278 // pointer sized.
279 for (size_t i = 0; i < num_buckets_; ++i) {
280 offset = ReadFromBytes(ptr, offset, &data_[i]);
281 }
282 }
283 // Caller responsible for aligning.
284 *read_count = offset;
285 }
286
287 // Returns how large the table is after being written. If target is null, then no writing happens
288 // but the size is still returned. Target must be 8 byte aligned.
WriteToMemory(uint8_t * ptr)289 size_t WriteToMemory(uint8_t* ptr) const {
290 size_t offset = 0;
291 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
292 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
293 offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
294 offset = WriteToBytes(ptr, offset, min_load_factor_);
295 offset = WriteToBytes(ptr, offset, max_load_factor_);
296 // Write elements, not that this may not be safe for cross compilation if the elements are
297 // pointer sized.
298 for (size_t i = 0; i < num_buckets_; ++i) {
299 offset = WriteToBytes(ptr, offset, data_[i]);
300 }
301 // Caller responsible for aligning.
302 return offset;
303 }
304
~HashSet()305 ~HashSet() {
306 DeallocateStorage();
307 }
308
309 HashSet& operator=(HashSet&& other) noexcept {
310 HashSet(std::move(other)).swap(*this); // NOLINT [runtime/explicit] [5]
311 return *this;
312 }
313
314 HashSet& operator=(const HashSet& other) noexcept {
315 HashSet(other).swap(*this); // NOLINT(runtime/explicit) - a case of lint gone mad.
316 return *this;
317 }
318
319 // Lower case for c++11 for each.
begin()320 iterator begin() {
321 iterator ret(this, 0);
322 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
323 ++ret; // Skip all the empty slots.
324 }
325 return ret;
326 }
327
328 // Lower case for c++11 for each. const version.
begin()329 const_iterator begin() const {
330 const_iterator ret(this, 0);
331 if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
332 ++ret; // Skip all the empty slots.
333 }
334 return ret;
335 }
336
337 // Lower case for c++11 for each.
end()338 iterator end() {
339 return iterator(this, NumBuckets());
340 }
341
342 // Lower case for c++11 for each. const version.
end()343 const_iterator end() const {
344 return const_iterator(this, NumBuckets());
345 }
346
size()347 size_t size() const {
348 return num_elements_;
349 }
350
empty()351 bool empty() const {
352 return size() == 0;
353 }
354
355 // Erase algorithm:
356 // Make an empty slot where the iterator is pointing.
357 // Scan forwards until we hit another empty slot.
358 // If an element in between doesn't rehash to the range from the current empty slot to the
359 // iterator. It must be before the empty slot, in that case we can move it to the empty slot
360 // and set the empty slot to be the location we just moved from.
361 // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
362 // element to its actual location/index.
363 // Note that since erase shuffles back elements, it may result in the same element being visited
364 // twice during HashSet iteration. This happens when an element already visited during iteration
365 // gets shuffled to the end of the bucket array.
erase(iterator it)366 iterator erase(iterator it) {
367 // empty_index is the index that will become empty.
368 size_t empty_index = it.index_;
369 DCHECK(!IsFreeSlot(empty_index));
370 size_t next_index = empty_index;
371 bool filled = false; // True if we filled the empty index.
372 while (true) {
373 next_index = NextIndex(next_index);
374 T& next_element = ElementForIndex(next_index);
375 // If the next element is empty, we are done. Make sure to clear the current empty index.
376 if (emptyfn_.IsEmpty(next_element)) {
377 emptyfn_.MakeEmpty(ElementForIndex(empty_index));
378 break;
379 }
380 // Otherwise try to see if the next element can fill the current empty index.
381 const size_t next_hash = hashfn_(next_element);
382 // Calculate the ideal index, if it is within empty_index + 1 to next_index then there is
383 // nothing we can do.
384 size_t next_ideal_index = IndexForHash(next_hash);
385 // Loop around if needed for our check.
386 size_t unwrapped_next_index = next_index;
387 if (unwrapped_next_index < empty_index) {
388 unwrapped_next_index += NumBuckets();
389 }
390 // Loop around if needed for our check.
391 size_t unwrapped_next_ideal_index = next_ideal_index;
392 if (unwrapped_next_ideal_index < empty_index) {
393 unwrapped_next_ideal_index += NumBuckets();
394 }
395 if (unwrapped_next_ideal_index <= empty_index ||
396 unwrapped_next_ideal_index > unwrapped_next_index) {
397 // If the target index isn't within our current range it must have been probed from before
398 // the empty index.
399 ElementForIndex(empty_index) = std::move(next_element);
400 filled = true; // TODO: Optimize
401 empty_index = next_index;
402 }
403 }
404 --num_elements_;
405 // If we didn't fill the slot then we need go to the next non free slot.
406 if (!filled) {
407 ++it;
408 }
409 return it;
410 }
411
412 // Find an element, returns end() if not found.
413 // Allows custom key (K) types, example of when this is useful:
414 // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
415 // object in the heap for performance solution.
416 template <typename K>
find(const K & key)417 iterator find(const K& key) {
418 return FindWithHash(key, hashfn_(key));
419 }
420
421 template <typename K>
find(const K & key)422 const_iterator find(const K& key) const {
423 return FindWithHash(key, hashfn_(key));
424 }
425
426 template <typename K>
FindWithHash(const K & key,size_t hash)427 iterator FindWithHash(const K& key, size_t hash) {
428 return iterator(this, FindIndex(key, hash));
429 }
430
431 template <typename K>
FindWithHash(const K & key,size_t hash)432 const_iterator FindWithHash(const K& key, size_t hash) const {
433 return const_iterator(this, FindIndex(key, hash));
434 }
435
436 // Insert an element with hint, allows duplicates.
437 // Note: The hint is not very useful for a HashSet<> unless there are many hash conflicts
438 // and in that case the use of HashSet<> itself should be reconsidered.
insert(const_iterator hint ATTRIBUTE_UNUSED,const T & element)439 iterator insert(const_iterator hint ATTRIBUTE_UNUSED, const T& element) {
440 return insert(element);
441 }
insert(const_iterator hint ATTRIBUTE_UNUSED,T && element)442 iterator insert(const_iterator hint ATTRIBUTE_UNUSED, T&& element) {
443 return insert(std::move(element));
444 }
445
446 // Insert an element, allows duplicates.
insert(const T & element)447 iterator insert(const T& element) {
448 return InsertWithHash(element, hashfn_(element));
449 }
insert(T && element)450 iterator insert(T&& element) {
451 return InsertWithHash(std::move(element), hashfn_(element));
452 }
453
454 template <typename U, typename = typename std::enable_if<std::is_convertible<U, T>::value>::type>
InsertWithHash(U && element,size_t hash)455 iterator InsertWithHash(U&& element, size_t hash) {
456 DCHECK_EQ(hash, hashfn_(element));
457 if (num_elements_ >= elements_until_expand_) {
458 Expand();
459 DCHECK_LT(num_elements_, elements_until_expand_);
460 }
461 const size_t index = FirstAvailableSlot(IndexForHash(hash));
462 data_[index] = std::forward<U>(element);
463 ++num_elements_;
464 return iterator(this, index);
465 }
466
swap(HashSet & other)467 void swap(HashSet& other) {
468 // Use argument-dependent lookup with fall-back to std::swap() for function objects.
469 using std::swap;
470 swap(allocfn_, other.allocfn_);
471 swap(hashfn_, other.hashfn_);
472 swap(emptyfn_, other.emptyfn_);
473 swap(pred_, other.pred_);
474 std::swap(data_, other.data_);
475 std::swap(num_buckets_, other.num_buckets_);
476 std::swap(num_elements_, other.num_elements_);
477 std::swap(elements_until_expand_, other.elements_until_expand_);
478 std::swap(min_load_factor_, other.min_load_factor_);
479 std::swap(max_load_factor_, other.max_load_factor_);
480 std::swap(owns_data_, other.owns_data_);
481 }
482
get_allocator()483 allocator_type get_allocator() const {
484 return allocfn_;
485 }
486
ShrinkToMaximumLoad()487 void ShrinkToMaximumLoad() {
488 Resize(size() / max_load_factor_);
489 }
490
491 // Reserve enough room to insert until Size() == num_elements without requiring to grow the hash
492 // set. No-op if the hash set is already large enough to do this.
reserve(size_t num_elements)493 void reserve(size_t num_elements) {
494 size_t num_buckets = num_elements / max_load_factor_;
495 // Deal with rounding errors. Add one for rounding.
496 while (static_cast<size_t>(num_buckets * max_load_factor_) <= num_elements + 1u) {
497 ++num_buckets;
498 }
499 if (num_buckets > NumBuckets()) {
500 Resize(num_buckets);
501 }
502 }
503
504 // To distance that inserted elements were probed. Used for measuring how good hash functions
505 // are.
TotalProbeDistance()506 size_t TotalProbeDistance() const {
507 size_t total = 0;
508 for (size_t i = 0; i < NumBuckets(); ++i) {
509 const T& element = ElementForIndex(i);
510 if (!emptyfn_.IsEmpty(element)) {
511 size_t ideal_location = IndexForHash(hashfn_(element));
512 if (ideal_location > i) {
513 total += i + NumBuckets() - ideal_location;
514 } else {
515 total += i - ideal_location;
516 }
517 }
518 }
519 return total;
520 }
521
522 // Calculate the current load factor and return it.
CalculateLoadFactor()523 double CalculateLoadFactor() const {
524 return static_cast<double>(size()) / static_cast<double>(NumBuckets());
525 }
526
527 // Make sure that everything reinserts in the right spot. Returns the number of errors.
Verify()528 size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
529 size_t errors = 0;
530 for (size_t i = 0; i < num_buckets_; ++i) {
531 T& element = data_[i];
532 if (!emptyfn_.IsEmpty(element)) {
533 T temp;
534 emptyfn_.MakeEmpty(temp);
535 std::swap(temp, element);
536 size_t first_slot = FirstAvailableSlot(IndexForHash(hashfn_(temp)));
537 if (i != first_slot) {
538 LOG(ERROR) << "Element " << i << " should be in slot " << first_slot;
539 ++errors;
540 }
541 std::swap(temp, element);
542 }
543 }
544 return errors;
545 }
546
GetMinLoadFactor()547 double GetMinLoadFactor() const {
548 return min_load_factor_;
549 }
550
GetMaxLoadFactor()551 double GetMaxLoadFactor() const {
552 return max_load_factor_;
553 }
554
555 // Change the load factor of the hash set. If the current load factor is greater than the max
556 // specified, then we resize the hash table storage.
SetLoadFactor(double min_load_factor,double max_load_factor)557 void SetLoadFactor(double min_load_factor, double max_load_factor) {
558 DCHECK_LT(min_load_factor, max_load_factor);
559 DCHECK_GT(min_load_factor, 0.0);
560 DCHECK_LT(max_load_factor, 1.0);
561 min_load_factor_ = min_load_factor;
562 max_load_factor_ = max_load_factor;
563 elements_until_expand_ = NumBuckets() * max_load_factor_;
564 // If the current load factor isn't in the range, then resize to the mean of the minimum and
565 // maximum load factor.
566 const double load_factor = CalculateLoadFactor();
567 if (load_factor > max_load_factor_) {
568 Resize(size() / ((min_load_factor_ + max_load_factor_) * 0.5));
569 }
570 }
571
572 // The hash set expands when Size() reaches ElementsUntilExpand().
ElementsUntilExpand()573 size_t ElementsUntilExpand() const {
574 return elements_until_expand_;
575 }
576
NumBuckets()577 size_t NumBuckets() const {
578 return num_buckets_;
579 }
580
581 private:
ElementForIndex(size_t index)582 T& ElementForIndex(size_t index) {
583 DCHECK_LT(index, NumBuckets());
584 DCHECK(data_ != nullptr);
585 return data_[index];
586 }
587
ElementForIndex(size_t index)588 const T& ElementForIndex(size_t index) const {
589 DCHECK_LT(index, NumBuckets());
590 DCHECK(data_ != nullptr);
591 return data_[index];
592 }
593
IndexForHash(size_t hash)594 size_t IndexForHash(size_t hash) const {
595 // Protect against undefined behavior (division by zero).
596 if (UNLIKELY(num_buckets_ == 0)) {
597 return 0;
598 }
599 return hash % num_buckets_;
600 }
601
NextIndex(size_t index)602 size_t NextIndex(size_t index) const {
603 if (UNLIKELY(++index >= num_buckets_)) {
604 DCHECK_EQ(index, NumBuckets());
605 return 0;
606 }
607 return index;
608 }
609
610 // Find the hash table slot for an element, or return NumBuckets() if not found.
611 // This value for not found is important so that iterator(this, FindIndex(...)) == end().
612 template <typename K>
FindIndex(const K & element,size_t hash)613 size_t FindIndex(const K& element, size_t hash) const {
614 // Guard against failing to get an element for a non-existing index.
615 if (UNLIKELY(NumBuckets() == 0)) {
616 return 0;
617 }
618 DCHECK_EQ(hashfn_(element), hash);
619 size_t index = IndexForHash(hash);
620 while (true) {
621 const T& slot = ElementForIndex(index);
622 if (emptyfn_.IsEmpty(slot)) {
623 return NumBuckets();
624 }
625 if (pred_(slot, element)) {
626 return index;
627 }
628 index = NextIndex(index);
629 }
630 }
631
IsFreeSlot(size_t index)632 bool IsFreeSlot(size_t index) const {
633 return emptyfn_.IsEmpty(ElementForIndex(index));
634 }
635
636 // Allocate a number of buckets.
AllocateStorage(size_t num_buckets)637 void AllocateStorage(size_t num_buckets) {
638 num_buckets_ = num_buckets;
639 data_ = allocfn_.allocate(num_buckets_);
640 owns_data_ = true;
641 for (size_t i = 0; i < num_buckets_; ++i) {
642 allocfn_.construct(allocfn_.address(data_[i]));
643 emptyfn_.MakeEmpty(data_[i]);
644 }
645 }
646
DeallocateStorage()647 void DeallocateStorage() {
648 if (owns_data_) {
649 for (size_t i = 0; i < NumBuckets(); ++i) {
650 allocfn_.destroy(allocfn_.address(data_[i]));
651 }
652 if (data_ != nullptr) {
653 allocfn_.deallocate(data_, NumBuckets());
654 }
655 owns_data_ = false;
656 }
657 data_ = nullptr;
658 num_buckets_ = 0;
659 }
660
661 // Expand the set based on the load factors.
Expand()662 void Expand() {
663 size_t min_index = static_cast<size_t>(size() / min_load_factor_);
664 // Resize based on the minimum load factor.
665 Resize(min_index);
666 }
667
668 // Expand / shrink the table to the new specified size.
Resize(size_t new_size)669 void Resize(size_t new_size) {
670 if (new_size < kMinBuckets) {
671 new_size = kMinBuckets;
672 }
673 DCHECK_GE(new_size, size());
674 T* const old_data = data_;
675 size_t old_num_buckets = num_buckets_;
676 // Reinsert all of the old elements.
677 const bool owned_data = owns_data_;
678 AllocateStorage(new_size);
679 for (size_t i = 0; i < old_num_buckets; ++i) {
680 T& element = old_data[i];
681 if (!emptyfn_.IsEmpty(element)) {
682 data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
683 }
684 if (owned_data) {
685 allocfn_.destroy(allocfn_.address(element));
686 }
687 }
688 if (owned_data) {
689 allocfn_.deallocate(old_data, old_num_buckets);
690 }
691
692 // When we hit elements_until_expand_, we are at the max load factor and must expand again.
693 elements_until_expand_ = NumBuckets() * max_load_factor_;
694 }
695
FirstAvailableSlot(size_t index)696 ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
697 DCHECK_LT(index, NumBuckets()); // Don't try to get a slot out of range.
698 size_t non_empty_count = 0;
699 while (!emptyfn_.IsEmpty(data_[index])) {
700 index = NextIndex(index);
701 non_empty_count++;
702 DCHECK_LE(non_empty_count, NumBuckets()); // Don't loop forever.
703 }
704 return index;
705 }
706
NextNonEmptySlot(size_t index)707 size_t NextNonEmptySlot(size_t index) const {
708 const size_t num_buckets = NumBuckets();
709 DCHECK_LT(index, num_buckets);
710 do {
711 ++index;
712 } while (index < num_buckets && IsFreeSlot(index));
713 return index;
714 }
715
716 // Return new offset.
717 template <typename Elem>
WriteToBytes(uint8_t * ptr,size_t offset,Elem n)718 static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
719 DCHECK_ALIGNED(ptr + offset, sizeof(n));
720 if (ptr != nullptr) {
721 *reinterpret_cast<Elem*>(ptr + offset) = n;
722 }
723 return offset + sizeof(n);
724 }
725
726 template <typename Elem>
ReadFromBytes(const uint8_t * ptr,size_t offset,Elem * out)727 static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
728 DCHECK(ptr != nullptr);
729 DCHECK_ALIGNED(ptr + offset, sizeof(*out));
730 *out = *reinterpret_cast<const Elem*>(ptr + offset);
731 return offset + sizeof(*out);
732 }
733
734 Alloc allocfn_; // Allocator function.
735 HashFn hashfn_; // Hashing function.
736 EmptyFn emptyfn_; // IsEmpty/SetEmpty function.
737 Pred pred_; // Equals function.
738 size_t num_elements_; // Number of inserted elements.
739 size_t num_buckets_; // Number of hash table buckets.
740 size_t elements_until_expand_; // Maximum number of elements until we expand the table.
741 bool owns_data_; // If we own data_ and are responsible for freeing it.
742 T* data_; // Backing storage.
743 double min_load_factor_;
744 double max_load_factor_;
745
746 template <class Elem, class HashSetType>
747 friend class HashSetIterator;
748
749 ART_FRIEND_TEST(InternTableTest, CrossHash);
750 };
751
// ADL-enabled free swap for HashSet; forwards to the member swap().
template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
          HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
  lhs.swap(rhs);
}
757
758 } // namespace art
759
760 #endif // ART_LIBARTBASE_BASE_HASH_SET_H_
761