1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "hash_set.h"
18
19 #include <forward_list>
20 #include <map>
21 #include <sstream>
22 #include <string>
23 #include <string_view>
24 #include <unordered_set>
25 #include <vector>
26
27 #include <gtest/gtest.h>
28
29 #include "hash_map.h"
30
31 namespace art {
32
33 struct IsEmptyFnString {
MakeEmptyart::IsEmptyFnString34 void MakeEmpty(std::string& item) const {
35 item.clear();
36 }
IsEmptyart::IsEmptyFnString37 bool IsEmpty(const std::string& item) const {
38 return item.empty();
39 }
40 };
41
42 class HashSetTest : public testing::Test {
43 public:
HashSetTest()44 HashSetTest() : seed_(97421), unique_number_(0) {
45 }
RandomString(size_t len)46 std::string RandomString(size_t len) {
47 std::ostringstream oss;
48 for (size_t i = 0; i < len; ++i) {
49 oss << static_cast<char>('A' + PRand() % 64);
50 }
51 static_assert(' ' < 'A', "space must be less than a");
52 oss << " " << unique_number_++; // Relies on ' ' < 'A'
53 return oss.str();
54 }
SetSeed(size_t seed)55 void SetSeed(size_t seed) {
56 seed_ = seed;
57 }
PRand()58 size_t PRand() { // Pseudo random.
59 seed_ = seed_ * 1103515245 + 12345;
60 return seed_;
61 }
62
63 private:
64 size_t seed_;
65 size_t unique_number_;
66 };
67
TEST_F(HashSetTest,TestSmoke)68 TEST_F(HashSetTest, TestSmoke) {
69 HashSet<std::string, IsEmptyFnString> hash_set;
70 const std::string test_string = "hello world 1234";
71 ASSERT_TRUE(hash_set.empty());
72 ASSERT_EQ(hash_set.size(), 0U);
73 hash_set.insert(test_string);
74 auto it = hash_set.find(test_string);
75 ASSERT_EQ(*it, test_string);
76 auto after_it = hash_set.erase(it);
77 ASSERT_TRUE(after_it == hash_set.end());
78 ASSERT_TRUE(hash_set.empty());
79 ASSERT_EQ(hash_set.size(), 0U);
80 it = hash_set.find(test_string);
81 ASSERT_TRUE(it == hash_set.end());
82 }
83
TEST_F(HashSetTest,TestInsertAndErase)84 TEST_F(HashSetTest, TestInsertAndErase) {
85 HashSet<std::string, IsEmptyFnString> hash_set;
86 static constexpr size_t count = 1000;
87 std::vector<std::string> strings;
88 for (size_t i = 0; i < count; ++i) {
89 // Insert a bunch of elements and make sure we can find them.
90 strings.push_back(RandomString(10));
91 hash_set.insert(strings[i]);
92 auto it = hash_set.find(strings[i]);
93 ASSERT_TRUE(it != hash_set.end());
94 ASSERT_EQ(*it, strings[i]);
95 }
96 ASSERT_EQ(strings.size(), hash_set.size());
97 // Try to erase the odd strings.
98 for (size_t i = 1; i < count; i += 2) {
99 auto it = hash_set.find(strings[i]);
100 ASSERT_TRUE(it != hash_set.end());
101 ASSERT_EQ(*it, strings[i]);
102 hash_set.erase(it);
103 }
104 // Test removed.
105 for (size_t i = 1; i < count; i += 2) {
106 auto it = hash_set.find(strings[i]);
107 ASSERT_TRUE(it == hash_set.end());
108 }
109 for (size_t i = 0; i < count; i += 2) {
110 auto it = hash_set.find(strings[i]);
111 ASSERT_TRUE(it != hash_set.end());
112 ASSERT_EQ(*it, strings[i]);
113 }
114 }
115
TEST_F(HashSetTest,TestIterator)116 TEST_F(HashSetTest, TestIterator) {
117 HashSet<std::string, IsEmptyFnString> hash_set;
118 ASSERT_TRUE(hash_set.begin() == hash_set.end());
119 static constexpr size_t count = 1000;
120 std::vector<std::string> strings;
121 for (size_t i = 0; i < count; ++i) {
122 // Insert a bunch of elements and make sure we can find them.
123 strings.push_back(RandomString(10));
124 hash_set.insert(strings[i]);
125 }
126 // Make sure we visit each string exactly once.
127 std::map<std::string, size_t> found_count;
128 for (const std::string& s : hash_set) {
129 ++found_count[s];
130 }
131 for (size_t i = 0; i < count; ++i) {
132 ASSERT_EQ(found_count[strings[i]], 1U);
133 }
134 found_count.clear();
135 // Remove all the elements with iterator erase.
136 for (auto it = hash_set.begin(); it != hash_set.end();) {
137 ++found_count[*it];
138 it = hash_set.erase(it);
139 ASSERT_EQ(hash_set.Verify(), 0U);
140 }
141 for (size_t i = 0; i < count; ++i) {
142 ASSERT_EQ(found_count[strings[i]], 1U);
143 }
144 }
145
TEST_F(HashSetTest,TestSwap)146 TEST_F(HashSetTest, TestSwap) {
147 HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
148 std::vector<std::string> strings;
149 static constexpr size_t count = 1000;
150 for (size_t i = 0; i < count; ++i) {
151 strings.push_back(RandomString(10));
152 hash_seta.insert(strings[i]);
153 }
154 std::swap(hash_seta, hash_setb);
155 hash_seta.insert("TEST");
156 hash_setb.insert("TEST2");
157 for (size_t i = 0; i < count; ++i) {
158 strings.push_back(RandomString(10));
159 hash_seta.insert(strings[i]);
160 }
161 }
162
TEST_F(HashSetTest,TestShrink)163 TEST_F(HashSetTest, TestShrink) {
164 HashSet<std::string, IsEmptyFnString> hash_set;
165 std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
166 for (size_t i = 0; i < strings.size(); ++i) {
167 // Insert some strings into the beginning of our hash set to establish an initial size
168 hash_set.insert(strings[i]);
169 }
170
171 hash_set.ShrinkToMaximumLoad();
172 const double initial_load = hash_set.CalculateLoadFactor();
173
174 // Insert a bunch of random strings to guarantee that we grow the capacity.
175 std::vector<std::string> random_strings;
176 static constexpr size_t count = 1000;
177 for (size_t i = 0; i < count; ++i) {
178 random_strings.push_back(RandomString(10));
179 hash_set.insert(random_strings[i]);
180 }
181
182 // Erase all the extra strings which guarantees that our load factor will be really bad.
183 for (size_t i = 0; i < count; ++i) {
184 hash_set.erase(hash_set.find(random_strings[i]));
185 }
186
187 const double bad_load = hash_set.CalculateLoadFactor();
188 EXPECT_GT(initial_load, bad_load);
189
190 // Shrink again, the load factor should be good again.
191 hash_set.ShrinkToMaximumLoad();
192 EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
193
194 // Make sure all the initial elements we had are still there
195 for (const std::string& initial_string : strings) {
196 EXPECT_NE(hash_set.end(), hash_set.find(initial_string))
197 << "expected to find " << initial_string;
198 }
199 }
200
TEST_F(HashSetTest,TestLoadFactor)201 TEST_F(HashSetTest, TestLoadFactor) {
202 HashSet<std::string, IsEmptyFnString> hash_set;
203 static constexpr size_t kStringCount = 1000;
204 static constexpr double kEpsilon = 0.01;
205 for (size_t i = 0; i < kStringCount; ++i) {
206 hash_set.insert(RandomString(i % 10 + 1));
207 }
208 // Check that changing the load factor resizes the table to be within the target range.
209 EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
210 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
211 hash_set.SetLoadFactor(0.1, 0.3);
212 EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
213 EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
214 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
215 hash_set.SetLoadFactor(0.6, 0.8);
216 EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
217 }
218
TEST_F(HashSetTest,TestStress)219 TEST_F(HashSetTest, TestStress) {
220 HashSet<std::string, IsEmptyFnString> hash_set;
221 std::unordered_set<std::string> std_set;
222 std::vector<std::string> strings;
223 static constexpr size_t string_count = 2000;
224 static constexpr size_t operations = 100000;
225 static constexpr size_t target_size = 5000;
226 for (size_t i = 0; i < string_count; ++i) {
227 strings.push_back(RandomString(i % 10 + 1));
228 }
229 const size_t seed = time(nullptr);
230 SetSeed(seed);
231 LOG(INFO) << "Starting stress test with seed " << seed;
232 for (size_t i = 0; i < operations; ++i) {
233 ASSERT_EQ(hash_set.size(), std_set.size());
234 size_t delta = std::abs(static_cast<ssize_t>(target_size) -
235 static_cast<ssize_t>(hash_set.size()));
236 size_t n = PRand();
237 if (n % target_size == 0) {
238 hash_set.clear();
239 std_set.clear();
240 ASSERT_TRUE(hash_set.empty());
241 ASSERT_TRUE(std_set.empty());
242 } else if (n % target_size < delta) {
243 // Skew towards adding elements until we are at the desired size.
244 const std::string& s = strings[PRand() % string_count];
245 hash_set.insert(s);
246 std_set.insert(s);
247 ASSERT_EQ(*hash_set.find(s), *std_set.find(s));
248 } else {
249 const std::string& s = strings[PRand() % string_count];
250 auto it1 = hash_set.find(s);
251 auto it2 = std_set.find(s);
252 ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
253 if (it1 != hash_set.end()) {
254 ASSERT_EQ(*it1, *it2);
255 hash_set.erase(it1);
256 std_set.erase(it2);
257 }
258 }
259 }
260 }
261
262 struct IsEmptyStringPair {
MakeEmptyart::IsEmptyStringPair263 void MakeEmpty(std::pair<std::string, int>& pair) const {
264 pair.first.clear();
265 }
IsEmptyart::IsEmptyStringPair266 bool IsEmpty(const std::pair<std::string, int>& pair) const {
267 return pair.first.empty();
268 }
269 };
270
TEST_F(HashSetTest,TestHashMap)271 TEST_F(HashSetTest, TestHashMap) {
272 HashMap<std::string, int, IsEmptyStringPair> hash_map;
273 hash_map.insert(std::make_pair(std::string("abcd"), 123));
274 hash_map.insert(std::make_pair(std::string("abcd"), 124));
275 hash_map.insert(std::make_pair(std::string("bags"), 444));
276 auto it = hash_map.find(std::string("abcd"));
277 ASSERT_EQ(it->second, 123);
278 hash_map.erase(it);
279 it = hash_map.find(std::string("abcd"));
280 ASSERT_EQ(it, hash_map.end());
281 }
282
283 struct IsEmptyFnVectorInt {
MakeEmptyart::IsEmptyFnVectorInt284 void MakeEmpty(std::vector<int>& item) const {
285 item.clear();
286 }
IsEmptyart::IsEmptyFnVectorInt287 bool IsEmpty(const std::vector<int>& item) const {
288 return item.empty();
289 }
290 };
291
292 template <typename T>
HashIntSequence(T begin,T end)293 size_t HashIntSequence(T begin, T end) {
294 size_t hash = 0;
295 for (auto iter = begin; iter != end; ++iter) {
296 hash = hash * 2 + *iter;
297 }
298 return hash;
299 }
300
301 struct VectorIntHashEquals {
operator ()art::VectorIntHashEquals302 std::size_t operator()(const std::vector<int>& item) const {
303 return HashIntSequence(item.begin(), item.end());
304 }
305
operator ()art::VectorIntHashEquals306 std::size_t operator()(const std::forward_list<int>& item) const {
307 return HashIntSequence(item.begin(), item.end());
308 }
309
operator ()art::VectorIntHashEquals310 bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
311 return a == b;
312 }
313
operator ()art::VectorIntHashEquals314 bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
315 auto aiter = a.begin();
316 auto biter = b.begin();
317 while (aiter != a.end() && biter != b.end()) {
318 if (*aiter != *biter) {
319 return false;
320 }
321 aiter++;
322 biter++;
323 }
324 return (aiter == a.end() && biter == b.end());
325 }
326 };
327
TEST_F(HashSetTest,TestLookupByAlternateKeyType)328 TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
329 HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
330 hash_set.insert(std::vector<int>({1, 2, 3, 4}));
331 hash_set.insert(std::vector<int>({4, 2}));
332 ASSERT_EQ(hash_set.end(), hash_set.find(std::vector<int>({1, 1, 1, 1})));
333 ASSERT_NE(hash_set.end(), hash_set.find(std::vector<int>({1, 2, 3, 4})));
334 ASSERT_EQ(hash_set.end(), hash_set.find(std::forward_list<int>({1, 1, 1, 1})));
335 ASSERT_NE(hash_set.end(), hash_set.find(std::forward_list<int>({1, 2, 3, 4})));
336 }
337
TEST_F(HashSetTest,TestReserve)338 TEST_F(HashSetTest, TestReserve) {
339 HashSet<std::string, IsEmptyFnString> hash_set;
340 std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
341 for (size_t size : sizes) {
342 hash_set.reserve(size);
343 const size_t buckets_before = hash_set.NumBuckets();
344 // Check that we expanded enough.
345 CHECK_GE(hash_set.ElementsUntilExpand(), size);
346 // Try inserting elements until we are at our reserve size and ensure the hash set did not
347 // expand.
348 while (hash_set.size() < size) {
349 hash_set.insert(std::to_string(hash_set.size()));
350 }
351 CHECK_EQ(hash_set.NumBuckets(), buckets_before);
352 }
353 // Check the behaviour for shrinking, it does not necessarily resize down.
354 constexpr size_t size = 100;
355 hash_set.reserve(size);
356 CHECK_GE(hash_set.ElementsUntilExpand(), size);
357 }
358
TEST_F(HashSetTest,IteratorConversion)359 TEST_F(HashSetTest, IteratorConversion) {
360 const char* test_string = "test string";
361 HashSet<std::string> hash_set;
362 HashSet<std::string>::iterator it = hash_set.insert(test_string).first;
363 HashSet<std::string>::const_iterator cit = it;
364 ASSERT_TRUE(it == cit);
365 ASSERT_EQ(*it, *cit);
366 }
367
TEST_F(HashSetTest,StringSearchStringView)368 TEST_F(HashSetTest, StringSearchStringView) {
369 const char* test_string = "test string";
370 HashSet<std::string> hash_set;
371 HashSet<std::string>::iterator insert_pos = hash_set.insert(test_string).first;
372 HashSet<std::string>::iterator it = hash_set.find(std::string_view(test_string));
373 ASSERT_TRUE(it == insert_pos);
374 }
375
TEST_F(HashSetTest,DoubleInsert)376 TEST_F(HashSetTest, DoubleInsert) {
377 const char* test_string = "test string";
378 HashSet<std::string> hash_set;
379 hash_set.insert(test_string);
380 hash_set.insert(test_string);
381 ASSERT_EQ(1u, hash_set.size());
382 }
383
TEST_F(HashSetTest,Preallocated)384 TEST_F(HashSetTest, Preallocated) {
385 static const size_t kBufferSize = 64;
386 uint32_t buffer[kBufferSize];
387 HashSet<uint32_t> hash_set(buffer, kBufferSize);
388 size_t max_without_resize = kBufferSize * hash_set.GetMaxLoadFactor();
389 for (size_t i = 0; i != max_without_resize; ++i) {
390 hash_set.insert(i);
391 }
392 ASSERT_FALSE(hash_set.owns_data_);
393 hash_set.insert(max_without_resize);
394 ASSERT_TRUE(hash_set.owns_data_);
395 }
396
397 class SmallIndexEmptyFn {
398 public:
MakeEmpty(uint16_t & item) const399 void MakeEmpty(uint16_t& item) const {
400 item = std::numeric_limits<uint16_t>::max();
401 }
IsEmpty(const uint16_t & item) const402 bool IsEmpty(const uint16_t& item) const {
403 return item == std::numeric_limits<uint16_t>::max();
404 }
405 };
406
407 class StatefulHashFn {
408 public:
StatefulHashFn(const std::vector<std::string> * strings)409 explicit StatefulHashFn(const std::vector<std::string>* strings)
410 : strings_(strings) {}
411
operator ()(const uint16_t & index) const412 size_t operator() (const uint16_t& index) const {
413 CHECK_LT(index, strings_->size());
414 return (*this)((*strings_)[index]);
415 }
416
operator ()(std::string_view s) const417 size_t operator() (std::string_view s) const {
418 return DataHash()(s);
419 }
420
421 private:
422 const std::vector<std::string>* strings_;
423 };
424
425 class StatefulPred {
426 public:
StatefulPred(const std::vector<std::string> * strings)427 explicit StatefulPred(const std::vector<std::string>* strings)
428 : strings_(strings) {}
429
operator ()(const uint16_t & lhs,const uint16_t & rhs) const430 bool operator() (const uint16_t& lhs, const uint16_t& rhs) const {
431 CHECK_LT(rhs, strings_->size());
432 return (*this)(lhs, (*strings_)[rhs]);
433 }
434
operator ()(const uint16_t & lhs,std::string_view rhs) const435 bool operator() (const uint16_t& lhs, std::string_view rhs) const {
436 CHECK_LT(lhs, strings_->size());
437 return (*strings_)[lhs] == rhs;
438 }
439
440 private:
441 const std::vector<std::string>* strings_;
442 };
443
TEST_F(HashSetTest,StatefulHashSet)444 TEST_F(HashSetTest, StatefulHashSet) {
445 std::vector<std::string> strings{
446 "duplicate",
447 "a",
448 "b",
449 "xyz",
450 "___",
451 "123",
452 "placeholder",
453 "duplicate"
454 };
455 const size_t duplicateFirstIndex = 0;
456 const size_t duplicateSecondIndex = strings.size() - 1u;
457 const size_t otherIndex = 1u;
458
459 StatefulHashFn hashfn(&strings);
460 StatefulPred pred(&strings);
461 HashSet<uint16_t, SmallIndexEmptyFn, StatefulHashFn, StatefulPred> hash_set(hashfn, pred);
462 for (size_t index = 0, size = strings.size(); index != size; ++index) {
463 bool inserted = hash_set.insert(index).second;
464 ASSERT_EQ(index != duplicateSecondIndex, inserted) << index;
465 }
466
467 // Check search by string.
468 for (size_t index = 0, size = strings.size(); index != size; ++index) {
469 auto it = hash_set.find(strings[index]);
470 ASSERT_FALSE(it == hash_set.end());
471 ASSERT_EQ(index == duplicateSecondIndex ? duplicateFirstIndex : index, *it) << index;
472 }
473 ASSERT_TRUE(hash_set.find("missing") == hash_set.end());
474
475 // Check search by index.
476 for (size_t index = 0, size = strings.size(); index != size; ++index) {
477 auto it = hash_set.find(index);
478 ASSERT_FALSE(it == hash_set.end());
479 ASSERT_EQ(index == duplicateSecondIndex ? duplicateFirstIndex : index, *it) << index;
480 }
481 // Note: Searching for index >= strings.size() is not supported by Stateful{HashFn,Pred}.
482
483 // Test removal and search by missing index.
484 auto remove_it = hash_set.find(otherIndex);
485 ASSERT_FALSE(remove_it == hash_set.end());
486 hash_set.erase(remove_it);
487 auto search_it = hash_set.find(otherIndex);
488 ASSERT_TRUE(search_it == hash_set.end());
489 }
490
491 } // namespace art
492