1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "hash_set.h"
18
19 #include <forward_list>
20 #include <map>
21 #include <sstream>
22 #include <string>
23 #include <unordered_set>
24 #include <vector>
25
26 #include <gtest/gtest.h>
27 #include "hash_map.h"
28
29 namespace art {
30
// Empty-slot functor used by the tests as the second template argument of
// HashSet<std::string, ...>: a string counts as "empty" when it has no characters.
struct IsEmptyFnString {
  // Resets |item| to the empty state by discarding its characters.
  void MakeEmpty(std::string& item) const {
    item.clear();
  }

  // Returns true when |item| holds no characters.
  bool IsEmpty(const std::string& item) const {
    return item.size() == 0u;
  }
};
39
40 class HashSetTest : public testing::Test {
41 public:
HashSetTest()42 HashSetTest() : seed_(97421), unique_number_(0) {
43 }
RandomString(size_t len)44 std::string RandomString(size_t len) {
45 std::ostringstream oss;
46 for (size_t i = 0; i < len; ++i) {
47 oss << static_cast<char>('A' + PRand() % 64);
48 }
49 static_assert(' ' < 'A', "space must be less than a");
50 oss << " " << unique_number_++; // Relies on ' ' < 'A'
51 return oss.str();
52 }
SetSeed(size_t seed)53 void SetSeed(size_t seed) {
54 seed_ = seed;
55 }
PRand()56 size_t PRand() { // Pseudo random.
57 seed_ = seed_ * 1103515245 + 12345;
58 return seed_;
59 }
60
61 private:
62 size_t seed_;
63 size_t unique_number_;
64 };
65
// Smoke test: one element can be inserted, looked up, erased, and is gone afterwards.
TEST_F(HashSetTest, TestSmoke) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  const std::string test_string = "hello world 1234";
  ASSERT_TRUE(hash_set.Empty());
  ASSERT_EQ(hash_set.Size(), 0U);
  hash_set.Insert(test_string);
  // The insertion must be reflected in the size bookkeeping.
  ASSERT_FALSE(hash_set.Empty());
  ASSERT_EQ(hash_set.Size(), 1U);
  auto it = hash_set.Find(test_string);
  // Guard the dereference below: a failed lookup should be reported as a test failure
  // instead of dereferencing the end iterator.
  ASSERT_TRUE(it != hash_set.end());
  ASSERT_EQ(*it, test_string);
  // Erasing the only element leaves nothing after it.
  auto after_it = hash_set.Erase(it);
  ASSERT_TRUE(after_it == hash_set.end());
  ASSERT_TRUE(hash_set.Empty());
  ASSERT_EQ(hash_set.Size(), 0U);
  it = hash_set.Find(test_string);
  ASSERT_TRUE(it == hash_set.end());
}
81
// Inserts many unique strings, erases every second one, and checks that exactly the erased
// ones can no longer be found.
TEST_F(HashSetTest, TestInsertAndErase) {
  static constexpr size_t count = 1000;
  HashSet<std::string, IsEmptyFnString> hash_set;
  std::vector<std::string> strings;
  // Insert a bunch of elements and make sure each one can be found right away.
  while (strings.size() < count) {
    strings.push_back(RandomString(10));
    const size_t newest = strings.size() - 1;
    hash_set.Insert(strings[newest]);
    auto found = hash_set.Find(strings[newest]);
    ASSERT_TRUE(found != hash_set.end());
    ASSERT_EQ(*found, strings[newest]);
  }
  ASSERT_EQ(strings.size(), hash_set.Size());
  // Erase the strings stored at odd indices.
  for (size_t odd = 1; odd < count; odd += 2) {
    auto victim = hash_set.Find(strings[odd]);
    ASSERT_TRUE(victim != hash_set.end());
    ASSERT_EQ(*victim, strings[odd]);
    hash_set.Erase(victim);
  }
  // The odd-indexed strings must be gone ...
  for (size_t odd = 1; odd < count; odd += 2) {
    ASSERT_TRUE(hash_set.Find(strings[odd]) == hash_set.end());
  }
  // ... while the even-indexed ones must still be present.
  for (size_t even = 0; even < count; even += 2) {
    auto survivor = hash_set.Find(strings[even]);
    ASSERT_TRUE(survivor != hash_set.end());
    ASSERT_EQ(*survivor, strings[even]);
  }
}
113
// Checks that iteration visits every element exactly once, both for a plain traversal and
// when the set is drained through the iterator returned by Erase().
TEST_F(HashSetTest, TestIterator) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  // An empty set has an empty iteration range.
  ASSERT_TRUE(hash_set.begin() == hash_set.end());
  static constexpr size_t count = 1000;
  std::vector<std::string> strings;
  // Fill the set with unique strings.
  while (strings.size() < count) {
    strings.push_back(RandomString(10));
    hash_set.Insert(strings.back());
  }
  // Make sure we visit each string exactly once.
  std::map<std::string, size_t> found_count;
  for (auto it = hash_set.begin(); it != hash_set.end(); ++it) {
    ++found_count[*it];
  }
  for (const std::string& expected : strings) {
    ASSERT_EQ(found_count[expected], 1U);
  }
  found_count.clear();
  // Remove all the elements with iterator erase, verifying the set after every removal.
  auto cursor = hash_set.begin();
  while (cursor != hash_set.end()) {
    ++found_count[*cursor];
    cursor = hash_set.Erase(cursor);
    ASSERT_EQ(hash_set.Verify(), 0U);
  }
  for (const std::string& expected : strings) {
    ASSERT_EQ(found_count[expected], 1U);
  }
}
143
// Checks that std::swap exchanges the contents of two hash sets and that both sets stay
// usable for further insertions afterwards.
TEST_F(HashSetTest, TestSwap) {
  HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
  std::vector<std::string> strings;
  static constexpr size_t count = 1000;
  for (size_t i = 0; i < count; ++i) {
    strings.push_back(RandomString(10));
    hash_seta.Insert(strings[i]);
  }
  ASSERT_EQ(hash_seta.Size(), count);
  ASSERT_TRUE(hash_setb.Empty());
  std::swap(hash_seta, hash_setb);
  // After the swap the populated contents belong to hash_setb and hash_seta is the empty one.
  ASSERT_TRUE(hash_seta.Empty());
  ASSERT_EQ(hash_setb.Size(), count);
  for (size_t i = 0; i < count; ++i) {
    ASSERT_TRUE(hash_setb.Find(strings[i]) != hash_setb.end());
  }
  // Both sets must still accept insertions.
  hash_seta.Insert("TEST");
  hash_setb.Insert("TEST2");
  ASSERT_EQ(hash_seta.Size(), 1U);
  ASSERT_EQ(hash_setb.Size(), count + 1);
  // NOTE(review): this re-inserts strings[0..count), i.e. the strings generated before the
  // swap; the strings generated here are only appended to |strings|. Kept as is - confirm
  // whether inserting the fresh strings was intended.
  for (size_t i = 0; i < count; ++i) {
    strings.push_back(RandomString(10));
    hash_seta.Insert(strings[i]);
  }
  ASSERT_EQ(hash_seta.Size(), count + 1);
}
160
// Checks that ShrinkToMaximumLoad() restores the load factor after the set was grown and
// then emptied of the extra elements again, without losing the remaining elements.
TEST_F(HashSetTest, TestShrink) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
  // Insert some strings into the beginning of our hash set to establish an initial size.
  for (const std::string& initial : strings) {
    hash_set.Insert(initial);
  }

  hash_set.ShrinkToMaximumLoad();
  const double initial_load = hash_set.CalculateLoadFactor();

  // Insert a bunch of random strings to guarantee that we grow the capacity.
  static constexpr size_t count = 1000;
  std::vector<std::string> random_strings;
  while (random_strings.size() < count) {
    random_strings.push_back(RandomString(10));
    hash_set.Insert(random_strings.back());
  }

  // Erase all the extra strings which guarantees that our load factor will be really bad.
  for (const std::string& extra : random_strings) {
    hash_set.Erase(hash_set.Find(extra));
  }

  const double bad_load = hash_set.CalculateLoadFactor();
  EXPECT_GT(initial_load, bad_load);

  // Shrink again, the load factor should be good again.
  hash_set.ShrinkToMaximumLoad();
  EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());

  // Make sure all the initial elements we had are still there.
  for (auto it = strings.begin(); it != strings.end(); ++it) {
    const std::string& initial_string = *it;
    EXPECT_NE(hash_set.end(), hash_set.Find(initial_string))
        << "expected to find " << initial_string;
  }
}
198
// Checks that the measured load factor stays within the configured bounds (allowing a small
// tolerance), both with the default bounds and after the bounds are changed.
TEST_F(HashSetTest, TestLoadFactor) {
  static constexpr size_t kStringCount = 1000;
  static constexpr double kEpsilon = 0.01;
  HashSet<std::string, IsEmptyFnString> hash_set;
  // Populate the set with strings whose random part cycles through lengths 1..10.
  size_t inserted = 0;
  while (inserted < kStringCount) {
    hash_set.Insert(RandomString(inserted % 10 + 1));
    ++inserted;
  }
  // Check that changing the load factor resizes the table to be within the target range.
  EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());

  // Lower bounds: the getters must report the new values and the load must respect the max.
  hash_set.SetLoadFactor(0.1, 0.3);
  EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
  EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());

  // Higher bounds: the load must still respect the max.
  hash_set.SetLoadFactor(0.6, 0.8);
  EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
}
216
// Randomized stress test: applies the same sequence of clear / insert / erase operations to
// a HashSet and to a std::unordered_multiset and checks that both agree after every step.
TEST_F(HashSetTest, TestStress) {
  static constexpr size_t string_count = 2000;
  static constexpr size_t operations = 100000;
  static constexpr size_t target_size = 5000;
  HashSet<std::string, IsEmptyFnString> hash_set;
  std::unordered_multiset<std::string> std_set;
  // Pool of candidate strings; generated before reseeding below.
  std::vector<std::string> strings;
  for (size_t i = 0; i < string_count; ++i) {
    strings.push_back(RandomString(i % 10 + 1));
  }
  // Seed from the clock and log the seed together with the start of the run.
  const size_t seed = time(nullptr);
  SetSeed(seed);
  LOG(INFO) << "Starting stress test with seed " << seed;
  for (size_t op = 0; op < operations; ++op) {
    ASSERT_EQ(hash_set.Size(), std_set.size());
    // Distance of the current size from the target size.
    const size_t current_size = hash_set.Size();
    const size_t delta = (current_size > target_size) ? current_size - target_size
                                                      : target_size - current_size;
    const size_t bucket = PRand() % target_size;
    if (bucket == 0) {
      // Occasionally wipe both containers.
      hash_set.Clear();
      std_set.clear();
      ASSERT_TRUE(hash_set.Empty());
      ASSERT_TRUE(std_set.empty());
      continue;
    }
    const std::string& s = strings[PRand() % string_count];
    if (bucket < delta) {
      // Skew towards adding elements until we are at the desired size.
      hash_set.Insert(s);
      std_set.insert(s);
      ASSERT_EQ(*hash_set.Find(s), *std_set.find(s));
    } else {
      // Otherwise erase one occurrence of |s|, provided both containers agree it is present.
      auto it1 = hash_set.Find(s);
      auto it2 = std_set.find(s);
      ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
      if (it1 != hash_set.end()) {
        ASSERT_EQ(*it1, *it2);
        hash_set.Erase(it1);
        std_set.erase(it2);
      }
    }
  }
}
259
// Empty-slot functor used by the tests with HashMap<std::string, int, ...>: only the string
// half of the pair decides emptiness, the int half is ignored.
struct IsEmptyStringPair {
  // Clears the string half of |pair|; the int half is left untouched.
  void MakeEmpty(std::pair<std::string, int>& pair) const {
    std::string& key = pair.first;
    key.clear();
  }

  // Returns true when the string half of |pair| has no characters.
  bool IsEmpty(const std::pair<std::string, int>& pair) const {
    const std::string& key = pair.first;
    return key.size() == 0u;
  }
};
268
// Checks HashMap with a key that is inserted twice: the first lookup yields the first
// inserted value and, once that entry is erased, the second inserted value.
TEST_F(HashSetTest, TestHashMap) {
  HashMap<std::string, int, IsEmptyStringPair> hash_map;
  hash_map.Insert(std::make_pair(std::string("abcd"), 123));
  hash_map.Insert(std::make_pair(std::string("abcd"), 124));
  hash_map.Insert(std::make_pair(std::string("bags"), 444));
  auto it = hash_map.Find(std::string("abcd"));
  // Guard the dereference: a failed lookup should fail the test, not dereference end().
  ASSERT_TRUE(it != hash_map.end());
  ASSERT_EQ(it->second, 123);
  hash_map.Erase(it);
  it = hash_map.Find(std::string("abcd"));
  // The second "abcd" entry must still be there after the first one was erased.
  ASSERT_TRUE(it != hash_map.end());
  ASSERT_EQ(it->second, 124);
}
280
// Empty-slot functor used by the tests with HashSet<std::vector<int>, ...>: a vector counts
// as "empty" when it has no elements.
struct IsEmptyFnVectorInt {
  // Resets |item| to the empty state by removing all of its elements.
  void MakeEmpty(std::vector<int>& item) const {
    item.clear();
  }

  // Returns true when |item| has no elements.
  bool IsEmpty(const std::vector<int>& item) const {
    return item.size() == 0u;
  }
};
289
// Folds the range [begin, end) into one hash value: for every element the running value is
// doubled and the element is added. An empty range hashes to 0.
template <typename T>
size_t HashIntSequence(T begin, T end) {
  size_t result = 0;
  T cursor = begin;
  while (cursor != end) {
    result *= 2;
    result += *cursor;
    ++cursor;
  }
  return result;
}
298
299 struct VectorIntHashEquals {
operator ()art::VectorIntHashEquals300 std::size_t operator()(const std::vector<int>& item) const {
301 return HashIntSequence(item.begin(), item.end());
302 }
303
operator ()art::VectorIntHashEquals304 std::size_t operator()(const std::forward_list<int>& item) const {
305 return HashIntSequence(item.begin(), item.end());
306 }
307
operator ()art::VectorIntHashEquals308 bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
309 return a == b;
310 }
311
operator ()art::VectorIntHashEquals312 bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
313 auto aiter = a.begin();
314 auto biter = b.begin();
315 while (aiter != a.end() && biter != b.end()) {
316 if (*aiter != *biter) {
317 return false;
318 }
319 aiter++;
320 biter++;
321 }
322 return (aiter == a.end() && biter == b.end());
323 }
324 };
325
// Checks that a set of std::vector<int> can be queried both with a vector and with a
// std::forward_list<int> holding the same values.
TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
  using IntVectorSet =
      HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals>;
  IntVectorSet hash_set;
  hash_set.Insert(std::vector<int>({1, 2, 3, 4}));
  hash_set.Insert(std::vector<int>({4, 2}));
  // Lookups with the element type itself: one absent key, one present key.
  auto absent_vector = hash_set.Find(std::vector<int>({1, 1, 1, 1}));
  ASSERT_EQ(hash_set.end(), absent_vector);
  auto present_vector = hash_set.Find(std::vector<int>({1, 2, 3, 4}));
  ASSERT_NE(hash_set.end(), present_vector);
  // The same two lookups with the alternate key type.
  auto absent_list = hash_set.Find(std::forward_list<int>({1, 1, 1, 1}));
  ASSERT_EQ(hash_set.end(), absent_list);
  auto present_list = hash_set.Find(std::forward_list<int>({1, 2, 3, 4}));
  ASSERT_NE(hash_set.end(), present_list);
}
335
// Checks that Reserve(n) makes room for at least n elements and that filling the set up to
// the reserved size does not change the number of buckets.
TEST_F(HashSetTest, TestReserve) {
  HashSet<std::string, IsEmptyFnString> hash_set;
  const std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
  for (size_t size : sizes) {
    hash_set.Reserve(size);
    const size_t buckets_before = hash_set.NumBuckets();
    // Check that we expanded enough. gtest assertions are used (as everywhere else in this
    // file) so that a failure is reported for this test instead of aborting the test binary.
    ASSERT_GE(hash_set.ElementsUntilExpand(), size);
    // Try inserting elements until we are at our reserve size and ensure the hash set did not
    // expand.
    while (hash_set.Size() < size) {
      hash_set.Insert(std::to_string(hash_set.Size()));
    }
    ASSERT_EQ(hash_set.NumBuckets(), buckets_before);
  }
  // Check the behaviour for shrinking, it does not necessarily resize down.
  constexpr size_t size = 100;
  hash_set.Reserve(size);
  ASSERT_GE(hash_set.ElementsUntilExpand(), size);
}
356
357 } // namespace art
358