• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "hash_set.h"
18 
19 #include <forward_list>
20 #include <map>
21 #include <sstream>
22 #include <string>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include <gtest/gtest.h>
27 #include "hash_map.h"
28 
29 namespace art {
30 
31 struct IsEmptyFnString {
MakeEmptyart::IsEmptyFnString32   void MakeEmpty(std::string& item) const {
33     item.clear();
34   }
IsEmptyart::IsEmptyFnString35   bool IsEmpty(const std::string& item) const {
36     return item.empty();
37   }
38 };
39 
40 class HashSetTest : public testing::Test {
41  public:
HashSetTest()42   HashSetTest() : seed_(97421), unique_number_(0) {
43   }
RandomString(size_t len)44   std::string RandomString(size_t len) {
45     std::ostringstream oss;
46     for (size_t i = 0; i < len; ++i) {
47       oss << static_cast<char>('A' + PRand() % 64);
48     }
49     static_assert(' ' < 'A', "space must be less than a");
50     oss << " " << unique_number_++;  // Relies on ' ' < 'A'
51     return oss.str();
52   }
SetSeed(size_t seed)53   void SetSeed(size_t seed) {
54     seed_ = seed;
55   }
PRand()56   size_t PRand() {  // Pseudo random.
57     seed_ = seed_ * 1103515245 + 12345;
58     return seed_;
59   }
60 
61  private:
62   size_t seed_;
63   size_t unique_number_;
64 };
65 
TEST_F(HashSetTest,TestSmoke)66 TEST_F(HashSetTest, TestSmoke) {
67   HashSet<std::string, IsEmptyFnString> hash_set;
68   const std::string test_string = "hello world 1234";
69   ASSERT_TRUE(hash_set.Empty());
70   ASSERT_EQ(hash_set.Size(), 0U);
71   hash_set.Insert(test_string);
72   auto it = hash_set.Find(test_string);
73   ASSERT_EQ(*it, test_string);
74   auto after_it = hash_set.Erase(it);
75   ASSERT_TRUE(after_it == hash_set.end());
76   ASSERT_TRUE(hash_set.Empty());
77   ASSERT_EQ(hash_set.Size(), 0U);
78   it = hash_set.Find(test_string);
79   ASSERT_TRUE(it == hash_set.end());
80 }
81 
TEST_F(HashSetTest,TestInsertAndErase)82 TEST_F(HashSetTest, TestInsertAndErase) {
83   HashSet<std::string, IsEmptyFnString> hash_set;
84   static constexpr size_t count = 1000;
85   std::vector<std::string> strings;
86   for (size_t i = 0; i < count; ++i) {
87     // Insert a bunch of elements and make sure we can find them.
88     strings.push_back(RandomString(10));
89     hash_set.Insert(strings[i]);
90     auto it = hash_set.Find(strings[i]);
91     ASSERT_TRUE(it != hash_set.end());
92     ASSERT_EQ(*it, strings[i]);
93   }
94   ASSERT_EQ(strings.size(), hash_set.Size());
95   // Try to erase the odd strings.
96   for (size_t i = 1; i < count; i += 2) {
97     auto it = hash_set.Find(strings[i]);
98     ASSERT_TRUE(it != hash_set.end());
99     ASSERT_EQ(*it, strings[i]);
100     hash_set.Erase(it);
101   }
102   // Test removed.
103   for (size_t i = 1; i < count; i += 2) {
104     auto it = hash_set.Find(strings[i]);
105     ASSERT_TRUE(it == hash_set.end());
106   }
107   for (size_t i = 0; i < count; i += 2) {
108     auto it = hash_set.Find(strings[i]);
109     ASSERT_TRUE(it != hash_set.end());
110     ASSERT_EQ(*it, strings[i]);
111   }
112 }
113 
TEST_F(HashSetTest,TestIterator)114 TEST_F(HashSetTest, TestIterator) {
115   HashSet<std::string, IsEmptyFnString> hash_set;
116   ASSERT_TRUE(hash_set.begin() == hash_set.end());
117   static constexpr size_t count = 1000;
118   std::vector<std::string> strings;
119   for (size_t i = 0; i < count; ++i) {
120     // Insert a bunch of elements and make sure we can find them.
121     strings.push_back(RandomString(10));
122     hash_set.Insert(strings[i]);
123   }
124   // Make sure we visit each string exactly once.
125   std::map<std::string, size_t> found_count;
126   for (const std::string& s : hash_set) {
127     ++found_count[s];
128   }
129   for (size_t i = 0; i < count; ++i) {
130     ASSERT_EQ(found_count[strings[i]], 1U);
131   }
132   found_count.clear();
133   // Remove all the elements with iterator erase.
134   for (auto it = hash_set.begin(); it != hash_set.end();) {
135     ++found_count[*it];
136     it = hash_set.Erase(it);
137     ASSERT_EQ(hash_set.Verify(), 0U);
138   }
139   for (size_t i = 0; i < count; ++i) {
140     ASSERT_EQ(found_count[strings[i]], 1U);
141   }
142 }
143 
TEST_F(HashSetTest,TestSwap)144 TEST_F(HashSetTest, TestSwap) {
145   HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb;
146   std::vector<std::string> strings;
147   static constexpr size_t count = 1000;
148   for (size_t i = 0; i < count; ++i) {
149     strings.push_back(RandomString(10));
150     hash_seta.Insert(strings[i]);
151   }
152   std::swap(hash_seta, hash_setb);
153   hash_seta.Insert("TEST");
154   hash_setb.Insert("TEST2");
155   for (size_t i = 0; i < count; ++i) {
156     strings.push_back(RandomString(10));
157     hash_seta.Insert(strings[i]);
158   }
159 }
160 
TEST_F(HashSetTest,TestShrink)161 TEST_F(HashSetTest, TestShrink) {
162   HashSet<std::string, IsEmptyFnString> hash_set;
163   std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
164   for (size_t i = 0; i < strings.size(); ++i) {
165     // Insert some strings into the beginning of our hash set to establish an initial size
166     hash_set.Insert(strings[i]);
167   }
168 
169   hash_set.ShrinkToMaximumLoad();
170   const double initial_load = hash_set.CalculateLoadFactor();
171 
172   // Insert a bunch of random strings to guarantee that we grow the capacity.
173   std::vector<std::string> random_strings;
174   static constexpr size_t count = 1000;
175   for (size_t i = 0; i < count; ++i) {
176     random_strings.push_back(RandomString(10));
177     hash_set.Insert(random_strings[i]);
178   }
179 
180   // Erase all the extra strings which guarantees that our load factor will be really bad.
181   for (size_t i = 0; i < count; ++i) {
182     hash_set.Erase(hash_set.Find(random_strings[i]));
183   }
184 
185   const double bad_load = hash_set.CalculateLoadFactor();
186   EXPECT_GT(initial_load, bad_load);
187 
188   // Shrink again, the load factor should be good again.
189   hash_set.ShrinkToMaximumLoad();
190   EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
191 
192   // Make sure all the initial elements we had are still there
193   for (const std::string& initial_string : strings) {
194     EXPECT_NE(hash_set.end(), hash_set.Find(initial_string))
195         << "expected to find " << initial_string;
196   }
197 }
198 
TEST_F(HashSetTest,TestLoadFactor)199 TEST_F(HashSetTest, TestLoadFactor) {
200   HashSet<std::string, IsEmptyFnString> hash_set;
201   static constexpr size_t kStringCount = 1000;
202   static constexpr double kEpsilon = 0.01;
203   for (size_t i = 0; i < kStringCount; ++i) {
204     hash_set.Insert(RandomString(i % 10 + 1));
205   }
206   // Check that changing the load factor resizes the table to be within the target range.
207   EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor());
208   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
209   hash_set.SetLoadFactor(0.1, 0.3);
210   EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor());
211   EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor());
212   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
213   hash_set.SetLoadFactor(0.6, 0.8);
214   EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor());
215 }
216 
TEST_F(HashSetTest,TestStress)217 TEST_F(HashSetTest, TestStress) {
218   HashSet<std::string, IsEmptyFnString> hash_set;
219   std::unordered_multiset<std::string> std_set;
220   std::vector<std::string> strings;
221   static constexpr size_t string_count = 2000;
222   static constexpr size_t operations = 100000;
223   static constexpr size_t target_size = 5000;
224   for (size_t i = 0; i < string_count; ++i) {
225     strings.push_back(RandomString(i % 10 + 1));
226   }
227   const size_t seed = time(nullptr);
228   SetSeed(seed);
229   LOG(INFO) << "Starting stress test with seed " << seed;
230   for (size_t i = 0; i < operations; ++i) {
231     ASSERT_EQ(hash_set.Size(), std_set.size());
232     size_t delta = std::abs(static_cast<ssize_t>(target_size) -
233                             static_cast<ssize_t>(hash_set.Size()));
234     size_t n = PRand();
235     if (n % target_size == 0) {
236       hash_set.Clear();
237       std_set.clear();
238       ASSERT_TRUE(hash_set.Empty());
239       ASSERT_TRUE(std_set.empty());
240     } else  if (n % target_size < delta) {
241       // Skew towards adding elements until we are at the desired size.
242       const std::string& s = strings[PRand() % string_count];
243       hash_set.Insert(s);
244       std_set.insert(s);
245       ASSERT_EQ(*hash_set.Find(s), *std_set.find(s));
246     } else {
247       const std::string& s = strings[PRand() % string_count];
248       auto it1 = hash_set.Find(s);
249       auto it2 = std_set.find(s);
250       ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end());
251       if (it1 != hash_set.end()) {
252         ASSERT_EQ(*it1, *it2);
253         hash_set.Erase(it1);
254         std_set.erase(it2);
255       }
256     }
257   }
258 }
259 
260 struct IsEmptyStringPair {
MakeEmptyart::IsEmptyStringPair261   void MakeEmpty(std::pair<std::string, int>& pair) const {
262     pair.first.clear();
263   }
IsEmptyart::IsEmptyStringPair264   bool IsEmpty(const std::pair<std::string, int>& pair) const {
265     return pair.first.empty();
266   }
267 };
268 
TEST_F(HashSetTest,TestHashMap)269 TEST_F(HashSetTest, TestHashMap) {
270   HashMap<std::string, int, IsEmptyStringPair> hash_map;
271   hash_map.Insert(std::make_pair(std::string("abcd"), 123));
272   hash_map.Insert(std::make_pair(std::string("abcd"), 124));
273   hash_map.Insert(std::make_pair(std::string("bags"), 444));
274   auto it = hash_map.Find(std::string("abcd"));
275   ASSERT_EQ(it->second, 123);
276   hash_map.Erase(it);
277   it = hash_map.Find(std::string("abcd"));
278   ASSERT_EQ(it->second, 124);
279 }
280 
281 struct IsEmptyFnVectorInt {
MakeEmptyart::IsEmptyFnVectorInt282   void MakeEmpty(std::vector<int>& item) const {
283     item.clear();
284   }
IsEmptyart::IsEmptyFnVectorInt285   bool IsEmpty(const std::vector<int>& item) const {
286     return item.empty();
287   }
288 };
289 
290 template <typename T>
HashIntSequence(T begin,T end)291 size_t HashIntSequence(T begin, T end) {
292   size_t hash = 0;
293   for (auto iter = begin; iter != end; ++iter) {
294     hash = hash * 2 + *iter;
295   }
296   return hash;
297 }
298 
299 struct VectorIntHashEquals {
operator ()art::VectorIntHashEquals300   std::size_t operator()(const std::vector<int>& item) const {
301     return HashIntSequence(item.begin(), item.end());
302   }
303 
operator ()art::VectorIntHashEquals304   std::size_t operator()(const std::forward_list<int>& item) const {
305     return HashIntSequence(item.begin(), item.end());
306   }
307 
operator ()art::VectorIntHashEquals308   bool operator()(const std::vector<int>& a, const std::vector<int>& b) const {
309     return a == b;
310   }
311 
operator ()art::VectorIntHashEquals312   bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const {
313     auto aiter = a.begin();
314     auto biter = b.begin();
315     while (aiter != a.end() && biter != b.end()) {
316       if (*aiter != *biter) {
317         return false;
318       }
319       aiter++;
320       biter++;
321     }
322     return (aiter == a.end() && biter == b.end());
323   }
324 };
325 
TEST_F(HashSetTest,TestLookupByAlternateKeyType)326 TEST_F(HashSetTest, TestLookupByAlternateKeyType) {
327   HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set;
328   hash_set.Insert(std::vector<int>({1, 2, 3, 4}));
329   hash_set.Insert(std::vector<int>({4, 2}));
330   ASSERT_EQ(hash_set.end(), hash_set.Find(std::vector<int>({1, 1, 1, 1})));
331   ASSERT_NE(hash_set.end(), hash_set.Find(std::vector<int>({1, 2, 3, 4})));
332   ASSERT_EQ(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 1, 1, 1})));
333   ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4})));
334 }
335 
TEST_F(HashSetTest,TestReserve)336 TEST_F(HashSetTest, TestReserve) {
337   HashSet<std::string, IsEmptyFnString> hash_set;
338   std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096};
339   for (size_t size : sizes) {
340     hash_set.Reserve(size);
341     const size_t buckets_before = hash_set.NumBuckets();
342     // Check that we expanded enough.
343     CHECK_GE(hash_set.ElementsUntilExpand(), size);
344     // Try inserting elements until we are at our reserve size and ensure the hash set did not
345     // expand.
346     while (hash_set.Size() < size) {
347       hash_set.Insert(std::to_string(hash_set.Size()));
348     }
349     CHECK_EQ(hash_set.NumBuckets(), buckets_before);
350   }
351   // Check the behaviour for shrinking, it does not necessarily resize down.
352   constexpr size_t size = 100;
353   hash_set.Reserve(size);
354   CHECK_GE(hash_set.ElementsUntilExpand(), size);
355 }
356 
357 }  // namespace art
358