1 /*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef SEMISTATIC_MAP_TEMPLATES_H
18 #define SEMISTATIC_MAP_TEMPLATES_H
19
20 #if !IN_FRUIT_CPP_FILE
21 #error "Fruit .template.h file included in non-cpp file."
22 #endif
23
24 #include <algorithm>
25 #include <cassert>
26 #include <chrono>
27 #include <random>
28 #include <utility>
29 // This include is not necessary for GCC/Clang, but it's necessary for MSVC.
30 #include <numeric>
31
32 #include <fruit/impl/data_structures/semistatic_map.h>
33
34 #include <fruit/impl/data_structures/arena_allocator.h>
35 #include <fruit/impl/data_structures/fixed_size_vector.templates.h>
36 #include <fruit/impl/fruit_assert.h>
37
38 namespace fruit {
39 namespace impl {
40
41 template <typename Key, typename Value>
42 template <typename Iter>
SemistaticMap(Iter values_begin,Iter values_end,std::size_t num_values,MemoryPool & memory_pool)43 SemistaticMap<Key, Value>::SemistaticMap(
44 Iter values_begin, Iter values_end, std::size_t num_values, MemoryPool& memory_pool) {
45 NumBits num_bits = pickNumBits(num_values);
46 std::size_t num_buckets = size_t(1) << num_bits;
47
48 FixedSizeVector<Unsigned, ArenaAllocator<Unsigned>> count(num_buckets, 0, ArenaAllocator<Unsigned>(memory_pool));
49
50 hash_function.shift = (sizeof(Unsigned) * CHAR_BIT - num_bits);
51
52 // The cast is a no-op in some systems (e.g. GCC and Clang under Linux 64bit) but it's needed in other systems (e.g.
53 // MSVC).
54 unsigned seed = (unsigned)std::chrono::system_clock::now().time_since_epoch().count();
55 std::default_random_engine random_generator(seed);
56 std::uniform_int_distribution<Unsigned> random_distribution;
57
58 while (1) {
59 hash_function.a = random_distribution(random_generator);
60
61 for (Iter itr = values_begin; !(itr == values_end); ++itr) {
62 Unsigned& this_count = count[hash((*itr).first)];
63 ++this_count;
64 if (this_count == beta) {
65 goto pick_another;
66 }
67 }
68 break;
69
70 pick_another:
71 std::memset(count.data(), 0, num_buckets * sizeof(Unsigned));
72 }
73
74 values = FixedSizeVector<value_type>(num_values, value_type());
75
76 std::partial_sum(count.begin(), count.end(), count.begin());
77 lookup_table = FixedSizeVector<CandidateValuesRange>(count.size());
78 for (Unsigned n : count) {
79 lookup_table.push_back(CandidateValuesRange{values.data() + n, values.data() + n});
80 }
81
82 // At this point lookup_table[h] is the number of keys in [first, last) that have a hash <=h.
83 // Note that even though we ensure this after construction, it is not maintained by insert() so it's not an invariant.
84
85 Iter itr = values_begin;
86 for (std::size_t i = 0; i < num_values; ++i, ++itr) {
87 value_type*& first_value_ptr = lookup_table[hash((*itr).first)].begin;
88 --first_value_ptr;
89 FruitAssert(values.data() <= first_value_ptr);
90 FruitAssert(first_value_ptr < values.data() + values.size());
91 *first_value_ptr = *itr;
92 }
93 }
94
95 template <typename Key, typename Value>
SemistaticMap(const SemistaticMap<Key,Value> & map,std::vector<value_type,ArenaAllocator<value_type>> && new_elements)96 SemistaticMap<Key, Value>::SemistaticMap(const SemistaticMap<Key, Value>& map,
97 std::vector<value_type, ArenaAllocator<value_type>>&& new_elements)
98 : hash_function(map.hash_function), lookup_table(map.lookup_table, map.lookup_table.size()) {
99
100 // Sort by hash.
101 std::sort(new_elements.begin(), new_elements.end(),
102 [this](const value_type& x, const value_type& y) { return hash(x.first) < hash(y.first); });
103
104 std::size_t num_additional_values = new_elements.size();
105 // Add the space needed to store copies of the old buckets.
106 for (auto itr = new_elements.begin(), itr_end = new_elements.end(); itr != itr_end; /* no increment */) {
107 Unsigned h = hash(itr->first);
108 auto p = map.lookup_table[h];
109 num_additional_values += (p.end - p.begin);
110 for (; itr != itr_end && hash(itr->first) == h; ++itr) {
111 }
112 }
113
114 values = FixedSizeVector<value_type>(num_additional_values);
115
116 // Now actually perform the insertions.
117
118 if (new_elements.empty()) {
119 // This is to workaround a bug in the STL shipped with GCC <4.8.2, where calling data() on an
120 // empty vector causes undefined behavior (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59829).
121 return;
122 }
123 for (value_type *itr = new_elements.data(), *itr_end = new_elements.data() + new_elements.size(); itr != itr_end;
124 /* no increment */) {
125 Unsigned h = hash(itr->first);
126 auto p = map.lookup_table[h];
127 num_additional_values += (p.end - p.begin);
128 value_type* first = itr;
129 for (; itr != itr_end && hash(itr->first) == h; ++itr) {
130 }
131 value_type* last = itr;
132 insert(h, first, last);
133 }
134 }
135
136 template <typename Key, typename Value>
insert(std::size_t h,const value_type * elems_begin,const value_type * elems_end)137 void SemistaticMap<Key, Value>::insert(std::size_t h, const value_type* elems_begin, const value_type* elems_end) {
138
139 value_type* old_bucket_begin = lookup_table[h].begin;
140 value_type* old_bucket_end = lookup_table[h].end;
141
142 lookup_table[h].begin = values.data() + values.size();
143
144 // Step 1: re-insert all keys with the same hash at the end (if any).
145 for (value_type* p = old_bucket_begin; p != old_bucket_end; ++p) {
146 values.push_back(*p);
147 }
148
149 // Step 2: also insert the new keys and values
150 for (auto itr = elems_begin; itr != elems_end; ++itr) {
151 values.push_back(*itr);
152 }
153
154 lookup_table[h].end = values.data() + values.size();
155
156 // The old sequence is no longer pointed to by any index in the lookup table, but recompacting the vectors would be
157 // too slow.
158 }
159
160 template <typename Key, typename Value>
at(Key key)161 const Value& SemistaticMap<Key, Value>::at(Key key) const {
162 Unsigned h = hash(key);
163 for (const value_type* p = lookup_table[h].begin; /* p!=lookup_table[h].end but no need to check */; ++p) {
164 FruitAssert(p != lookup_table[h].end);
165 if (p->first == key) {
166 return p->second;
167 }
168 }
169 }
170
171 template <typename Key, typename Value>
find(Key key)172 const Value* SemistaticMap<Key, Value>::find(Key key) const {
173 Unsigned h = hash(key);
174 for (const value_type *p = lookup_table[h].begin, *p_end = lookup_table[h].end; p != p_end; ++p) {
175 if (p->first == key) {
176 return &(p->second);
177 }
178 }
179 return nullptr;
180 }
181
182 template <typename Key, typename Value>
pickNumBits(std::size_t n)183 typename SemistaticMap<Key, Value>::NumBits SemistaticMap<Key, Value>::pickNumBits(std::size_t n) {
184 NumBits result = 1;
185 while ((std::size_t(1) << result) < n) {
186 ++result;
187 }
188 return result + 1;
189 }
190
191 // This is here so that we don't have to include fixed_size_vector.templates.h in fruit.h.
192 template <typename Key, typename Value>
~SemistaticMap()193 SemistaticMap<Key, Value>::~SemistaticMap() {}
194
195 } // namespace impl
196 } // namespace fruit
197
198 #endif // SEMISTATIC_MAP_TEMPLATES_H
199