1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ANDROID_BASIC_HASHTABLE_H
18 #define ANDROID_BASIC_HASHTABLE_H
19
20 #include <stdint.h>
21 #include <sys/types.h>
22 #include <utils/SharedBuffer.h>
23 #include <utils/TypeHelpers.h>
24
25 namespace android {
26
27 /* Implementation type. Nothing to see here. */
28 class BasicHashtableImpl {
29 protected:
30 struct Bucket {
31 // The collision flag indicates that the bucket is part of a collision chain
32 // such that at least two entries both hash to this bucket. When true, we
33 // may need to seek further along the chain to find the entry.
34 static const uint32_t COLLISION = 0x80000000UL;
35
36 // The present flag indicates that the bucket contains an initialized entry value.
37 static const uint32_t PRESENT = 0x40000000UL;
38
39 // Mask for 30 bits worth of the hash code that are stored within the bucket to
40 // speed up lookups and rehashing by eliminating the need to recalculate the
41 // hash code of the entry's key.
42 static const uint32_t HASH_MASK = 0x3fffffffUL;
43
44 // Combined value that stores the collision and present flags as well as
45 // a 30 bit hash code.
46 uint32_t cookie;
47
48 // Storage for the entry begins here.
49 char entry[0];
50 };
51
52 BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
53 size_t minimumInitialCapacity, float loadFactor);
54 BasicHashtableImpl(const BasicHashtableImpl& other);
55 virtual ~BasicHashtableImpl();
56
57 void dispose();
58
edit()59 inline void edit() {
60 if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
61 clone();
62 }
63 }
64
65 void setTo(const BasicHashtableImpl& other);
66 void clear();
67
68 ssize_t next(ssize_t index) const;
69 ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
70 size_t add(hash_t hash, const void* __restrict__ entry);
71 void removeAt(size_t index);
72 void rehash(size_t minimumCapacity, float loadFactor);
73
74 const size_t mBucketSize; // number of bytes per bucket including the entry
75 const bool mHasTrivialDestructor; // true if the entry type does not require destruction
76 size_t mCapacity; // number of buckets that can be filled before exceeding load factor
77 float mLoadFactor; // load factor
78 size_t mSize; // number of elements actually in the table
79 size_t mFilledBuckets; // number of buckets for which collision or present is true
80 size_t mBucketCount; // number of slots in the mBuckets array
81 void* mBuckets; // array of buckets, as a SharedBuffer
82
bucketAt(const void * __restrict__ buckets,size_t index)83 inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
84 return *reinterpret_cast<const Bucket*>(
85 static_cast<const uint8_t*>(buckets) + index * mBucketSize);
86 }
87
bucketAt(void * __restrict__ buckets,size_t index)88 inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
89 return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
90 }
91
92 virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
93 virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
94 virtual void destroyBucketEntry(Bucket& bucket) const = 0;
95
96 private:
97 void clone();
98
99 // Allocates a bucket array as a SharedBuffer.
100 void* allocateBuckets(size_t count) const;
101
102 // Releases a bucket array's associated SharedBuffer.
103 void releaseBuckets(void* __restrict__ buckets, size_t count) const;
104
105 // Destroys the contents of buckets (invokes destroyBucketEntry for each
106 // populated bucket if needed).
107 void destroyBuckets(void* __restrict__ buckets, size_t count) const;
108
109 // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
110 // for each populated bucket if needed).
111 void copyBuckets(const void* __restrict__ fromBuckets,
112 void* __restrict__ toBuckets, size_t count) const;
113
114 // Determines the appropriate size of a bucket array to store a certain minimum
115 // number of entries and returns its effective capacity.
116 static void determineCapacity(size_t minimumCapacity, float loadFactor,
117 size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);
118
119 // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
trimHash(hash_t hash)120 inline static hash_t trimHash(hash_t hash) {
121 return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
122 }
123
124 // Returns the index of the first bucket that is in the collision chain
125 // for the specified hash code, given the total number of buckets.
126 // (Primary hash)
chainStart(hash_t hash,size_t count)127 inline static size_t chainStart(hash_t hash, size_t count) {
128 return hash % count;
129 }
130
131 // Returns the increment to add to a bucket index to seek to the next bucket
132 // in the collision chain for the specified hash code, given the total number of buckets.
133 // (Secondary hash)
chainIncrement(hash_t hash,size_t count)134 inline static size_t chainIncrement(hash_t hash, size_t count) {
135 return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
136 }
137
138 // Returns the index of the next bucket that is in the collision chain
139 // that is defined by the specified increment, given the total number of buckets.
chainSeek(size_t index,size_t increment,size_t count)140 inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
141 return (index + increment) % count;
142 }
143 };
144
145 /*
146 * A BasicHashtable stores entries that are indexed by hash code in place
147 * within an array. The basic operations are finding entries by key,
148 * adding new entries and removing existing entries.
149 *
150 * This class provides a very limited set of operations with simple semantics.
151 * It is intended to be used as a building block to construct more complex
152 * and interesting data structures such as HashMap. Think very hard before
153 * adding anything extra to BasicHashtable, it probably belongs at a
154 * higher level of abstraction.
155 *
156 * TKey: The key type.
157 * TEntry: The entry type which is what is actually stored in the array.
158 *
159 * TKey must support the following contract:
160 * bool operator==(const TKey& other) const; // return true if equal
161 * bool operator!=(const TKey& other) const; // return true if unequal
162 *
163 * TEntry must support the following contract:
164 * const TKey& getKey() const; // get the key from the entry
165 *
166 * This class supports storing entries with duplicate keys. Of course, it can't
167 * tell them apart during removal so only the first entry will be removed.
168 * We do this because it means that operations like add() can't fail.
169 */
170 template <typename TKey, typename TEntry>
171 class BasicHashtable : private BasicHashtableImpl {
172 public:
173 /* Creates a hashtable with the specified minimum initial capacity.
174 * The underlying array will be created when the first entry is added.
175 *
176 * minimumInitialCapacity: The minimum initial capacity for the hashtable.
177 * Default is 0.
178 * loadFactor: The desired load factor for the hashtable, between 0 and 1.
179 * Default is 0.75.
180 */
181 BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f);
182
183 /* Copies a hashtable.
184 * The underlying storage is shared copy-on-write.
185 */
186 BasicHashtable(const BasicHashtable& other);
187
188 /* Clears and destroys the hashtable.
189 */
190 virtual ~BasicHashtable();
191
192 /* Making this hashtable a copy of the other hashtable.
193 * The underlying storage is shared copy-on-write.
194 *
195 * other: The hashtable to copy.
196 */
197 inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) {
198 setTo(other);
199 return *this;
200 }
201
202 /* Returns the number of entries in the hashtable.
203 */
size()204 inline size_t size() const {
205 return mSize;
206 }
207
208 /* Returns the capacity of the hashtable, which is the number of elements that can
209 * added to the hashtable without requiring it to be grown.
210 */
capacity()211 inline size_t capacity() const {
212 return mCapacity;
213 }
214
215 /* Returns the number of buckets that the hashtable has, which is the size of its
216 * underlying array.
217 */
bucketCount()218 inline size_t bucketCount() const {
219 return mBucketCount;
220 }
221
222 /* Returns the load factor of the hashtable. */
loadFactor()223 inline float loadFactor() const {
224 return mLoadFactor;
225 };
226
227 /* Returns a const reference to the entry at the specified index.
228 *
229 * index: The index of the entry to retrieve. Must be a valid index within
230 * the bounds of the hashtable.
231 */
entryAt(size_t index)232 inline const TEntry& entryAt(size_t index) const {
233 return entryFor(bucketAt(mBuckets, index));
234 }
235
236 /* Returns a non-const reference to the entry at the specified index.
237 *
238 * index: The index of the entry to edit. Must be a valid index within
239 * the bounds of the hashtable.
240 */
editEntryAt(size_t index)241 inline TEntry& editEntryAt(size_t index) {
242 edit();
243 return entryFor(bucketAt(mBuckets, index));
244 }
245
246 /* Clears the hashtable.
247 * All entries in the hashtable are destroyed immediately.
248 * If you need to do something special with the entries in the hashtable then iterate
249 * over them and do what you need before clearing the hashtable.
250 */
clear()251 inline void clear() {
252 BasicHashtableImpl::clear();
253 }
254
255 /* Returns the index of the next entry in the hashtable given the index of a previous entry.
256 * If the given index is -1, then returns the index of the first entry in the hashtable,
257 * if there is one, or -1 otherwise.
258 * If the given index is not -1, then returns the index of the next entry in the hashtable,
259 * in strictly increasing order, or -1 if there are none left.
260 *
261 * index: The index of the previous entry that was iterated, or -1 to begin
262 * iteration at the beginning of the hashtable.
263 */
next(ssize_t index)264 inline ssize_t next(ssize_t index) const {
265 return BasicHashtableImpl::next(index);
266 }
267
268 /* Finds the index of an entry with the specified key.
269 * If the given index is -1, then returns the index of the first matching entry,
270 * otherwise returns the index of the next matching entry.
271 * If the hashtable contains multiple entries with keys that match the requested
272 * key, then the sequence of entries returned is arbitrary.
273 * Returns -1 if no entry was found.
274 *
275 * index: The index of the previous entry with the specified key, or -1 to
276 * find the first matching entry.
277 * hash: The hashcode of the key.
278 * key: The key.
279 */
find(ssize_t index,hash_t hash,const TKey & key)280 inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const {
281 return BasicHashtableImpl::find(index, hash, &key);
282 }
283
284 /* Adds the entry to the hashtable.
285 * Returns the index of the newly added entry.
286 * If an entry with the same key already exists, then a duplicate entry is added.
287 * If the entry will not fit, then the hashtable's capacity is increased and
288 * its contents are rehashed. See rehash().
289 *
290 * hash: The hashcode of the key.
291 * entry: The entry to add.
292 */
add(hash_t hash,const TEntry & entry)293 inline size_t add(hash_t hash, const TEntry& entry) {
294 return BasicHashtableImpl::add(hash, &entry);
295 }
296
297 /* Removes the entry with the specified index from the hashtable.
298 * The entry is destroyed immediately.
299 * The index must be valid.
300 *
301 * The hashtable is not compacted after an item is removed, so it is legal
302 * to continue iterating over the hashtable using next() or find().
303 *
304 * index: The index of the entry to remove. Must be a valid index within the
305 * bounds of the hashtable, and it must refer to an existing entry.
306 */
removeAt(size_t index)307 inline void removeAt(size_t index) {
308 BasicHashtableImpl::removeAt(index);
309 }
310
311 /* Rehashes the contents of the hashtable.
312 * Grows the hashtable to at least the specified minimum capacity or the
313 * current number of elements, whichever is larger.
314 *
315 * Rehashing causes all entries to be copied and the entry indices may change.
316 * Although the hash codes are cached by the hashtable, rehashing can be an
317 * expensive operation and should be avoided unless the hashtable's size
318 * needs to be changed.
319 *
320 * Rehashing is the only way to change the capacity or load factor of the
321 * hashtable once it has been created. It can be used to compact the
322 * hashtable by choosing a minimum capacity that is smaller than the current
323 * capacity (such as 0).
324 *
325 * minimumCapacity: The desired minimum capacity after rehashing.
326 * loadFactor: The desired load factor after rehashing.
327 */
rehash(size_t minimumCapacity,float loadFactor)328 inline void rehash(size_t minimumCapacity, float loadFactor) {
329 BasicHashtableImpl::rehash(minimumCapacity, loadFactor);
330 }
331
332 /* Determines whether there is room to add another entry without rehashing.
333 * When this returns true, a subsequent add() operation is guaranteed to
334 * complete without performing a rehash.
335 */
hasMoreRoom()336 inline bool hasMoreRoom() const {
337 return mCapacity > mFilledBuckets;
338 }
339
340 protected:
entryFor(const Bucket & bucket)341 static inline const TEntry& entryFor(const Bucket& bucket) {
342 return reinterpret_cast<const TEntry&>(bucket.entry);
343 }
344
entryFor(Bucket & bucket)345 static inline TEntry& entryFor(Bucket& bucket) {
346 return reinterpret_cast<TEntry&>(bucket.entry);
347 }
348
349 virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const;
350 virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const;
351 virtual void destroyBucketEntry(Bucket& bucket) const;
352
353 private:
354 // For dumping the raw contents of a hashtable during testing.
355 friend class BasicHashtableTest;
cookieAt(size_t index)356 inline uint32_t cookieAt(size_t index) const {
357 return bucketAt(mBuckets, index).cookie;
358 }
359 };
360
361 template <typename TKey, typename TEntry>
BasicHashtable(size_t minimumInitialCapacity,float loadFactor)362 BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
363 BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
364 minimumInitialCapacity, loadFactor) {
365 }
366
367 template <typename TKey, typename TEntry>
BasicHashtable(const BasicHashtable<TKey,TEntry> & other)368 BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
369 BasicHashtableImpl(other) {
370 }
371
372 template <typename TKey, typename TEntry>
~BasicHashtable()373 BasicHashtable<TKey, TEntry>::~BasicHashtable() {
374 dispose();
375 }
376
377 template <typename TKey, typename TEntry>
compareBucketKey(const Bucket & bucket,const void * __restrict__ key)378 bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket,
379 const void* __restrict__ key) const {
380 return entryFor(bucket).getKey() == *static_cast<const TKey*>(key);
381 }
382
383 template <typename TKey, typename TEntry>
initializeBucketEntry(Bucket & bucket,const void * __restrict__ entry)384 void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket,
385 const void* __restrict__ entry) const {
386 if (!traits<TEntry>::has_trivial_copy) {
387 new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry)));
388 } else {
389 memcpy(&entryFor(bucket), entry, sizeof(TEntry));
390 }
391 }
392
393 template <typename TKey, typename TEntry>
destroyBucketEntry(Bucket & bucket)394 void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const {
395 if (!traits<TEntry>::has_trivial_dtor) {
396 entryFor(bucket).~TEntry();
397 }
398 }
399
400 }; // namespace android
401
402 #endif // ANDROID_BASIC_HASHTABLE_H
403