/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef ANDROID_BASIC_HASHTABLE_H
18 #define ANDROID_BASIC_HASHTABLE_H
19 
20 #include <stdint.h>
21 #include <sys/types.h>
22 #include <utils/SharedBuffer.h>
23 #include <utils/TypeHelpers.h>
24 
25 namespace android {
26 
/* Non-template implementation base for BasicHashtable.  Not intended for direct use. */
28 class BasicHashtableImpl {
29 protected:
30     struct Bucket {
31         // The collision flag indicates that the bucket is part of a collision chain
32         // such that at least two entries both hash to this bucket.  When true, we
33         // may need to seek further along the chain to find the entry.
34         static const uint32_t COLLISION = 0x80000000UL;
35 
36         // The present flag indicates that the bucket contains an initialized entry value.
37         static const uint32_t PRESENT   = 0x40000000UL;
38 
39         // Mask for 30 bits worth of the hash code that are stored within the bucket to
40         // speed up lookups and rehashing by eliminating the need to recalculate the
41         // hash code of the entry's key.
42         static const uint32_t HASH_MASK = 0x3fffffffUL;
43 
44         // Combined value that stores the collision and present flags as well as
45         // a 30 bit hash code.
46         uint32_t cookie;
47 
48         // Storage for the entry begins here.
49         char entry[0];
50     };
51 
52     BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
53             size_t minimumInitialCapacity, float loadFactor);
54     BasicHashtableImpl(const BasicHashtableImpl& other);
55     virtual ~BasicHashtableImpl();
56 
57     void dispose();
58 
edit()59     inline void edit() {
60         if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
61             clone();
62         }
63     }
64 
65     void setTo(const BasicHashtableImpl& other);
66     void clear();
67 
68     ssize_t next(ssize_t index) const;
69     ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
70     size_t add(hash_t hash, const void* __restrict__ entry);
71     void removeAt(size_t index);
72     void rehash(size_t minimumCapacity, float loadFactor);
73 
74     const size_t mBucketSize; // number of bytes per bucket including the entry
75     const bool mHasTrivialDestructor; // true if the entry type does not require destruction
76     size_t mCapacity;         // number of buckets that can be filled before exceeding load factor
77     float mLoadFactor;        // load factor
78     size_t mSize;             // number of elements actually in the table
79     size_t mFilledBuckets;    // number of buckets for which collision or present is true
80     size_t mBucketCount;      // number of slots in the mBuckets array
81     void* mBuckets;           // array of buckets, as a SharedBuffer
82 
bucketAt(const void * __restrict__ buckets,size_t index)83     inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
84         return *reinterpret_cast<const Bucket*>(
85                 static_cast<const uint8_t*>(buckets) + index * mBucketSize);
86     }
87 
bucketAt(void * __restrict__ buckets,size_t index)88     inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
89         return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
90     }
91 
92     virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
93     virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
94     virtual void destroyBucketEntry(Bucket& bucket) const = 0;
95 
96 private:
97     void clone();
98 
99     // Allocates a bucket array as a SharedBuffer.
100     void* allocateBuckets(size_t count) const;
101 
102     // Releases a bucket array's associated SharedBuffer.
103     void releaseBuckets(void* __restrict__ buckets, size_t count) const;
104 
105     // Destroys the contents of buckets (invokes destroyBucketEntry for each
106     // populated bucket if needed).
107     void destroyBuckets(void* __restrict__ buckets, size_t count) const;
108 
109     // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
110     // for each populated bucket if needed).
111     void copyBuckets(const void* __restrict__ fromBuckets,
112             void* __restrict__ toBuckets, size_t count) const;
113 
114     // Determines the appropriate size of a bucket array to store a certain minimum
115     // number of entries and returns its effective capacity.
116     static void determineCapacity(size_t minimumCapacity, float loadFactor,
117             size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);
118 
119     // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
trimHash(hash_t hash)120     inline static hash_t trimHash(hash_t hash) {
121         return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
122     }
123 
124     // Returns the index of the first bucket that is in the collision chain
125     // for the specified hash code, given the total number of buckets.
126     // (Primary hash)
chainStart(hash_t hash,size_t count)127     inline static size_t chainStart(hash_t hash, size_t count) {
128         return hash % count;
129     }
130 
131     // Returns the increment to add to a bucket index to seek to the next bucket
132     // in the collision chain for the specified hash code, given the total number of buckets.
133     // (Secondary hash)
chainIncrement(hash_t hash,size_t count)134     inline static size_t chainIncrement(hash_t hash, size_t count) {
135         return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
136     }
137 
138     // Returns the index of the next bucket that is in the collision chain
139     // that is defined by the specified increment, given the total number of buckets.
chainSeek(size_t index,size_t increment,size_t count)140     inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
141         return (index + increment) % count;
142     }
143 };
144 
/*
 * A BasicHashtable stores entries that are indexed by hash code in place
 * within an array.  The basic operations are finding entries by key,
 * adding new entries and removing existing entries.
 *
 * This class provides a very limited set of operations with simple semantics.
 * It is intended to be used as a building block to construct more complex
 * and interesting data structures such as HashMap.  Think very hard before
 * adding anything extra to BasicHashtable; it probably belongs at a
 * higher level of abstraction.
 *
 * TKey: The key type.
 * TEntry: The entry type, which is what is actually stored in the array.
 *
 * TKey must support the following contract:
 *     bool operator==(const TKey& other) const;  // return true if equal
 *     bool operator!=(const TKey& other) const;  // return true if unequal
 *
 * TEntry must support the following contract:
 *     const TKey& getKey() const;  // get the key from the entry
 *
 * This class supports storing entries with duplicate keys.  Of course, it can't
 * tell them apart during removal, so only the first entry will be removed.
 * We do this because it means that operations like add() can't fail.
 */
170 template <typename TKey, typename TEntry>
171 class BasicHashtable : private BasicHashtableImpl {
172 public:
173     /* Creates a hashtable with the specified minimum initial capacity.
174      * The underlying array will be created when the first entry is added.
175      *
176      * minimumInitialCapacity: The minimum initial capacity for the hashtable.
177      *     Default is 0.
178      * loadFactor: The desired load factor for the hashtable, between 0 and 1.
179      *     Default is 0.75.
180      */
181     BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f);
182 
183     /* Copies a hashtable.
184      * The underlying storage is shared copy-on-write.
185      */
186     BasicHashtable(const BasicHashtable& other);
187 
188     /* Clears and destroys the hashtable.
189      */
190     virtual ~BasicHashtable();
191 
192     /* Making this hashtable a copy of the other hashtable.
193      * The underlying storage is shared copy-on-write.
194      *
195      * other: The hashtable to copy.
196      */
197     inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) {
198         setTo(other);
199         return *this;
200     }
201 
202     /* Returns the number of entries in the hashtable.
203      */
size()204     inline size_t size() const {
205         return mSize;
206     }
207 
208     /* Returns the capacity of the hashtable, which is the number of elements that can
209      * added to the hashtable without requiring it to be grown.
210      */
capacity()211     inline size_t capacity() const {
212         return mCapacity;
213     }
214 
215     /* Returns the number of buckets that the hashtable has, which is the size of its
216      * underlying array.
217      */
bucketCount()218     inline size_t bucketCount() const {
219         return mBucketCount;
220     }
221 
222     /* Returns the load factor of the hashtable. */
loadFactor()223     inline float loadFactor() const {
224         return mLoadFactor;
225     };
226 
227     /* Returns a const reference to the entry at the specified index.
228      *
229      * index:   The index of the entry to retrieve.  Must be a valid index within
230      *          the bounds of the hashtable.
231      */
entryAt(size_t index)232     inline const TEntry& entryAt(size_t index) const {
233         return entryFor(bucketAt(mBuckets, index));
234     }
235 
236     /* Returns a non-const reference to the entry at the specified index.
237      *
238      * index: The index of the entry to edit.  Must be a valid index within
239      *        the bounds of the hashtable.
240      */
editEntryAt(size_t index)241     inline TEntry& editEntryAt(size_t index) {
242         edit();
243         return entryFor(bucketAt(mBuckets, index));
244     }
245 
246     /* Clears the hashtable.
247      * All entries in the hashtable are destroyed immediately.
248      * If you need to do something special with the entries in the hashtable then iterate
249      * over them and do what you need before clearing the hashtable.
250      */
clear()251     inline void clear() {
252         BasicHashtableImpl::clear();
253     }
254 
255     /* Returns the index of the next entry in the hashtable given the index of a previous entry.
256      * If the given index is -1, then returns the index of the first entry in the hashtable,
257      * if there is one, or -1 otherwise.
258      * If the given index is not -1, then returns the index of the next entry in the hashtable,
259      * in strictly increasing order, or -1 if there are none left.
260      *
261      * index:   The index of the previous entry that was iterated, or -1 to begin
262      *          iteration at the beginning of the hashtable.
263      */
next(ssize_t index)264     inline ssize_t next(ssize_t index) const {
265         return BasicHashtableImpl::next(index);
266     }
267 
268     /* Finds the index of an entry with the specified key.
269      * If the given index is -1, then returns the index of the first matching entry,
270      * otherwise returns the index of the next matching entry.
271      * If the hashtable contains multiple entries with keys that match the requested
272      * key, then the sequence of entries returned is arbitrary.
273      * Returns -1 if no entry was found.
274      *
275      * index:   The index of the previous entry with the specified key, or -1 to
276      *          find the first matching entry.
277      * hash:    The hashcode of the key.
278      * key:     The key.
279      */
find(ssize_t index,hash_t hash,const TKey & key)280     inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const {
281         return BasicHashtableImpl::find(index, hash, &key);
282     }
283 
284     /* Adds the entry to the hashtable.
285      * Returns the index of the newly added entry.
286      * If an entry with the same key already exists, then a duplicate entry is added.
287      * If the entry will not fit, then the hashtable's capacity is increased and
288      * its contents are rehashed.  See rehash().
289      *
290      * hash:    The hashcode of the key.
291      * entry:   The entry to add.
292      */
add(hash_t hash,const TEntry & entry)293     inline size_t add(hash_t hash, const TEntry& entry) {
294         return BasicHashtableImpl::add(hash, &entry);
295     }
296 
297     /* Removes the entry with the specified index from the hashtable.
298      * The entry is destroyed immediately.
299      * The index must be valid.
300      *
301      * The hashtable is not compacted after an item is removed, so it is legal
302      * to continue iterating over the hashtable using next() or find().
303      *
304      * index:   The index of the entry to remove.  Must be a valid index within the
305      *          bounds of the hashtable, and it must refer to an existing entry.
306      */
removeAt(size_t index)307     inline void removeAt(size_t index) {
308         BasicHashtableImpl::removeAt(index);
309     }
310 
311     /* Rehashes the contents of the hashtable.
312      * Grows the hashtable to at least the specified minimum capacity or the
313      * current number of elements, whichever is larger.
314      *
315      * Rehashing causes all entries to be copied and the entry indices may change.
316      * Although the hash codes are cached by the hashtable, rehashing can be an
317      * expensive operation and should be avoided unless the hashtable's size
318      * needs to be changed.
319      *
320      * Rehashing is the only way to change the capacity or load factor of the
321      * hashtable once it has been created.  It can be used to compact the
322      * hashtable by choosing a minimum capacity that is smaller than the current
323      * capacity (such as 0).
324      *
325      * minimumCapacity: The desired minimum capacity after rehashing.
326      * loadFactor: The desired load factor after rehashing.
327      */
rehash(size_t minimumCapacity,float loadFactor)328     inline void rehash(size_t minimumCapacity, float loadFactor) {
329         BasicHashtableImpl::rehash(minimumCapacity, loadFactor);
330     }
331 
332     /* Determines whether there is room to add another entry without rehashing.
333      * When this returns true, a subsequent add() operation is guaranteed to
334      * complete without performing a rehash.
335      */
hasMoreRoom()336     inline bool hasMoreRoom() const {
337         return mCapacity > mFilledBuckets;
338     }
339 
340 protected:
entryFor(const Bucket & bucket)341     static inline const TEntry& entryFor(const Bucket& bucket) {
342         return reinterpret_cast<const TEntry&>(bucket.entry);
343     }
344 
entryFor(Bucket & bucket)345     static inline TEntry& entryFor(Bucket& bucket) {
346         return reinterpret_cast<TEntry&>(bucket.entry);
347     }
348 
349     virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const;
350     virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const;
351     virtual void destroyBucketEntry(Bucket& bucket) const;
352 
353 private:
354     // For dumping the raw contents of a hashtable during testing.
355     friend class BasicHashtableTest;
cookieAt(size_t index)356     inline uint32_t cookieAt(size_t index) const {
357         return bucketAt(mBuckets, index).cookie;
358     }
359 };
360 
361 template <typename TKey, typename TEntry>
BasicHashtable(size_t minimumInitialCapacity,float loadFactor)362 BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
363         BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
364                 minimumInitialCapacity, loadFactor) {
365 }
366 
367 template <typename TKey, typename TEntry>
BasicHashtable(const BasicHashtable<TKey,TEntry> & other)368 BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
369         BasicHashtableImpl(other) {
370 }
371 
372 template <typename TKey, typename TEntry>
~BasicHashtable()373 BasicHashtable<TKey, TEntry>::~BasicHashtable() {
374     dispose();
375 }
376 
377 template <typename TKey, typename TEntry>
compareBucketKey(const Bucket & bucket,const void * __restrict__ key)378 bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket,
379         const void* __restrict__ key) const {
380     return entryFor(bucket).getKey() == *static_cast<const TKey*>(key);
381 }
382 
383 template <typename TKey, typename TEntry>
initializeBucketEntry(Bucket & bucket,const void * __restrict__ entry)384 void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket,
385         const void* __restrict__ entry) const {
386     if (!traits<TEntry>::has_trivial_copy) {
387         new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry)));
388     } else {
389         memcpy(&entryFor(bucket), entry, sizeof(TEntry));
390     }
391 }
392 
393 template <typename TKey, typename TEntry>
destroyBucketEntry(Bucket & bucket)394 void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const {
395     if (!traits<TEntry>::has_trivial_dtor) {
396         entryFor(bucket).~TEntry();
397     }
398 }
399 
400 }; // namespace android
401 
402 #endif // ANDROID_BASIC_HASHTABLE_H
403