• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Copyright (C) 2011 The Android Open Source Project
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   *      http://www.apache.org/licenses/LICENSE-2.0
9   *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  #ifndef ANDROID_BASIC_HASHTABLE_H
18  #define ANDROID_BASIC_HASHTABLE_H
19  
20  #include <stdint.h>
21  #include <sys/types.h>
22  #include <utils/SharedBuffer.h>
23  #include <utils/TypeHelpers.h>
24  
25  namespace android {
26  
27  /* Implementation type.  Nothing to see here. */
28  class BasicHashtableImpl {
29  protected:
30      struct Bucket {
31          // The collision flag indicates that the bucket is part of a collision chain
32          // such that at least two entries both hash to this bucket.  When true, we
33          // may need to seek further along the chain to find the entry.
34          static const uint32_t COLLISION = 0x80000000UL;
35  
36          // The present flag indicates that the bucket contains an initialized entry value.
37          static const uint32_t PRESENT   = 0x40000000UL;
38  
39          // Mask for 30 bits worth of the hash code that are stored within the bucket to
40          // speed up lookups and rehashing by eliminating the need to recalculate the
41          // hash code of the entry's key.
42          static const uint32_t HASH_MASK = 0x3fffffffUL;
43  
44          // Combined value that stores the collision and present flags as well as
45          // a 30 bit hash code.
46          uint32_t cookie;
47  
48          // Storage for the entry begins here.
49          char entry[0];
50      };
51  
52      BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
53              size_t minimumInitialCapacity, float loadFactor);
54      BasicHashtableImpl(const BasicHashtableImpl& other);
55      virtual ~BasicHashtableImpl();
56  
57      void dispose();
58  
edit()59      inline void edit() {
60          if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
61              clone();
62          }
63      }
64  
65      void setTo(const BasicHashtableImpl& other);
66      void clear();
67  
68      ssize_t next(ssize_t index) const;
69      ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
70      size_t add(hash_t hash, const void* __restrict__ entry);
71      void removeAt(size_t index);
72      void rehash(size_t minimumCapacity, float loadFactor);
73  
74      const size_t mBucketSize; // number of bytes per bucket including the entry
75      const bool mHasTrivialDestructor; // true if the entry type does not require destruction
76      size_t mCapacity;         // number of buckets that can be filled before exceeding load factor
77      float mLoadFactor;        // load factor
78      size_t mSize;             // number of elements actually in the table
79      size_t mFilledBuckets;    // number of buckets for which collision or present is true
80      size_t mBucketCount;      // number of slots in the mBuckets array
81      void* mBuckets;           // array of buckets, as a SharedBuffer
82  
bucketAt(const void * __restrict__ buckets,size_t index)83      inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
84          return *reinterpret_cast<const Bucket*>(
85                  static_cast<const uint8_t*>(buckets) + index * mBucketSize);
86      }
87  
bucketAt(void * __restrict__ buckets,size_t index)88      inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
89          return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
90      }
91  
92      virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
93      virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
94      virtual void destroyBucketEntry(Bucket& bucket) const = 0;
95  
96  private:
97      void clone();
98  
99      // Allocates a bucket array as a SharedBuffer.
100      void* allocateBuckets(size_t count) const;
101  
102      // Releases a bucket array's associated SharedBuffer.
103      void releaseBuckets(void* __restrict__ buckets, size_t count) const;
104  
105      // Destroys the contents of buckets (invokes destroyBucketEntry for each
106      // populated bucket if needed).
107      void destroyBuckets(void* __restrict__ buckets, size_t count) const;
108  
109      // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
110      // for each populated bucket if needed).
111      void copyBuckets(const void* __restrict__ fromBuckets,
112              void* __restrict__ toBuckets, size_t count) const;
113  
114      // Determines the appropriate size of a bucket array to store a certain minimum
115      // number of entries and returns its effective capacity.
116      static void determineCapacity(size_t minimumCapacity, float loadFactor,
117              size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);
118  
119      // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
trimHash(hash_t hash)120      inline static hash_t trimHash(hash_t hash) {
121          return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
122      }
123  
124      // Returns the index of the first bucket that is in the collision chain
125      // for the specified hash code, given the total number of buckets.
126      // (Primary hash)
chainStart(hash_t hash,size_t count)127      inline static size_t chainStart(hash_t hash, size_t count) {
128          return hash % count;
129      }
130  
131      // Returns the increment to add to a bucket index to seek to the next bucket
132      // in the collision chain for the specified hash code, given the total number of buckets.
133      // (Secondary hash)
chainIncrement(hash_t hash,size_t count)134      inline static size_t chainIncrement(hash_t hash, size_t count) {
135          return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
136      }
137  
138      // Returns the index of the next bucket that is in the collision chain
139      // that is defined by the specified increment, given the total number of buckets.
chainSeek(size_t index,size_t increment,size_t count)140      inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
141          return (index + increment) % count;
142      }
143  };
144  
145  /*
146   * A BasicHashtable stores entries that are indexed by hash code in place
147   * within an array.  The basic operations are finding entries by key,
148   * adding new entries and removing existing entries.
149   *
150   * This class provides a very limited set of operations with simple semantics.
151   * It is intended to be used as a building block to construct more complex
152   * and interesting data structures such as HashMap.  Think very hard before
153   * adding anything extra to BasicHashtable, it probably belongs at a
154   * higher level of abstraction.
155   *
156   * TKey: The key type.
157   * TEntry: The entry type which is what is actually stored in the array.
158   *
159   * TKey must support the following contract:
160   *     bool operator==(const TKey& other) const;  // return true if equal
161   *     bool operator!=(const TKey& other) const;  // return true if unequal
162   *
163   * TEntry must support the following contract:
164   *     const TKey& getKey() const;  // get the key from the entry
165   *
166   * This class supports storing entries with duplicate keys.  Of course, it can't
167   * tell them apart during removal so only the first entry will be removed.
168   * We do this because it means that operations like add() can't fail.
169   */
170  template <typename TKey, typename TEntry>
171  class BasicHashtable : private BasicHashtableImpl {
172  public:
173      /* Creates a hashtable with the specified minimum initial capacity.
174       * The underlying array will be created when the first entry is added.
175       *
176       * minimumInitialCapacity: The minimum initial capacity for the hashtable.
177       *     Default is 0.
178       * loadFactor: The desired load factor for the hashtable, between 0 and 1.
179       *     Default is 0.75.
180       */
181      BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f);
182  
183      /* Copies a hashtable.
184       * The underlying storage is shared copy-on-write.
185       */
186      BasicHashtable(const BasicHashtable& other);
187  
188      /* Clears and destroys the hashtable.
189       */
190      virtual ~BasicHashtable();
191  
192      /* Making this hashtable a copy of the other hashtable.
193       * The underlying storage is shared copy-on-write.
194       *
195       * other: The hashtable to copy.
196       */
197      inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) {
198          setTo(other);
199          return *this;
200      }
201  
202      /* Returns the number of entries in the hashtable.
203       */
size()204      inline size_t size() const {
205          return mSize;
206      }
207  
208      /* Returns the capacity of the hashtable, which is the number of elements that can
209       * added to the hashtable without requiring it to be grown.
210       */
capacity()211      inline size_t capacity() const {
212          return mCapacity;
213      }
214  
215      /* Returns the number of buckets that the hashtable has, which is the size of its
216       * underlying array.
217       */
bucketCount()218      inline size_t bucketCount() const {
219          return mBucketCount;
220      }
221  
222      /* Returns the load factor of the hashtable. */
loadFactor()223      inline float loadFactor() const {
224          return mLoadFactor;
225      };
226  
227      /* Returns a const reference to the entry at the specified index.
228       *
229       * index:   The index of the entry to retrieve.  Must be a valid index within
230       *          the bounds of the hashtable.
231       */
entryAt(size_t index)232      inline const TEntry& entryAt(size_t index) const {
233          return entryFor(bucketAt(mBuckets, index));
234      }
235  
236      /* Returns a non-const reference to the entry at the specified index.
237       *
238       * index: The index of the entry to edit.  Must be a valid index within
239       *        the bounds of the hashtable.
240       */
editEntryAt(size_t index)241      inline TEntry& editEntryAt(size_t index) {
242          edit();
243          return entryFor(bucketAt(mBuckets, index));
244      }
245  
246      /* Clears the hashtable.
247       * All entries in the hashtable are destroyed immediately.
248       * If you need to do something special with the entries in the hashtable then iterate
249       * over them and do what you need before clearing the hashtable.
250       */
clear()251      inline void clear() {
252          BasicHashtableImpl::clear();
253      }
254  
255      /* Returns the index of the next entry in the hashtable given the index of a previous entry.
256       * If the given index is -1, then returns the index of the first entry in the hashtable,
257       * if there is one, or -1 otherwise.
258       * If the given index is not -1, then returns the index of the next entry in the hashtable,
259       * in strictly increasing order, or -1 if there are none left.
260       *
261       * index:   The index of the previous entry that was iterated, or -1 to begin
262       *          iteration at the beginning of the hashtable.
263       */
next(ssize_t index)264      inline ssize_t next(ssize_t index) const {
265          return BasicHashtableImpl::next(index);
266      }
267  
268      /* Finds the index of an entry with the specified key.
269       * If the given index is -1, then returns the index of the first matching entry,
270       * otherwise returns the index of the next matching entry.
271       * If the hashtable contains multiple entries with keys that match the requested
272       * key, then the sequence of entries returned is arbitrary.
273       * Returns -1 if no entry was found.
274       *
275       * index:   The index of the previous entry with the specified key, or -1 to
276       *          find the first matching entry.
277       * hash:    The hashcode of the key.
278       * key:     The key.
279       */
find(ssize_t index,hash_t hash,const TKey & key)280      inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const {
281          return BasicHashtableImpl::find(index, hash, &key);
282      }
283  
284      /* Adds the entry to the hashtable.
285       * Returns the index of the newly added entry.
286       * If an entry with the same key already exists, then a duplicate entry is added.
287       * If the entry will not fit, then the hashtable's capacity is increased and
288       * its contents are rehashed.  See rehash().
289       *
290       * hash:    The hashcode of the key.
291       * entry:   The entry to add.
292       */
add(hash_t hash,const TEntry & entry)293      inline size_t add(hash_t hash, const TEntry& entry) {
294          return BasicHashtableImpl::add(hash, &entry);
295      }
296  
297      /* Removes the entry with the specified index from the hashtable.
298       * The entry is destroyed immediately.
299       * The index must be valid.
300       *
301       * The hashtable is not compacted after an item is removed, so it is legal
302       * to continue iterating over the hashtable using next() or find().
303       *
304       * index:   The index of the entry to remove.  Must be a valid index within the
305       *          bounds of the hashtable, and it must refer to an existing entry.
306       */
removeAt(size_t index)307      inline void removeAt(size_t index) {
308          BasicHashtableImpl::removeAt(index);
309      }
310  
311      /* Rehashes the contents of the hashtable.
312       * Grows the hashtable to at least the specified minimum capacity or the
313       * current number of elements, whichever is larger.
314       *
315       * Rehashing causes all entries to be copied and the entry indices may change.
316       * Although the hash codes are cached by the hashtable, rehashing can be an
317       * expensive operation and should be avoided unless the hashtable's size
318       * needs to be changed.
319       *
320       * Rehashing is the only way to change the capacity or load factor of the
321       * hashtable once it has been created.  It can be used to compact the
322       * hashtable by choosing a minimum capacity that is smaller than the current
323       * capacity (such as 0).
324       *
325       * minimumCapacity: The desired minimum capacity after rehashing.
326       * loadFactor: The desired load factor after rehashing.
327       */
rehash(size_t minimumCapacity,float loadFactor)328      inline void rehash(size_t minimumCapacity, float loadFactor) {
329          BasicHashtableImpl::rehash(minimumCapacity, loadFactor);
330      }
331  
332      /* Determines whether there is room to add another entry without rehashing.
333       * When this returns true, a subsequent add() operation is guaranteed to
334       * complete without performing a rehash.
335       */
hasMoreRoom()336      inline bool hasMoreRoom() const {
337          return mCapacity > mFilledBuckets;
338      }
339  
340  protected:
entryFor(const Bucket & bucket)341      static inline const TEntry& entryFor(const Bucket& bucket) {
342          return reinterpret_cast<const TEntry&>(bucket.entry);
343      }
344  
entryFor(Bucket & bucket)345      static inline TEntry& entryFor(Bucket& bucket) {
346          return reinterpret_cast<TEntry&>(bucket.entry);
347      }
348  
349      virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const;
350      virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const;
351      virtual void destroyBucketEntry(Bucket& bucket) const;
352  
353  private:
354      // For dumping the raw contents of a hashtable during testing.
355      friend class BasicHashtableTest;
cookieAt(size_t index)356      inline uint32_t cookieAt(size_t index) const {
357          return bucketAt(mBuckets, index).cookie;
358      }
359  };
360  
361  template <typename TKey, typename TEntry>
BasicHashtable(size_t minimumInitialCapacity,float loadFactor)362  BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
363          BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
364                  minimumInitialCapacity, loadFactor) {
365  }
366  
367  template <typename TKey, typename TEntry>
BasicHashtable(const BasicHashtable<TKey,TEntry> & other)368  BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
369          BasicHashtableImpl(other) {
370  }
371  
372  template <typename TKey, typename TEntry>
~BasicHashtable()373  BasicHashtable<TKey, TEntry>::~BasicHashtable() {
374      dispose();
375  }
376  
377  template <typename TKey, typename TEntry>
compareBucketKey(const Bucket & bucket,const void * __restrict__ key)378  bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket,
379          const void* __restrict__ key) const {
380      return entryFor(bucket).getKey() == *static_cast<const TKey*>(key);
381  }
382  
383  template <typename TKey, typename TEntry>
initializeBucketEntry(Bucket & bucket,const void * __restrict__ entry)384  void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket,
385          const void* __restrict__ entry) const {
386      if (!traits<TEntry>::has_trivial_copy) {
387          new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry)));
388      } else {
389          memcpy(&entryFor(bucket), entry, sizeof(TEntry));
390      }
391  }
392  
393  template <typename TKey, typename TEntry>
destroyBucketEntry(Bucket & bucket)394  void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const {
395      if (!traits<TEntry>::has_trivial_dtor) {
396          entryFor(bucket).~TEntry();
397      }
398  }
399  
400  }; // namespace android
401  
402  #endif // ANDROID_BASIC_HASHTABLE_H
403