• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* The implementation of the hash table (_Py_hashtable_t) is based on the
2    cfuhash project:
3    http://sourceforge.net/projects/libcfu/
4 
5    Copyright of cfuhash:
6    ----------------------------------
7    Creation date: 2005-06-24 21:22:40
8    Authors: Don
9    Change log:
10 
11    Copyright (c) 2005 Don Owens
12    All rights reserved.
13 
14    This code is released under the BSD license:
15 
16    Redistribution and use in source and binary forms, with or without
17    modification, are permitted provided that the following conditions
18    are met:
19 
20      * Redistributions of source code must retain the above copyright
21        notice, this list of conditions and the following disclaimer.
22 
23      * Redistributions in binary form must reproduce the above
24        copyright notice, this list of conditions and the following
25        disclaimer in the documentation and/or other materials provided
26        with the distribution.
27 
28      * Neither the name of the author nor the names of its
29        contributors may be used to endorse or promote products derived
30        from this software without specific prior written permission.
31 
32    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43    OF THE POSSIBILITY OF SUCH DAMAGE.
44    ----------------------------------
45 */
46 
47 #include "Python.h"
48 #include "pycore_hashtable.h"
49 
50 #define HASHTABLE_MIN_SIZE 16
51 #define HASHTABLE_HIGH 0.50
52 #define HASHTABLE_LOW 0.10
53 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54 
55 #define BUCKETS_HEAD(SLIST) \
56         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57 #define TABLE_HEAD(HT, BUCKET) \
58         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59 #define ENTRY_NEXT(ENTRY) \
60         ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61 
62 /* Forward declaration */
63 static int hashtable_rehash(_Py_hashtable_t *ht);
64 
65 static void
_Py_slist_init(_Py_slist_t * list)66 _Py_slist_init(_Py_slist_t *list)
67 {
68     list->head = NULL;
69 }
70 
71 
72 static void
_Py_slist_prepend(_Py_slist_t * list,_Py_slist_item_t * item)73 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
74 {
75     item->next = list->head;
76     list->head = item;
77 }
78 
79 
80 static void
_Py_slist_remove(_Py_slist_t * list,_Py_slist_item_t * previous,_Py_slist_item_t * item)81 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
82                  _Py_slist_item_t *item)
83 {
84     if (previous != NULL)
85         previous->next = item->next;
86     else
87         list->head = item->next;
88 }
89 
90 
91 Py_uhash_t
_Py_hashtable_hash_ptr(const void * key)92 _Py_hashtable_hash_ptr(const void *key)
93 {
94     return (Py_uhash_t)_Py_HashPointerRaw(key);
95 }
96 
97 
98 int
_Py_hashtable_compare_direct(const void * key1,const void * key2)99 _Py_hashtable_compare_direct(const void *key1, const void *key2)
100 {
101     return (key1 == key2);
102 }
103 
104 
105 /* makes sure the real size of the buckets array is a power of 2 */
106 static size_t
round_size(size_t s)107 round_size(size_t s)
108 {
109     size_t i;
110     if (s < HASHTABLE_MIN_SIZE)
111         return HASHTABLE_MIN_SIZE;
112     i = 1;
113     while (i < s)
114         i <<= 1;
115     return i;
116 }
117 
118 
119 size_t
_Py_hashtable_size(const _Py_hashtable_t * ht)120 _Py_hashtable_size(const _Py_hashtable_t *ht)
121 {
122     size_t size = sizeof(_Py_hashtable_t);
123     /* buckets */
124     size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
125     /* entries */
126     size += ht->nentries * sizeof(_Py_hashtable_entry_t);
127     return size;
128 }
129 
130 
131 _Py_hashtable_entry_t *
_Py_hashtable_get_entry_generic(_Py_hashtable_t * ht,const void * key)132 _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
133 {
134     Py_uhash_t key_hash = ht->hash_func(key);
135     size_t index = key_hash & (ht->nbuckets - 1);
136     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
137     while (1) {
138         if (entry == NULL) {
139             return NULL;
140         }
141         if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
142             break;
143         }
144         entry = ENTRY_NEXT(entry);
145     }
146     return entry;
147 }
148 
149 
150 // Specialized for:
151 // hash_func == _Py_hashtable_hash_ptr
152 // compare_func == _Py_hashtable_compare_direct
153 static _Py_hashtable_entry_t *
_Py_hashtable_get_entry_ptr(_Py_hashtable_t * ht,const void * key)154 _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
155 {
156     Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
157     size_t index = key_hash & (ht->nbuckets - 1);
158     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
159     while (1) {
160         if (entry == NULL) {
161             return NULL;
162         }
163         // Compare directly keys (ignore entry->key_hash)
164         if (entry->key == key) {
165             break;
166         }
167         entry = ENTRY_NEXT(entry);
168     }
169     return entry;
170 }
171 
172 
173 void*
_Py_hashtable_steal(_Py_hashtable_t * ht,const void * key)174 _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
175 {
176     Py_uhash_t key_hash = ht->hash_func(key);
177     size_t index = key_hash & (ht->nbuckets - 1);
178 
179     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
180     _Py_hashtable_entry_t *previous = NULL;
181     while (1) {
182         if (entry == NULL) {
183             // not found
184             return NULL;
185         }
186         if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
187             break;
188         }
189         previous = entry;
190         entry = ENTRY_NEXT(entry);
191     }
192 
193     _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
194                      (_Py_slist_item_t *)entry);
195     ht->nentries--;
196 
197     void *value = entry->value;
198     ht->alloc.free(entry);
199 
200     if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
201         // Ignore failure: error cannot be reported to the caller
202         hashtable_rehash(ht);
203     }
204     return value;
205 }
206 
207 
208 int
_Py_hashtable_set(_Py_hashtable_t * ht,const void * key,void * value)209 _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
210 {
211     _Py_hashtable_entry_t *entry;
212 
213 #ifndef NDEBUG
214     /* Don't write the assertion on a single line because it is interesting
215        to know the duplicated entry if the assertion failed. The entry can
216        be read using a debugger. */
217     entry = ht->get_entry_func(ht, key);
218     assert(entry == NULL);
219 #endif
220 
221 
222     entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
223     if (entry == NULL) {
224         /* memory allocation failed */
225         return -1;
226     }
227 
228     entry->key_hash = ht->hash_func(key);
229     entry->key = (void *)key;
230     entry->value = value;
231 
232     ht->nentries++;
233     if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
234         if (hashtable_rehash(ht) < 0) {
235             ht->nentries--;
236             ht->alloc.free(entry);
237             return -1;
238         }
239     }
240 
241     size_t index = entry->key_hash & (ht->nbuckets - 1);
242     _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
243     return 0;
244 }
245 
246 
247 void*
_Py_hashtable_get(_Py_hashtable_t * ht,const void * key)248 _Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
249 {
250     _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
251     if (entry != NULL) {
252         return entry->value;
253     }
254     else {
255         return NULL;
256     }
257 }
258 
259 
260 int
_Py_hashtable_foreach(_Py_hashtable_t * ht,_Py_hashtable_foreach_func func,void * user_data)261 _Py_hashtable_foreach(_Py_hashtable_t *ht,
262                       _Py_hashtable_foreach_func func,
263                       void *user_data)
264 {
265     for (size_t hv = 0; hv < ht->nbuckets; hv++) {
266         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
267         while (entry != NULL) {
268             int res = func(ht, entry->key, entry->value, user_data);
269             if (res) {
270                 return res;
271             }
272             entry = ENTRY_NEXT(entry);
273         }
274     }
275     return 0;
276 }
277 
278 
279 static int
hashtable_rehash(_Py_hashtable_t * ht)280 hashtable_rehash(_Py_hashtable_t *ht)
281 {
282     size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
283     if (new_size == ht->nbuckets) {
284         return 0;
285     }
286 
287     size_t buckets_size = new_size * sizeof(ht->buckets[0]);
288     _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
289     if (new_buckets == NULL) {
290         /* memory allocation failed */
291         return -1;
292     }
293     memset(new_buckets, 0, buckets_size);
294 
295     for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
296         _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
297         while (entry != NULL) {
298             assert(ht->hash_func(entry->key) == entry->key_hash);
299             _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
300             size_t entry_index = entry->key_hash & (new_size - 1);
301 
302             _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
303 
304             entry = next;
305         }
306     }
307 
308     ht->alloc.free(ht->buckets);
309     ht->nbuckets = new_size;
310     ht->buckets = new_buckets;
311     return 0;
312 }
313 
314 
315 _Py_hashtable_t *
_Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func,_Py_hashtable_destroy_func key_destroy_func,_Py_hashtable_destroy_func value_destroy_func,_Py_hashtable_allocator_t * allocator)316 _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
317                        _Py_hashtable_compare_func compare_func,
318                        _Py_hashtable_destroy_func key_destroy_func,
319                        _Py_hashtable_destroy_func value_destroy_func,
320                        _Py_hashtable_allocator_t *allocator)
321 {
322     _Py_hashtable_allocator_t alloc;
323     if (allocator == NULL) {
324         alloc.malloc = PyMem_Malloc;
325         alloc.free = PyMem_Free;
326     }
327     else {
328         alloc = *allocator;
329     }
330 
331     _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
332     if (ht == NULL) {
333         return ht;
334     }
335 
336     ht->nbuckets = HASHTABLE_MIN_SIZE;
337     ht->nentries = 0;
338 
339     size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
340     ht->buckets = alloc.malloc(buckets_size);
341     if (ht->buckets == NULL) {
342         alloc.free(ht);
343         return NULL;
344     }
345     memset(ht->buckets, 0, buckets_size);
346 
347     ht->get_entry_func = _Py_hashtable_get_entry_generic;
348     ht->hash_func = hash_func;
349     ht->compare_func = compare_func;
350     ht->key_destroy_func = key_destroy_func;
351     ht->value_destroy_func = value_destroy_func;
352     ht->alloc = alloc;
353     if (ht->hash_func == _Py_hashtable_hash_ptr
354         && ht->compare_func == _Py_hashtable_compare_direct)
355     {
356         ht->get_entry_func = _Py_hashtable_get_entry_ptr;
357     }
358     return ht;
359 }
360 
361 
362 _Py_hashtable_t *
_Py_hashtable_new(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func)363 _Py_hashtable_new(_Py_hashtable_hash_func hash_func,
364                   _Py_hashtable_compare_func compare_func)
365 {
366     return _Py_hashtable_new_full(hash_func, compare_func,
367                                   NULL, NULL, NULL);
368 }
369 
370 
371 static void
_Py_hashtable_destroy_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry)372 _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
373 {
374     if (ht->key_destroy_func) {
375         ht->key_destroy_func(entry->key);
376     }
377     if (ht->value_destroy_func) {
378         ht->value_destroy_func(entry->value);
379     }
380     ht->alloc.free(entry);
381 }
382 
383 
384 void
_Py_hashtable_clear(_Py_hashtable_t * ht)385 _Py_hashtable_clear(_Py_hashtable_t *ht)
386 {
387     for (size_t i=0; i < ht->nbuckets; i++) {
388         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
389         while (entry != NULL) {
390             _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
391             _Py_hashtable_destroy_entry(ht, entry);
392             entry = next;
393         }
394         _Py_slist_init(&ht->buckets[i]);
395     }
396     ht->nentries = 0;
397     // Ignore failure: clear function is not expected to fail
398     // because of a memory allocation failure.
399     (void)hashtable_rehash(ht);
400 }
401 
402 
403 void
_Py_hashtable_destroy(_Py_hashtable_t * ht)404 _Py_hashtable_destroy(_Py_hashtable_t *ht)
405 {
406     for (size_t i = 0; i < ht->nbuckets; i++) {
407         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
408         while (entry) {
409             _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
410             _Py_hashtable_destroy_entry(ht, entry);
411             entry = entry_next;
412         }
413     }
414 
415     ht->alloc.free(ht->buckets);
416     ht->alloc.free(ht);
417 }
418