• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* The implementation of the hash table (_Py_hashtable_t) is based on the
2    cfuhash project:
3    http://sourceforge.net/projects/libcfu/
4 
5    Copyright of cfuhash:
6    ----------------------------------
7    Creation date: 2005-06-24 21:22:40
8    Authors: Don
9    Change log:
10 
11    Copyright (c) 2005 Don Owens
12    All rights reserved.
13 
14    This code is released under the BSD license:
15 
16    Redistribution and use in source and binary forms, with or without
17    modification, are permitted provided that the following conditions
18    are met:
19 
20      * Redistributions of source code must retain the above copyright
21        notice, this list of conditions and the following disclaimer.
22 
23      * Redistributions in binary form must reproduce the above
24        copyright notice, this list of conditions and the following
25        disclaimer in the documentation and/or other materials provided
26        with the distribution.
27 
28      * Neither the name of the author nor the names of its
29        contributors may be used to endorse or promote products derived
30        from this software without specific prior written permission.
31 
32    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43    OF THE POSSIBILITY OF SUCH DAMAGE.
44    ----------------------------------
45 */
46 
47 #include "Python.h"
48 #include "pycore_hashtable.h"     // export _Py_hashtable_new()
49 #include "pycore_pyhash.h"        // _Py_HashPointerRaw()
50 
51 #define HASHTABLE_MIN_SIZE 16
52 #define HASHTABLE_HIGH 0.50
53 #define HASHTABLE_LOW 0.10
54 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
55 
56 #define BUCKETS_HEAD(SLIST) \
57         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
58 #define TABLE_HEAD(HT, BUCKET) \
59         ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
60 #define ENTRY_NEXT(ENTRY) \
61         ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
62 
63 /* Forward declaration */
64 static int hashtable_rehash(_Py_hashtable_t *ht);
65 
66 static void
_Py_slist_init(_Py_slist_t * list)67 _Py_slist_init(_Py_slist_t *list)
68 {
69     list->head = NULL;
70 }
71 
72 
73 static void
_Py_slist_prepend(_Py_slist_t * list,_Py_slist_item_t * item)74 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
75 {
76     item->next = list->head;
77     list->head = item;
78 }
79 
80 
81 static void
_Py_slist_remove(_Py_slist_t * list,_Py_slist_item_t * previous,_Py_slist_item_t * item)82 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
83                  _Py_slist_item_t *item)
84 {
85     if (previous != NULL)
86         previous->next = item->next;
87     else
88         list->head = item->next;
89 }
90 
91 
92 Py_uhash_t
_Py_hashtable_hash_ptr(const void * key)93 _Py_hashtable_hash_ptr(const void *key)
94 {
95     return (Py_uhash_t)_Py_HashPointerRaw(key);
96 }
97 
98 
99 int
_Py_hashtable_compare_direct(const void * key1,const void * key2)100 _Py_hashtable_compare_direct(const void *key1, const void *key2)
101 {
102     return (key1 == key2);
103 }
104 
105 
106 /* makes sure the real size of the buckets array is a power of 2 */
107 static size_t
round_size(size_t s)108 round_size(size_t s)
109 {
110     size_t i;
111     if (s < HASHTABLE_MIN_SIZE)
112         return HASHTABLE_MIN_SIZE;
113     i = 1;
114     while (i < s)
115         i <<= 1;
116     return i;
117 }
118 
119 
120 size_t
_Py_hashtable_size(const _Py_hashtable_t * ht)121 _Py_hashtable_size(const _Py_hashtable_t *ht)
122 {
123     size_t size = sizeof(_Py_hashtable_t);
124     /* buckets */
125     size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
126     /* entries */
127     size += ht->nentries * sizeof(_Py_hashtable_entry_t);
128     return size;
129 }
130 
131 
132 size_t
_Py_hashtable_len(const _Py_hashtable_t * ht)133 _Py_hashtable_len(const _Py_hashtable_t *ht)
134 {
135     return ht->nentries;
136 }
137 
138 
139 _Py_hashtable_entry_t *
_Py_hashtable_get_entry_generic(_Py_hashtable_t * ht,const void * key)140 _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
141 {
142     Py_uhash_t key_hash = ht->hash_func(key);
143     size_t index = key_hash & (ht->nbuckets - 1);
144     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
145     while (1) {
146         if (entry == NULL) {
147             return NULL;
148         }
149         if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
150             break;
151         }
152         entry = ENTRY_NEXT(entry);
153     }
154     return entry;
155 }
156 
157 
158 // Specialized for:
159 // hash_func == _Py_hashtable_hash_ptr
160 // compare_func == _Py_hashtable_compare_direct
161 static _Py_hashtable_entry_t *
_Py_hashtable_get_entry_ptr(_Py_hashtable_t * ht,const void * key)162 _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
163 {
164     Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
165     size_t index = key_hash & (ht->nbuckets - 1);
166     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
167     while (1) {
168         if (entry == NULL) {
169             return NULL;
170         }
171         // Compare directly keys (ignore entry->key_hash)
172         if (entry->key == key) {
173             break;
174         }
175         entry = ENTRY_NEXT(entry);
176     }
177     return entry;
178 }
179 
180 
181 void*
_Py_hashtable_steal(_Py_hashtable_t * ht,const void * key)182 _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
183 {
184     Py_uhash_t key_hash = ht->hash_func(key);
185     size_t index = key_hash & (ht->nbuckets - 1);
186 
187     _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
188     _Py_hashtable_entry_t *previous = NULL;
189     while (1) {
190         if (entry == NULL) {
191             // not found
192             return NULL;
193         }
194         if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
195             break;
196         }
197         previous = entry;
198         entry = ENTRY_NEXT(entry);
199     }
200 
201     _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
202                      (_Py_slist_item_t *)entry);
203     ht->nentries--;
204 
205     void *value = entry->value;
206     ht->alloc.free(entry);
207 
208     if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
209         // Ignore failure: error cannot be reported to the caller
210         hashtable_rehash(ht);
211     }
212     return value;
213 }
214 
215 
216 int
_Py_hashtable_set(_Py_hashtable_t * ht,const void * key,void * value)217 _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
218 {
219     _Py_hashtable_entry_t *entry;
220 
221 #ifndef NDEBUG
222     /* Don't write the assertion on a single line because it is interesting
223        to know the duplicated entry if the assertion failed. The entry can
224        be read using a debugger. */
225     entry = ht->get_entry_func(ht, key);
226     assert(entry == NULL);
227 #endif
228 
229     entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
230     if (entry == NULL) {
231         /* memory allocation failed */
232         return -1;
233     }
234 
235     entry->key_hash = ht->hash_func(key);
236     entry->key = (void *)key;
237     entry->value = value;
238 
239     ht->nentries++;
240     if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
241         if (hashtable_rehash(ht) < 0) {
242             ht->nentries--;
243             ht->alloc.free(entry);
244             return -1;
245         }
246     }
247 
248     size_t index = entry->key_hash & (ht->nbuckets - 1);
249     _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
250     return 0;
251 }
252 
253 
254 void*
_Py_hashtable_get(_Py_hashtable_t * ht,const void * key)255 _Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
256 {
257     _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
258     if (entry != NULL) {
259         return entry->value;
260     }
261     else {
262         return NULL;
263     }
264 }
265 
266 
267 int
_Py_hashtable_foreach(_Py_hashtable_t * ht,_Py_hashtable_foreach_func func,void * user_data)268 _Py_hashtable_foreach(_Py_hashtable_t *ht,
269                       _Py_hashtable_foreach_func func,
270                       void *user_data)
271 {
272     for (size_t hv = 0; hv < ht->nbuckets; hv++) {
273         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
274         while (entry != NULL) {
275             int res = func(ht, entry->key, entry->value, user_data);
276             if (res) {
277                 return res;
278             }
279             entry = ENTRY_NEXT(entry);
280         }
281     }
282     return 0;
283 }
284 
285 
286 static int
hashtable_rehash(_Py_hashtable_t * ht)287 hashtable_rehash(_Py_hashtable_t *ht)
288 {
289     size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
290     if (new_size == ht->nbuckets) {
291         return 0;
292     }
293 
294     size_t buckets_size = new_size * sizeof(ht->buckets[0]);
295     _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
296     if (new_buckets == NULL) {
297         /* memory allocation failed */
298         return -1;
299     }
300     memset(new_buckets, 0, buckets_size);
301 
302     for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
303         _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
304         while (entry != NULL) {
305             assert(ht->hash_func(entry->key) == entry->key_hash);
306             _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
307             size_t entry_index = entry->key_hash & (new_size - 1);
308 
309             _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
310 
311             entry = next;
312         }
313     }
314 
315     ht->alloc.free(ht->buckets);
316     ht->nbuckets = new_size;
317     ht->buckets = new_buckets;
318     return 0;
319 }
320 
321 
322 _Py_hashtable_t *
_Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func,_Py_hashtable_destroy_func key_destroy_func,_Py_hashtable_destroy_func value_destroy_func,_Py_hashtable_allocator_t * allocator)323 _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
324                        _Py_hashtable_compare_func compare_func,
325                        _Py_hashtable_destroy_func key_destroy_func,
326                        _Py_hashtable_destroy_func value_destroy_func,
327                        _Py_hashtable_allocator_t *allocator)
328 {
329     _Py_hashtable_allocator_t alloc;
330     if (allocator == NULL) {
331         alloc.malloc = PyMem_Malloc;
332         alloc.free = PyMem_Free;
333     }
334     else {
335         alloc = *allocator;
336     }
337 
338     _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
339     if (ht == NULL) {
340         return ht;
341     }
342 
343     ht->nbuckets = HASHTABLE_MIN_SIZE;
344     ht->nentries = 0;
345 
346     size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
347     ht->buckets = alloc.malloc(buckets_size);
348     if (ht->buckets == NULL) {
349         alloc.free(ht);
350         return NULL;
351     }
352     memset(ht->buckets, 0, buckets_size);
353 
354     ht->get_entry_func = _Py_hashtable_get_entry_generic;
355     ht->hash_func = hash_func;
356     ht->compare_func = compare_func;
357     ht->key_destroy_func = key_destroy_func;
358     ht->value_destroy_func = value_destroy_func;
359     ht->alloc = alloc;
360     if (ht->hash_func == _Py_hashtable_hash_ptr
361         && ht->compare_func == _Py_hashtable_compare_direct)
362     {
363         ht->get_entry_func = _Py_hashtable_get_entry_ptr;
364     }
365     return ht;
366 }
367 
368 
369 _Py_hashtable_t *
_Py_hashtable_new(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func)370 _Py_hashtable_new(_Py_hashtable_hash_func hash_func,
371                   _Py_hashtable_compare_func compare_func)
372 {
373     return _Py_hashtable_new_full(hash_func, compare_func,
374                                   NULL, NULL, NULL);
375 }
376 
377 
378 static void
_Py_hashtable_destroy_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry)379 _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
380 {
381     if (ht->key_destroy_func) {
382         ht->key_destroy_func(entry->key);
383     }
384     if (ht->value_destroy_func) {
385         ht->value_destroy_func(entry->value);
386     }
387     ht->alloc.free(entry);
388 }
389 
390 
391 void
_Py_hashtable_clear(_Py_hashtable_t * ht)392 _Py_hashtable_clear(_Py_hashtable_t *ht)
393 {
394     for (size_t i=0; i < ht->nbuckets; i++) {
395         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
396         while (entry != NULL) {
397             _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
398             _Py_hashtable_destroy_entry(ht, entry);
399             entry = next;
400         }
401         _Py_slist_init(&ht->buckets[i]);
402     }
403     ht->nentries = 0;
404     // Ignore failure: clear function is not expected to fail
405     // because of a memory allocation failure.
406     (void)hashtable_rehash(ht);
407 }
408 
409 
410 void
_Py_hashtable_destroy(_Py_hashtable_t * ht)411 _Py_hashtable_destroy(_Py_hashtable_t *ht)
412 {
413     for (size_t i = 0; i < ht->nbuckets; i++) {
414         _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
415         while (entry) {
416             _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
417             _Py_hashtable_destroy_entry(ht, entry);
418             entry = entry_next;
419         }
420     }
421 
422     ht->alloc.free(ht->buckets);
423     ht->alloc.free(ht);
424 }
425