1 /* The implementation of the hash table (_Py_hashtable_t) is based on the
2 cfuhash project:
3 http://sourceforge.net/projects/libcfu/
4
5 Copyright of cfuhash:
6 ----------------------------------
7 Creation date: 2005-06-24 21:22:40
8 Authors: Don
9 Change log:
10
11 Copyright (c) 2005 Don Owens
12 All rights reserved.
13
14 This code is released under the BSD license:
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
19
20 * Redistributions of source code must retain the above copyright
21 notice, this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above
24 copyright notice, this list of conditions and the following
25 disclaimer in the documentation and/or other materials provided
26 with the distribution.
27
28 * Neither the name of the author nor the names of its
29 contributors may be used to endorse or promote products derived
30 from this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 OF THE POSSIBILITY OF SUCH DAMAGE.
44 ----------------------------------
45 */
46
47 #include "Python.h"
48 #include "pycore_hashtable.h"
49
50 #define HASHTABLE_MIN_SIZE 16
51 #define HASHTABLE_HIGH 0.50
52 #define HASHTABLE_LOW 0.10
53 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54
55 #define BUCKETS_HEAD(SLIST) \
56 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57 #define TABLE_HEAD(HT, BUCKET) \
58 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59 #define ENTRY_NEXT(ENTRY) \
60 ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61
62 /* Forward declaration */
63 static int hashtable_rehash(_Py_hashtable_t *ht);
64
65 static void
_Py_slist_init(_Py_slist_t * list)66 _Py_slist_init(_Py_slist_t *list)
67 {
68 list->head = NULL;
69 }
70
71
72 static void
_Py_slist_prepend(_Py_slist_t * list,_Py_slist_item_t * item)73 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
74 {
75 item->next = list->head;
76 list->head = item;
77 }
78
79
80 static void
_Py_slist_remove(_Py_slist_t * list,_Py_slist_item_t * previous,_Py_slist_item_t * item)81 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
82 _Py_slist_item_t *item)
83 {
84 if (previous != NULL)
85 previous->next = item->next;
86 else
87 list->head = item->next;
88 }
89
90
91 Py_uhash_t
_Py_hashtable_hash_ptr(const void * key)92 _Py_hashtable_hash_ptr(const void *key)
93 {
94 return (Py_uhash_t)_Py_HashPointerRaw(key);
95 }
96
97
98 int
_Py_hashtable_compare_direct(const void * key1,const void * key2)99 _Py_hashtable_compare_direct(const void *key1, const void *key2)
100 {
101 return (key1 == key2);
102 }
103
104
105 /* makes sure the real size of the buckets array is a power of 2 */
106 static size_t
round_size(size_t s)107 round_size(size_t s)
108 {
109 size_t i;
110 if (s < HASHTABLE_MIN_SIZE)
111 return HASHTABLE_MIN_SIZE;
112 i = 1;
113 while (i < s)
114 i <<= 1;
115 return i;
116 }
117
118
119 size_t
_Py_hashtable_size(const _Py_hashtable_t * ht)120 _Py_hashtable_size(const _Py_hashtable_t *ht)
121 {
122 size_t size = sizeof(_Py_hashtable_t);
123 /* buckets */
124 size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
125 /* entries */
126 size += ht->nentries * sizeof(_Py_hashtable_entry_t);
127 return size;
128 }
129
130
131 _Py_hashtable_entry_t *
_Py_hashtable_get_entry_generic(_Py_hashtable_t * ht,const void * key)132 _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
133 {
134 Py_uhash_t key_hash = ht->hash_func(key);
135 size_t index = key_hash & (ht->nbuckets - 1);
136 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
137 while (1) {
138 if (entry == NULL) {
139 return NULL;
140 }
141 if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
142 break;
143 }
144 entry = ENTRY_NEXT(entry);
145 }
146 return entry;
147 }
148
149
150 // Specialized for:
151 // hash_func == _Py_hashtable_hash_ptr
152 // compare_func == _Py_hashtable_compare_direct
153 static _Py_hashtable_entry_t *
_Py_hashtable_get_entry_ptr(_Py_hashtable_t * ht,const void * key)154 _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
155 {
156 Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
157 size_t index = key_hash & (ht->nbuckets - 1);
158 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
159 while (1) {
160 if (entry == NULL) {
161 return NULL;
162 }
163 // Compare directly keys (ignore entry->key_hash)
164 if (entry->key == key) {
165 break;
166 }
167 entry = ENTRY_NEXT(entry);
168 }
169 return entry;
170 }
171
172
173 void*
_Py_hashtable_steal(_Py_hashtable_t * ht,const void * key)174 _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
175 {
176 Py_uhash_t key_hash = ht->hash_func(key);
177 size_t index = key_hash & (ht->nbuckets - 1);
178
179 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
180 _Py_hashtable_entry_t *previous = NULL;
181 while (1) {
182 if (entry == NULL) {
183 // not found
184 return NULL;
185 }
186 if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
187 break;
188 }
189 previous = entry;
190 entry = ENTRY_NEXT(entry);
191 }
192
193 _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
194 (_Py_slist_item_t *)entry);
195 ht->nentries--;
196
197 void *value = entry->value;
198 ht->alloc.free(entry);
199
200 if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
201 // Ignore failure: error cannot be reported to the caller
202 hashtable_rehash(ht);
203 }
204 return value;
205 }
206
207
208 int
_Py_hashtable_set(_Py_hashtable_t * ht,const void * key,void * value)209 _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
210 {
211 _Py_hashtable_entry_t *entry;
212
213 #ifndef NDEBUG
214 /* Don't write the assertion on a single line because it is interesting
215 to know the duplicated entry if the assertion failed. The entry can
216 be read using a debugger. */
217 entry = ht->get_entry_func(ht, key);
218 assert(entry == NULL);
219 #endif
220
221
222 entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
223 if (entry == NULL) {
224 /* memory allocation failed */
225 return -1;
226 }
227
228 entry->key_hash = ht->hash_func(key);
229 entry->key = (void *)key;
230 entry->value = value;
231
232 ht->nentries++;
233 if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
234 if (hashtable_rehash(ht) < 0) {
235 ht->nentries--;
236 ht->alloc.free(entry);
237 return -1;
238 }
239 }
240
241 size_t index = entry->key_hash & (ht->nbuckets - 1);
242 _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
243 return 0;
244 }
245
246
247 void*
_Py_hashtable_get(_Py_hashtable_t * ht,const void * key)248 _Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
249 {
250 _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
251 if (entry != NULL) {
252 return entry->value;
253 }
254 else {
255 return NULL;
256 }
257 }
258
259
260 int
_Py_hashtable_foreach(_Py_hashtable_t * ht,_Py_hashtable_foreach_func func,void * user_data)261 _Py_hashtable_foreach(_Py_hashtable_t *ht,
262 _Py_hashtable_foreach_func func,
263 void *user_data)
264 {
265 for (size_t hv = 0; hv < ht->nbuckets; hv++) {
266 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
267 while (entry != NULL) {
268 int res = func(ht, entry->key, entry->value, user_data);
269 if (res) {
270 return res;
271 }
272 entry = ENTRY_NEXT(entry);
273 }
274 }
275 return 0;
276 }
277
278
279 static int
hashtable_rehash(_Py_hashtable_t * ht)280 hashtable_rehash(_Py_hashtable_t *ht)
281 {
282 size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
283 if (new_size == ht->nbuckets) {
284 return 0;
285 }
286
287 size_t buckets_size = new_size * sizeof(ht->buckets[0]);
288 _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
289 if (new_buckets == NULL) {
290 /* memory allocation failed */
291 return -1;
292 }
293 memset(new_buckets, 0, buckets_size);
294
295 for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
296 _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
297 while (entry != NULL) {
298 assert(ht->hash_func(entry->key) == entry->key_hash);
299 _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
300 size_t entry_index = entry->key_hash & (new_size - 1);
301
302 _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
303
304 entry = next;
305 }
306 }
307
308 ht->alloc.free(ht->buckets);
309 ht->nbuckets = new_size;
310 ht->buckets = new_buckets;
311 return 0;
312 }
313
314
315 _Py_hashtable_t *
_Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func,_Py_hashtable_destroy_func key_destroy_func,_Py_hashtable_destroy_func value_destroy_func,_Py_hashtable_allocator_t * allocator)316 _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
317 _Py_hashtable_compare_func compare_func,
318 _Py_hashtable_destroy_func key_destroy_func,
319 _Py_hashtable_destroy_func value_destroy_func,
320 _Py_hashtable_allocator_t *allocator)
321 {
322 _Py_hashtable_allocator_t alloc;
323 if (allocator == NULL) {
324 alloc.malloc = PyMem_Malloc;
325 alloc.free = PyMem_Free;
326 }
327 else {
328 alloc = *allocator;
329 }
330
331 _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
332 if (ht == NULL) {
333 return ht;
334 }
335
336 ht->nbuckets = HASHTABLE_MIN_SIZE;
337 ht->nentries = 0;
338
339 size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
340 ht->buckets = alloc.malloc(buckets_size);
341 if (ht->buckets == NULL) {
342 alloc.free(ht);
343 return NULL;
344 }
345 memset(ht->buckets, 0, buckets_size);
346
347 ht->get_entry_func = _Py_hashtable_get_entry_generic;
348 ht->hash_func = hash_func;
349 ht->compare_func = compare_func;
350 ht->key_destroy_func = key_destroy_func;
351 ht->value_destroy_func = value_destroy_func;
352 ht->alloc = alloc;
353 if (ht->hash_func == _Py_hashtable_hash_ptr
354 && ht->compare_func == _Py_hashtable_compare_direct)
355 {
356 ht->get_entry_func = _Py_hashtable_get_entry_ptr;
357 }
358 return ht;
359 }
360
361
362 _Py_hashtable_t *
_Py_hashtable_new(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func)363 _Py_hashtable_new(_Py_hashtable_hash_func hash_func,
364 _Py_hashtable_compare_func compare_func)
365 {
366 return _Py_hashtable_new_full(hash_func, compare_func,
367 NULL, NULL, NULL);
368 }
369
370
371 static void
_Py_hashtable_destroy_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry)372 _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
373 {
374 if (ht->key_destroy_func) {
375 ht->key_destroy_func(entry->key);
376 }
377 if (ht->value_destroy_func) {
378 ht->value_destroy_func(entry->value);
379 }
380 ht->alloc.free(entry);
381 }
382
383
384 void
_Py_hashtable_clear(_Py_hashtable_t * ht)385 _Py_hashtable_clear(_Py_hashtable_t *ht)
386 {
387 for (size_t i=0; i < ht->nbuckets; i++) {
388 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
389 while (entry != NULL) {
390 _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
391 _Py_hashtable_destroy_entry(ht, entry);
392 entry = next;
393 }
394 _Py_slist_init(&ht->buckets[i]);
395 }
396 ht->nentries = 0;
397 // Ignore failure: clear function is not expected to fail
398 // because of a memory allocation failure.
399 (void)hashtable_rehash(ht);
400 }
401
402
403 void
_Py_hashtable_destroy(_Py_hashtable_t * ht)404 _Py_hashtable_destroy(_Py_hashtable_t *ht)
405 {
406 for (size_t i = 0; i < ht->nbuckets; i++) {
407 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
408 while (entry) {
409 _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
410 _Py_hashtable_destroy_entry(ht, entry);
411 entry = entry_next;
412 }
413 }
414
415 ht->alloc.free(ht->buckets);
416 ht->alloc.free(ht);
417 }
418