1 /* The implementation of the hash table (_Py_hashtable_t) is based on the
2 cfuhash project:
3 http://sourceforge.net/projects/libcfu/
4
5 Copyright of cfuhash:
6 ----------------------------------
7 Creation date: 2005-06-24 21:22:40
8 Authors: Don
9 Change log:
10
11 Copyright (c) 2005 Don Owens
12 All rights reserved.
13
14 This code is released under the BSD license:
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
19
20 * Redistributions of source code must retain the above copyright
21 notice, this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above
24 copyright notice, this list of conditions and the following
25 disclaimer in the documentation and/or other materials provided
26 with the distribution.
27
28 * Neither the name of the author nor the names of its
29 contributors may be used to endorse or promote products derived
30 from this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 OF THE POSSIBILITY OF SUCH DAMAGE.
44 ----------------------------------
45 */
46
47 #include "Python.h"
48 #include "pycore_hashtable.h" // export _Py_hashtable_new()
49 #include "pycore_pyhash.h" // _Py_HashPointerRaw()
50
51 #define HASHTABLE_MIN_SIZE 16
52 #define HASHTABLE_HIGH 0.50
53 #define HASHTABLE_LOW 0.10
54 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
55
56 #define BUCKETS_HEAD(SLIST) \
57 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
58 #define TABLE_HEAD(HT, BUCKET) \
59 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
60 #define ENTRY_NEXT(ENTRY) \
61 ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
62
63 /* Forward declaration */
64 static int hashtable_rehash(_Py_hashtable_t *ht);
65
66 static void
_Py_slist_init(_Py_slist_t * list)67 _Py_slist_init(_Py_slist_t *list)
68 {
69 list->head = NULL;
70 }
71
72
73 static void
_Py_slist_prepend(_Py_slist_t * list,_Py_slist_item_t * item)74 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
75 {
76 item->next = list->head;
77 list->head = item;
78 }
79
80
81 static void
_Py_slist_remove(_Py_slist_t * list,_Py_slist_item_t * previous,_Py_slist_item_t * item)82 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
83 _Py_slist_item_t *item)
84 {
85 if (previous != NULL)
86 previous->next = item->next;
87 else
88 list->head = item->next;
89 }
90
91
92 Py_uhash_t
_Py_hashtable_hash_ptr(const void * key)93 _Py_hashtable_hash_ptr(const void *key)
94 {
95 return (Py_uhash_t)_Py_HashPointerRaw(key);
96 }
97
98
99 int
_Py_hashtable_compare_direct(const void * key1,const void * key2)100 _Py_hashtable_compare_direct(const void *key1, const void *key2)
101 {
102 return (key1 == key2);
103 }
104
105
106 /* makes sure the real size of the buckets array is a power of 2 */
107 static size_t
round_size(size_t s)108 round_size(size_t s)
109 {
110 size_t i;
111 if (s < HASHTABLE_MIN_SIZE)
112 return HASHTABLE_MIN_SIZE;
113 i = 1;
114 while (i < s)
115 i <<= 1;
116 return i;
117 }
118
119
120 size_t
_Py_hashtable_size(const _Py_hashtable_t * ht)121 _Py_hashtable_size(const _Py_hashtable_t *ht)
122 {
123 size_t size = sizeof(_Py_hashtable_t);
124 /* buckets */
125 size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
126 /* entries */
127 size += ht->nentries * sizeof(_Py_hashtable_entry_t);
128 return size;
129 }
130
131
132 size_t
_Py_hashtable_len(const _Py_hashtable_t * ht)133 _Py_hashtable_len(const _Py_hashtable_t *ht)
134 {
135 return ht->nentries;
136 }
137
138
139 _Py_hashtable_entry_t *
_Py_hashtable_get_entry_generic(_Py_hashtable_t * ht,const void * key)140 _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
141 {
142 Py_uhash_t key_hash = ht->hash_func(key);
143 size_t index = key_hash & (ht->nbuckets - 1);
144 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
145 while (1) {
146 if (entry == NULL) {
147 return NULL;
148 }
149 if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
150 break;
151 }
152 entry = ENTRY_NEXT(entry);
153 }
154 return entry;
155 }
156
157
158 // Specialized for:
159 // hash_func == _Py_hashtable_hash_ptr
160 // compare_func == _Py_hashtable_compare_direct
161 static _Py_hashtable_entry_t *
_Py_hashtable_get_entry_ptr(_Py_hashtable_t * ht,const void * key)162 _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
163 {
164 Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
165 size_t index = key_hash & (ht->nbuckets - 1);
166 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
167 while (1) {
168 if (entry == NULL) {
169 return NULL;
170 }
171 // Compare directly keys (ignore entry->key_hash)
172 if (entry->key == key) {
173 break;
174 }
175 entry = ENTRY_NEXT(entry);
176 }
177 return entry;
178 }
179
180
181 void*
_Py_hashtable_steal(_Py_hashtable_t * ht,const void * key)182 _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
183 {
184 Py_uhash_t key_hash = ht->hash_func(key);
185 size_t index = key_hash & (ht->nbuckets - 1);
186
187 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
188 _Py_hashtable_entry_t *previous = NULL;
189 while (1) {
190 if (entry == NULL) {
191 // not found
192 return NULL;
193 }
194 if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
195 break;
196 }
197 previous = entry;
198 entry = ENTRY_NEXT(entry);
199 }
200
201 _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
202 (_Py_slist_item_t *)entry);
203 ht->nentries--;
204
205 void *value = entry->value;
206 ht->alloc.free(entry);
207
208 if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
209 // Ignore failure: error cannot be reported to the caller
210 hashtable_rehash(ht);
211 }
212 return value;
213 }
214
215
216 int
_Py_hashtable_set(_Py_hashtable_t * ht,const void * key,void * value)217 _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
218 {
219 _Py_hashtable_entry_t *entry;
220
221 #ifndef NDEBUG
222 /* Don't write the assertion on a single line because it is interesting
223 to know the duplicated entry if the assertion failed. The entry can
224 be read using a debugger. */
225 entry = ht->get_entry_func(ht, key);
226 assert(entry == NULL);
227 #endif
228
229 entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
230 if (entry == NULL) {
231 /* memory allocation failed */
232 return -1;
233 }
234
235 entry->key_hash = ht->hash_func(key);
236 entry->key = (void *)key;
237 entry->value = value;
238
239 ht->nentries++;
240 if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
241 if (hashtable_rehash(ht) < 0) {
242 ht->nentries--;
243 ht->alloc.free(entry);
244 return -1;
245 }
246 }
247
248 size_t index = entry->key_hash & (ht->nbuckets - 1);
249 _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
250 return 0;
251 }
252
253
254 void*
_Py_hashtable_get(_Py_hashtable_t * ht,const void * key)255 _Py_hashtable_get(_Py_hashtable_t *ht, const void *key)
256 {
257 _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, key);
258 if (entry != NULL) {
259 return entry->value;
260 }
261 else {
262 return NULL;
263 }
264 }
265
266
267 int
_Py_hashtable_foreach(_Py_hashtable_t * ht,_Py_hashtable_foreach_func func,void * user_data)268 _Py_hashtable_foreach(_Py_hashtable_t *ht,
269 _Py_hashtable_foreach_func func,
270 void *user_data)
271 {
272 for (size_t hv = 0; hv < ht->nbuckets; hv++) {
273 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
274 while (entry != NULL) {
275 int res = func(ht, entry->key, entry->value, user_data);
276 if (res) {
277 return res;
278 }
279 entry = ENTRY_NEXT(entry);
280 }
281 }
282 return 0;
283 }
284
285
286 static int
hashtable_rehash(_Py_hashtable_t * ht)287 hashtable_rehash(_Py_hashtable_t *ht)
288 {
289 size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
290 if (new_size == ht->nbuckets) {
291 return 0;
292 }
293
294 size_t buckets_size = new_size * sizeof(ht->buckets[0]);
295 _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
296 if (new_buckets == NULL) {
297 /* memory allocation failed */
298 return -1;
299 }
300 memset(new_buckets, 0, buckets_size);
301
302 for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
303 _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
304 while (entry != NULL) {
305 assert(ht->hash_func(entry->key) == entry->key_hash);
306 _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
307 size_t entry_index = entry->key_hash & (new_size - 1);
308
309 _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
310
311 entry = next;
312 }
313 }
314
315 ht->alloc.free(ht->buckets);
316 ht->nbuckets = new_size;
317 ht->buckets = new_buckets;
318 return 0;
319 }
320
321
322 _Py_hashtable_t *
_Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func,_Py_hashtable_destroy_func key_destroy_func,_Py_hashtable_destroy_func value_destroy_func,_Py_hashtable_allocator_t * allocator)323 _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
324 _Py_hashtable_compare_func compare_func,
325 _Py_hashtable_destroy_func key_destroy_func,
326 _Py_hashtable_destroy_func value_destroy_func,
327 _Py_hashtable_allocator_t *allocator)
328 {
329 _Py_hashtable_allocator_t alloc;
330 if (allocator == NULL) {
331 alloc.malloc = PyMem_Malloc;
332 alloc.free = PyMem_Free;
333 }
334 else {
335 alloc = *allocator;
336 }
337
338 _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
339 if (ht == NULL) {
340 return ht;
341 }
342
343 ht->nbuckets = HASHTABLE_MIN_SIZE;
344 ht->nentries = 0;
345
346 size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
347 ht->buckets = alloc.malloc(buckets_size);
348 if (ht->buckets == NULL) {
349 alloc.free(ht);
350 return NULL;
351 }
352 memset(ht->buckets, 0, buckets_size);
353
354 ht->get_entry_func = _Py_hashtable_get_entry_generic;
355 ht->hash_func = hash_func;
356 ht->compare_func = compare_func;
357 ht->key_destroy_func = key_destroy_func;
358 ht->value_destroy_func = value_destroy_func;
359 ht->alloc = alloc;
360 if (ht->hash_func == _Py_hashtable_hash_ptr
361 && ht->compare_func == _Py_hashtable_compare_direct)
362 {
363 ht->get_entry_func = _Py_hashtable_get_entry_ptr;
364 }
365 return ht;
366 }
367
368
369 _Py_hashtable_t *
_Py_hashtable_new(_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func)370 _Py_hashtable_new(_Py_hashtable_hash_func hash_func,
371 _Py_hashtable_compare_func compare_func)
372 {
373 return _Py_hashtable_new_full(hash_func, compare_func,
374 NULL, NULL, NULL);
375 }
376
377
378 static void
_Py_hashtable_destroy_entry(_Py_hashtable_t * ht,_Py_hashtable_entry_t * entry)379 _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
380 {
381 if (ht->key_destroy_func) {
382 ht->key_destroy_func(entry->key);
383 }
384 if (ht->value_destroy_func) {
385 ht->value_destroy_func(entry->value);
386 }
387 ht->alloc.free(entry);
388 }
389
390
391 void
_Py_hashtable_clear(_Py_hashtable_t * ht)392 _Py_hashtable_clear(_Py_hashtable_t *ht)
393 {
394 for (size_t i=0; i < ht->nbuckets; i++) {
395 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
396 while (entry != NULL) {
397 _Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
398 _Py_hashtable_destroy_entry(ht, entry);
399 entry = next;
400 }
401 _Py_slist_init(&ht->buckets[i]);
402 }
403 ht->nentries = 0;
404 // Ignore failure: clear function is not expected to fail
405 // because of a memory allocation failure.
406 (void)hashtable_rehash(ht);
407 }
408
409
410 void
_Py_hashtable_destroy(_Py_hashtable_t * ht)411 _Py_hashtable_destroy(_Py_hashtable_t *ht)
412 {
413 for (size_t i = 0; i < ht->nbuckets; i++) {
414 _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
415 while (entry) {
416 _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
417 _Py_hashtable_destroy_entry(ht, entry);
418 entry = entry_next;
419 }
420 }
421
422 ht->alloc.free(ht->buckets);
423 ht->alloc.free(ht);
424 }
425