1 /* The implementation of the hash table (_Py_hashtable_t) is based on the
2 cfuhash project:
3 http://sourceforge.net/projects/libcfu/
4
5 Copyright of cfuhash:
6 ----------------------------------
7 Creation date: 2005-06-24 21:22:40
8 Authors: Don
9 Change log:
10
11 Copyright (c) 2005 Don Owens
12 All rights reserved.
13
14 This code is released under the BSD license:
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions
18 are met:
19
20 * Redistributions of source code must retain the above copyright
21 notice, this list of conditions and the following disclaimer.
22
23 * Redistributions in binary form must reproduce the above
24 copyright notice, this list of conditions and the following
25 disclaimer in the documentation and/or other materials provided
26 with the distribution.
27
28 * Neither the name of the author nor the names of its
29 contributors may be used to endorse or promote products derived
30 from this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
33 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
34 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
35 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
36 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
37 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
39 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43 OF THE POSSIBILITY OF SUCH DAMAGE.
44 ----------------------------------
45 */
46
47 #include "Python.h"
48 #include "hashtable.h"
49
50 #define HASHTABLE_MIN_SIZE 16
51 #define HASHTABLE_HIGH 0.50
52 #define HASHTABLE_LOW 0.10
53 #define HASHTABLE_REHASH_FACTOR 2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH)
54
55 #define BUCKETS_HEAD(SLIST) \
56 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
57 #define TABLE_HEAD(HT, BUCKET) \
58 ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
59 #define ENTRY_NEXT(ENTRY) \
60 ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
61 #define HASHTABLE_ITEM_SIZE(HT) \
62 (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
63
64 #define ENTRY_READ_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
65 do { \
66 assert((DATA_SIZE) == (TABLE)->data_size); \
67 memcpy((PDATA), _Py_HASHTABLE_ENTRY_PDATA(TABLE, (ENTRY)), \
68 (DATA_SIZE)); \
69 } while (0)
70
71 #define ENTRY_WRITE_PDATA(TABLE, ENTRY, DATA_SIZE, PDATA) \
72 do { \
73 assert((DATA_SIZE) == (TABLE)->data_size); \
74 memcpy((void *)_Py_HASHTABLE_ENTRY_PDATA((TABLE), (ENTRY)), \
75 (PDATA), (DATA_SIZE)); \
76 } while (0)
77
78 /* Forward declaration */
79 static void hashtable_rehash(_Py_hashtable_t *ht);
80
81 static void
_Py_slist_init(_Py_slist_t * list)82 _Py_slist_init(_Py_slist_t *list)
83 {
84 list->head = NULL;
85 }
86
87
88 static void
_Py_slist_prepend(_Py_slist_t * list,_Py_slist_item_t * item)89 _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
90 {
91 item->next = list->head;
92 list->head = item;
93 }
94
95
96 static void
_Py_slist_remove(_Py_slist_t * list,_Py_slist_item_t * previous,_Py_slist_item_t * item)97 _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
98 _Py_slist_item_t *item)
99 {
100 if (previous != NULL)
101 previous->next = item->next;
102 else
103 list->head = item->next;
104 }
105
106
107 Py_uhash_t
_Py_hashtable_hash_ptr(struct _Py_hashtable_t * ht,const void * pkey)108 _Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
109 {
110 void *key;
111
112 _Py_HASHTABLE_READ_KEY(ht, pkey, key);
113 return (Py_uhash_t)_Py_HashPointer(key);
114 }
115
116
117 int
_Py_hashtable_compare_direct(_Py_hashtable_t * ht,const void * pkey,const _Py_hashtable_entry_t * entry)118 _Py_hashtable_compare_direct(_Py_hashtable_t *ht, const void *pkey,
119 const _Py_hashtable_entry_t *entry)
120 {
121 const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
122 return (memcmp(pkey, pkey2, ht->key_size) == 0);
123 }
124
125
126 /* makes sure the real size of the buckets array is a power of 2 */
127 static size_t
round_size(size_t s)128 round_size(size_t s)
129 {
130 size_t i;
131 if (s < HASHTABLE_MIN_SIZE)
132 return HASHTABLE_MIN_SIZE;
133 i = 1;
134 while (i < s)
135 i <<= 1;
136 return i;
137 }
138
139
140 _Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size,size_t data_size,size_t init_size,_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func,_Py_hashtable_allocator_t * allocator)141 _Py_hashtable_new_full(size_t key_size, size_t data_size,
142 size_t init_size,
143 _Py_hashtable_hash_func hash_func,
144 _Py_hashtable_compare_func compare_func,
145 _Py_hashtable_allocator_t *allocator)
146 {
147 _Py_hashtable_t *ht;
148 size_t buckets_size;
149 _Py_hashtable_allocator_t alloc;
150
151 if (allocator == NULL) {
152 alloc.malloc = PyMem_RawMalloc;
153 alloc.free = PyMem_RawFree;
154 }
155 else
156 alloc = *allocator;
157
158 ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
159 if (ht == NULL)
160 return ht;
161
162 ht->num_buckets = round_size(init_size);
163 ht->entries = 0;
164 ht->key_size = key_size;
165 ht->data_size = data_size;
166
167 buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
168 ht->buckets = alloc.malloc(buckets_size);
169 if (ht->buckets == NULL) {
170 alloc.free(ht);
171 return NULL;
172 }
173 memset(ht->buckets, 0, buckets_size);
174
175 ht->hash_func = hash_func;
176 ht->compare_func = compare_func;
177 ht->alloc = alloc;
178 return ht;
179 }
180
181
182 _Py_hashtable_t *
_Py_hashtable_new(size_t key_size,size_t data_size,_Py_hashtable_hash_func hash_func,_Py_hashtable_compare_func compare_func)183 _Py_hashtable_new(size_t key_size, size_t data_size,
184 _Py_hashtable_hash_func hash_func,
185 _Py_hashtable_compare_func compare_func)
186 {
187 return _Py_hashtable_new_full(key_size, data_size,
188 HASHTABLE_MIN_SIZE,
189 hash_func, compare_func,
190 NULL);
191 }
192
193
194 size_t
_Py_hashtable_size(_Py_hashtable_t * ht)195 _Py_hashtable_size(_Py_hashtable_t *ht)
196 {
197 size_t size;
198
199 size = sizeof(_Py_hashtable_t);
200
201 /* buckets */
202 size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
203
204 /* entries */
205 size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
206
207 return size;
208 }
209
210
211 #ifdef Py_DEBUG
212 void
_Py_hashtable_print_stats(_Py_hashtable_t * ht)213 _Py_hashtable_print_stats(_Py_hashtable_t *ht)
214 {
215 size_t size;
216 size_t chain_len, max_chain_len, total_chain_len, nchains;
217 _Py_hashtable_entry_t *entry;
218 size_t hv;
219 double load;
220
221 size = _Py_hashtable_size(ht);
222
223 load = (double)ht->entries / ht->num_buckets;
224
225 max_chain_len = 0;
226 total_chain_len = 0;
227 nchains = 0;
228 for (hv = 0; hv < ht->num_buckets; hv++) {
229 entry = TABLE_HEAD(ht, hv);
230 if (entry != NULL) {
231 chain_len = 0;
232 for (; entry; entry = ENTRY_NEXT(entry)) {
233 chain_len++;
234 }
235 if (chain_len > max_chain_len)
236 max_chain_len = chain_len;
237 total_chain_len += chain_len;
238 nchains++;
239 }
240 }
241 printf("hash table %p: entries=%"
242 PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
243 ht, ht->entries, ht->num_buckets, load * 100.0);
244 if (nchains)
245 printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
246 printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n",
247 max_chain_len, size / 1024);
248 }
249 #endif
250
251
252 _Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t * ht,size_t key_size,const void * pkey)253 _Py_hashtable_get_entry(_Py_hashtable_t *ht,
254 size_t key_size, const void *pkey)
255 {
256 Py_uhash_t key_hash;
257 size_t index;
258 _Py_hashtable_entry_t *entry;
259
260 assert(key_size == ht->key_size);
261
262 key_hash = ht->hash_func(ht, pkey);
263 index = key_hash & (ht->num_buckets - 1);
264
265 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
266 if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
267 break;
268 }
269
270 return entry;
271 }
272
273
274 static int
_Py_hashtable_pop_entry(_Py_hashtable_t * ht,size_t key_size,const void * pkey,void * data,size_t data_size)275 _Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
276 void *data, size_t data_size)
277 {
278 Py_uhash_t key_hash;
279 size_t index;
280 _Py_hashtable_entry_t *entry, *previous;
281
282 assert(key_size == ht->key_size);
283
284 key_hash = ht->hash_func(ht, pkey);
285 index = key_hash & (ht->num_buckets - 1);
286
287 previous = NULL;
288 for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
289 if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
290 break;
291 previous = entry;
292 }
293
294 if (entry == NULL)
295 return 0;
296
297 _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
298 (_Py_slist_item_t *)entry);
299 ht->entries--;
300
301 if (data != NULL)
302 ENTRY_READ_PDATA(ht, entry, data_size, data);
303 ht->alloc.free(entry);
304
305 if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
306 hashtable_rehash(ht);
307 return 1;
308 }
309
310
311 int
_Py_hashtable_set(_Py_hashtable_t * ht,size_t key_size,const void * pkey,size_t data_size,const void * data)312 _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
313 size_t data_size, const void *data)
314 {
315 Py_uhash_t key_hash;
316 size_t index;
317 _Py_hashtable_entry_t *entry;
318
319 assert(key_size == ht->key_size);
320
321 assert(data != NULL || data_size == 0);
322 #ifndef NDEBUG
323 /* Don't write the assertion on a single line because it is interesting
324 to know the duplicated entry if the assertion failed. The entry can
325 be read using a debugger. */
326 entry = _Py_hashtable_get_entry(ht, key_size, pkey);
327 assert(entry == NULL);
328 #endif
329
330 key_hash = ht->hash_func(ht, pkey);
331 index = key_hash & (ht->num_buckets - 1);
332
333 entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
334 if (entry == NULL) {
335 /* memory allocation failed */
336 return -1;
337 }
338
339 entry->key_hash = key_hash;
340 memcpy((void *)_Py_HASHTABLE_ENTRY_PKEY(entry), pkey, ht->key_size);
341 if (data)
342 ENTRY_WRITE_PDATA(ht, entry, data_size, data);
343
344 _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
345 ht->entries++;
346
347 if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
348 hashtable_rehash(ht);
349 return 0;
350 }
351
352
353 int
_Py_hashtable_get(_Py_hashtable_t * ht,size_t key_size,const void * pkey,size_t data_size,void * data)354 _Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
355 size_t data_size, void *data)
356 {
357 _Py_hashtable_entry_t *entry;
358
359 assert(data != NULL);
360
361 entry = _Py_hashtable_get_entry(ht, key_size, pkey);
362 if (entry == NULL)
363 return 0;
364 ENTRY_READ_PDATA(ht, entry, data_size, data);
365 return 1;
366 }
367
368
369 int
_Py_hashtable_pop(_Py_hashtable_t * ht,size_t key_size,const void * pkey,size_t data_size,void * data)370 _Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
371 size_t data_size, void *data)
372 {
373 assert(data != NULL);
374 return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
375 }
376
377
378 /* Code commented since the function is not needed in Python */
379 #if 0
380 void
381 _Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
382 {
383 #ifndef NDEBUG
384 int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
385 assert(found);
386 #else
387 (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
388 #endif
389 }
390 #endif
391
392
393 int
_Py_hashtable_foreach(_Py_hashtable_t * ht,_Py_hashtable_foreach_func func,void * arg)394 _Py_hashtable_foreach(_Py_hashtable_t *ht,
395 _Py_hashtable_foreach_func func,
396 void *arg)
397 {
398 _Py_hashtable_entry_t *entry;
399 size_t hv;
400
401 for (hv = 0; hv < ht->num_buckets; hv++) {
402 for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
403 int res = func(ht, entry, arg);
404 if (res)
405 return res;
406 }
407 }
408 return 0;
409 }
410
411
412 static void
hashtable_rehash(_Py_hashtable_t * ht)413 hashtable_rehash(_Py_hashtable_t *ht)
414 {
415 size_t buckets_size, new_size, bucket;
416 _Py_slist_t *old_buckets = NULL;
417 size_t old_num_buckets;
418
419 new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
420 if (new_size == ht->num_buckets)
421 return;
422
423 old_num_buckets = ht->num_buckets;
424
425 buckets_size = new_size * sizeof(ht->buckets[0]);
426 old_buckets = ht->buckets;
427 ht->buckets = ht->alloc.malloc(buckets_size);
428 if (ht->buckets == NULL) {
429 /* cancel rehash on memory allocation failure */
430 ht->buckets = old_buckets ;
431 /* memory allocation failed */
432 return;
433 }
434 memset(ht->buckets, 0, buckets_size);
435
436 ht->num_buckets = new_size;
437
438 for (bucket = 0; bucket < old_num_buckets; bucket++) {
439 _Py_hashtable_entry_t *entry, *next;
440 for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
441 size_t entry_index;
442
443
444 assert(ht->hash_func(ht, _Py_HASHTABLE_ENTRY_PKEY(entry)) == entry->key_hash);
445 next = ENTRY_NEXT(entry);
446 entry_index = entry->key_hash & (new_size - 1);
447
448 _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
449 }
450 }
451
452 ht->alloc.free(old_buckets);
453 }
454
455
456 void
_Py_hashtable_clear(_Py_hashtable_t * ht)457 _Py_hashtable_clear(_Py_hashtable_t *ht)
458 {
459 _Py_hashtable_entry_t *entry, *next;
460 size_t i;
461
462 for (i=0; i < ht->num_buckets; i++) {
463 for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
464 next = ENTRY_NEXT(entry);
465 ht->alloc.free(entry);
466 }
467 _Py_slist_init(&ht->buckets[i]);
468 }
469 ht->entries = 0;
470 hashtable_rehash(ht);
471 }
472
473
474 void
_Py_hashtable_destroy(_Py_hashtable_t * ht)475 _Py_hashtable_destroy(_Py_hashtable_t *ht)
476 {
477 size_t i;
478
479 for (i = 0; i < ht->num_buckets; i++) {
480 _Py_slist_item_t *entry = ht->buckets[i].head;
481 while (entry) {
482 _Py_slist_item_t *entry_next = entry->next;
483 ht->alloc.free(entry);
484 entry = entry_next;
485 }
486 }
487
488 ht->alloc.free(ht->buckets);
489 ht->alloc.free(ht);
490 }
491
492
493 _Py_hashtable_t *
_Py_hashtable_copy(_Py_hashtable_t * src)494 _Py_hashtable_copy(_Py_hashtable_t *src)
495 {
496 const size_t key_size = src->key_size;
497 const size_t data_size = src->data_size;
498 _Py_hashtable_t *dst;
499 _Py_hashtable_entry_t *entry;
500 size_t bucket;
501 int err;
502
503 dst = _Py_hashtable_new_full(key_size, data_size,
504 src->num_buckets,
505 src->hash_func,
506 src->compare_func,
507 &src->alloc);
508 if (dst == NULL)
509 return NULL;
510
511 for (bucket=0; bucket < src->num_buckets; bucket++) {
512 entry = TABLE_HEAD(src, bucket);
513 for (; entry; entry = ENTRY_NEXT(entry)) {
514 const void *pkey = _Py_HASHTABLE_ENTRY_PKEY(entry);
515 const void *pdata = _Py_HASHTABLE_ENTRY_PDATA(src, entry);
516 err = _Py_hashtable_set(dst, key_size, pkey, data_size, pdata);
517 if (err) {
518 _Py_hashtable_destroy(dst);
519 return NULL;
520 }
521 }
522 }
523 return dst;
524 }
525