• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 
3 #include "pycore_hamt.h"
4 #include "pycore_object.h"
5 #include "pycore_pystate.h"
6 #include "structmember.h"
7 
8 /*
9 This file provides an implementation of an immutable mapping using the
10 Hash Array Mapped Trie (or HAMT) datastructure.
11 
12 This design allows to have:
13 
14 1. Efficient copy: immutable mappings can be copied by reference,
15    making it an O(1) operation.
16 
17 2. Efficient mutations: due to structural sharing, only a portion of
18    the trie needs to be copied when the collection is mutated.  The
19    cost of set/delete operations is O(log N).
20 
21 3. Efficient lookups: O(log N).
22 
23 (where N is number of key/value items in the immutable mapping.)
24 
25 
26 HAMT
27 ====
28 
29 The core idea of HAMT is that the shape of the trie is encoded into the
30 hashes of keys.
31 
32 Say we want to store a K/V pair in our mapping.  First, we calculate the
33 hash of K, let's say it's 19830128, or in binary:
34 
35     0b1001011101001010101110000 = 19830128
36 
37 Now let's partition this bit representation of the hash into blocks of
38 5 bits each:
39 
40     0b00_00000_10010_11101_00101_01011_10000 = 19830128
41           (6)   (5)   (4)   (3)   (2)   (1)
42 
43 Each block of 5 bits represents a number between 0 and 31.  So if we have
44 a tree that consists of nodes, each of which is an array of 32 pointers,
45 those 5-bit blocks will encode a position on a single tree level.
46 
47 For example, storing the key K with hash 19830128, results in the following
48 tree structure:
49 
50                      (array of 32 pointers)
51                      +---+ -- +----+----+----+ -- +----+
52   root node          | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b10000 = 16 (1)
53   (level 1)          +---+ -- +----+----+----+ -- +----+
54                                       |
55                      +---+ -- +----+----+----+ -- +----+
56   a 2nd level node   | 0 | .. | 10 | 11 | 12 | .. | 31 |   0b01011 = 11 (2)
57                      +---+ -- +----+----+----+ -- +----+
58                                       |
59                      +---+ -- +----+----+----+ -- +----+
60   a 3rd level node   | 0 | .. | 04 | 05 | 06 | .. | 31 |   0b00101 = 5  (3)
61                      +---+ -- +----+----+----+ -- +----+
62                                       |
63                      +---+ -- +----+----+----+----+
64   a 4th level node   | 0 | .. | 04 | 29 | 30 | 31 |        0b11101 = 29 (4)
65                      +---+ -- +----+----+----+----+
66                                       |
67                      +---+ -- +----+----+----+ -- +----+
68   a 5th level node   | 0 | .. | 17 | 18 | 19 | .. | 31 |   0b10010 = 18 (5)
69                      +---+ -- +----+----+----+ -- +----+
70                                       |
71                        +--------------+
72                        |
73                      +---+ -- +----+----+----+ -- +----+
74   a 6th level node   | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b00000 = 0  (6)
75                      +---+ -- +----+----+----+ -- +----+
76                        |
77                        V -- our value (or collision)
78 
79 To rehash: for a K/V pair, the hash of K encodes where in the tree V will
80 be stored.
81 
82 To optimize memory footprint and handle hash collisions, our implementation
83 uses three different types of nodes:
84 
85  * A Bitmap node;
86  * An Array node;
87  * A Collision node.
88 
89 Because we implement an immutable dictionary, our nodes are also
90 immutable.  Therefore, when we need to modify a node, we copy it, and
91 do that modification to the copy.
92 
93 
94 Array Nodes
95 -----------
96 
97 These nodes are very simple.  Essentially they are arrays of 32 pointers
98 we used to illustrate the high-level idea in the previous section.
99 
100 We use Array nodes only when we need to store more than 16 pointers
101 in a single node.
102 
103 Array nodes do not store key objects or value objects.  They are used
104 only as an indirection level - their pointers point to other nodes in
105 the tree.
106 
107 
108 Bitmap Node
109 -----------
110 
111 Allocating a new 32-pointers array for every node of our tree would be
112 very expensive.  Unless we store millions of keys, most of tree nodes would
113 be very sparse.
114 
115 When we have less than 16 elements in a node, we don't want to use the
116 Array node, that would mean that we waste a lot of memory.  Instead,
117 we can use bitmap compression and can have just as many pointers
118 as we need!
119 
120 Bitmap nodes consist of two fields:
121 
122 1. An array of pointers.  If a Bitmap node holds N elements, the
123    array will be of N pointers.
124 
125 2. A 32bit integer -- a bitmap field.  If an N-th bit is set in the
126    bitmap, it means that the node has an N-th element.
127 
128 For example, say we need to store a 3 elements sparse array:
129 
130    +---+  --  +---+  --  +----+  --  +----+
131    | 0 |  ..  | 4 |  ..  | 11 |  ..  | 17 |
132    +---+  --  +---+  --  +----+  --  +----+
133                 |          |           |
134                 o1         o2          o3
135 
136 We allocate a three-pointer Bitmap node.  Its bitmap field will be
137 then set to:
138 
139    0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
140 
141 To check if our Bitmap node has an I-th element we can do:
142 
143    bitmap & (1 << I)
144 
145 
146 And here's a formula to calculate a position in our pointer array
147 which would correspond to an I-th element:
148 
149    popcount(bitmap & ((1 << I) - 1))
150 
151 
152 Let's break it down:
153 
154  * `popcount` is a function that returns a number of bits set to 1;
155 
156  * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
157    set to the *right* of our bit.
158 
159 
160 So for our 17, 11, and 4 indexes:
161 
162  * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
163 
164  * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
165 
166  * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
167 
168 
169 To conclude: Bitmap nodes are just like Array nodes -- they can store
170 a number of pointers, but use bitmap compression to eliminate unused
171 pointers.
172 
173 
174 Bitmap nodes have two pointers for each item:
175 
176   +----+----+----+----+  --  +----+----+
177   | k1 | v1 | k2 | v2 |  ..  | kN | vN |
178   +----+----+----+----+  --  +----+----+
179 
180 When kI == NULL, vI points to another tree level.
181 
182 When kI != NULL, the actual key object is stored in kI, and its
183 value is stored in vI.
184 
185 
186 Collision Nodes
187 ---------------
188 
189 Collision nodes are simple arrays of pointers -- two pointers per
190 key/value.  When there's a hash collision, say for k1/v1 and k2/v2
191 we have `hash(k1)==hash(k2)`.  Then our collision node will be:
192 
193   +----+----+----+----+
194   | k1 | v1 | k2 | v2 |
195   +----+----+----+----+
196 
197 
198 Tree Structure
199 --------------
200 
201 All nodes are PyObjects.
202 
203 The `PyHamtObject` object has a pointer to the root node (h_root),
204 and has a length field (h_count).
205 
206 High-level functions accept a PyHamtObject object and dispatch to
207 lower-level functions depending on what kind of node h_root points to.
208 
209 
210 Operations
211 ==========
212 
213 There are three fundamental operations on an immutable dictionary:
214 
215 1. "o.assoc(k, v)" will return a new immutable dictionary, that will be
216    a copy of "o", but with the "k/v" item set.
217 
218    Functions in this file:
219 
220         hamt_node_assoc, hamt_node_bitmap_assoc,
221         hamt_node_array_assoc, hamt_node_collision_assoc
222 
223    `hamt_node_assoc` function accepts a node object, and calls
224    other functions depending on its actual type.
225 
226 2. "o.find(k)" will lookup key "k" in "o".
227 
228    Functions:
229 
230         hamt_node_find, hamt_node_bitmap_find,
231         hamt_node_array_find, hamt_node_collision_find
232 
233 3. "o.without(k)" will return a new immutable dictionary, that will be
234    a copy of "o", buth without the "k" key.
235 
236    Functions:
237 
238         hamt_node_without, hamt_node_bitmap_without,
239         hamt_node_array_without, hamt_node_collision_without
240 
241 
242 Further Reading
243 ===============
244 
245 1. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
246 
247 2. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
248 
249 3. Clojure's PersistentHashMap implementation:
250    https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
251 
252 
253 Debug
254 =====
255 
256 The HAMT datatype is accessible for testing purposes under the
257 `_testcapi` module:
258 
259     >>> from _testcapi import hamt
260     >>> h = hamt()
261     >>> h2 = h.set('a', 2)
262     >>> h3 = h2.set('b', 3)
263     >>> list(h3)
264     ['a', 'b']
265 
266 When CPython is built in debug mode, a '__dump__()' method is available
267 to introspect the tree:
268 
269     >>> print(h3.__dump__())
270     HAMT(len=2):
271         BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
272             'a': 2
273             'b': 3
274 */
275 
276 
277 #define IS_ARRAY_NODE(node)     (Py_TYPE(node) == &_PyHamt_ArrayNode_Type)
278 #define IS_BITMAP_NODE(node)    (Py_TYPE(node) == &_PyHamt_BitmapNode_Type)
279 #define IS_COLLISION_NODE(node) (Py_TYPE(node) == &_PyHamt_CollisionNode_Type)
280 
281 
282 /* Return type for 'find' (lookup a key) functions.
283 
284    * F_ERROR - an error occurred;
285    * F_NOT_FOUND - the key was not found;
286    * F_FOUND - the key was found.
287 */
288 typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
289 
290 
291 /* Return type for 'without' (delete a key) functions.
292 
293    * W_ERROR - an error occurred;
294    * W_NOT_FOUND - the key was not found: there's nothing to delete;
295    * W_EMPTY - the key was found: the node/tree would be empty
296      if the key is deleted;
297    * W_NEWNODE - the key was found: a new node/tree is returned
298      without that key.
299 */
300 typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
301 
302 
303 /* Low-level iterator protocol type.
304 
305    * I_ITEM - a new item has been yielded;
306    * I_END - the whole tree was visited (similar to StopIteration).
307 */
308 typedef enum {I_ITEM, I_END} hamt_iter_t;
309 
310 
311 #define HAMT_ARRAY_NODE_SIZE 32
312 
313 
314 typedef struct {
315     PyObject_HEAD
316     PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
317     Py_ssize_t a_count;
318 } PyHamtNode_Array;
319 
320 
321 typedef struct {
322     PyObject_VAR_HEAD
323     uint32_t b_bitmap;
324     PyObject *b_array[1];
325 } PyHamtNode_Bitmap;
326 
327 
328 typedef struct {
329     PyObject_VAR_HEAD
330     int32_t c_hash;
331     PyObject *c_array[1];
332 } PyHamtNode_Collision;
333 
334 
335 static PyHamtNode_Bitmap *_empty_bitmap_node;
336 static PyHamtObject *_empty_hamt;
337 
338 
339 static PyHamtObject *
340 hamt_alloc(void);
341 
342 static PyHamtNode *
343 hamt_node_assoc(PyHamtNode *node,
344                 uint32_t shift, int32_t hash,
345                 PyObject *key, PyObject *val, int* added_leaf);
346 
347 static hamt_without_t
348 hamt_node_without(PyHamtNode *node,
349                   uint32_t shift, int32_t hash,
350                   PyObject *key,
351                   PyHamtNode **new_node);
352 
353 static hamt_find_t
354 hamt_node_find(PyHamtNode *node,
355                uint32_t shift, int32_t hash,
356                PyObject *key, PyObject **val);
357 
358 #ifdef Py_DEBUG
359 static int
360 hamt_node_dump(PyHamtNode *node,
361                _PyUnicodeWriter *writer, int level);
362 #endif
363 
364 static PyHamtNode *
365 hamt_node_array_new(Py_ssize_t);
366 
367 static PyHamtNode *
368 hamt_node_collision_new(int32_t hash, Py_ssize_t size);
369 
370 static inline Py_ssize_t
371 hamt_node_collision_count(PyHamtNode_Collision *node);
372 
373 
374 #ifdef Py_DEBUG
375 static void
_hamt_node_array_validate(void * obj_raw)376 _hamt_node_array_validate(void *obj_raw)
377 {
378     PyObject *obj = _PyObject_CAST(obj_raw);
379     assert(IS_ARRAY_NODE(obj));
380     PyHamtNode_Array *node = (PyHamtNode_Array*)obj;
381     Py_ssize_t i = 0, count = 0;
382     for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
383         if (node->a_array[i] != NULL) {
384             count++;
385         }
386     }
387     assert(count == node->a_count);
388 }
389 
390 #define VALIDATE_ARRAY_NODE(NODE) \
391     do { _hamt_node_array_validate(NODE); } while (0);
392 #else
393 #define VALIDATE_ARRAY_NODE(NODE)
394 #endif
395 
396 
397 /* Returns -1 on error */
398 static inline int32_t
hamt_hash(PyObject * o)399 hamt_hash(PyObject *o)
400 {
401     Py_hash_t hash = PyObject_Hash(o);
402 
403 #if SIZEOF_PY_HASH_T <= 4
404     return hash;
405 #else
406     if (hash == -1) {
407         /* exception */
408         return -1;
409     }
410 
411     /* While it's suboptimal to reduce Python's 64 bit hash to
412        32 bits via XOR, it seems that the resulting hash function
413        is good enough (this is also how Long type is hashed in Java.)
414        Storing 10, 100, 1000 Python strings results in a relatively
415        shallow and uniform tree structure.
416 
417        Please don't change this hashing algorithm, as there are many
418        tests that test some exact tree shape to cover all code paths.
419     */
420     int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
421     return xored == -1 ? -2 : xored;
422 #endif
423 }
424 
425 static inline uint32_t
hamt_mask(int32_t hash,uint32_t shift)426 hamt_mask(int32_t hash, uint32_t shift)
427 {
428     return (((uint32_t)hash >> shift) & 0x01f);
429 }
430 
431 static inline uint32_t
hamt_bitpos(int32_t hash,uint32_t shift)432 hamt_bitpos(int32_t hash, uint32_t shift)
433 {
434     return (uint32_t)1 << hamt_mask(hash, shift);
435 }
436 
437 static inline uint32_t
hamt_bitcount(uint32_t i)438 hamt_bitcount(uint32_t i)
439 {
440     /* We could use native popcount instruction but that would
441        require to either add configure flags to enable SSE4.2
442        support or to detect it dynamically.  Otherwise, we have
443        a risk of CPython not working properly on older hardware.
444 
445        In practice, there's no observable difference in
446        performance between using a popcount instruction or the
447        following fallback code.
448 
449        The algorithm is copied from:
450        https://graphics.stanford.edu/~seander/bithacks.html
451     */
452     i = i - ((i >> 1) & 0x55555555);
453     i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
454     return (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
455 }
456 
457 static inline uint32_t
hamt_bitindex(uint32_t bitmap,uint32_t bit)458 hamt_bitindex(uint32_t bitmap, uint32_t bit)
459 {
460     return hamt_bitcount(bitmap & (bit - 1));
461 }
462 
463 
464 /////////////////////////////////// Dump Helpers
465 #ifdef Py_DEBUG
466 
467 static int
_hamt_dump_ident(_PyUnicodeWriter * writer,int level)468 _hamt_dump_ident(_PyUnicodeWriter *writer, int level)
469 {
470     /* Write `'    ' * level` to the `writer` */
471     PyObject *str = NULL;
472     PyObject *num = NULL;
473     PyObject *res = NULL;
474     int ret = -1;
475 
476     str = PyUnicode_FromString("    ");
477     if (str == NULL) {
478         goto error;
479     }
480 
481     num = PyLong_FromLong((long)level);
482     if (num == NULL) {
483         goto error;
484     }
485 
486     res = PyNumber_Multiply(str, num);
487     if (res == NULL) {
488         goto error;
489     }
490 
491     ret = _PyUnicodeWriter_WriteStr(writer, res);
492 
493 error:
494     Py_XDECREF(res);
495     Py_XDECREF(str);
496     Py_XDECREF(num);
497     return ret;
498 }
499 
500 static int
_hamt_dump_format(_PyUnicodeWriter * writer,const char * format,...)501 _hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
502 {
503     /* A convenient helper combining _PyUnicodeWriter_WriteStr and
504        PyUnicode_FromFormatV.
505     */
506     PyObject* msg;
507     int ret;
508 
509     va_list vargs;
510 #ifdef HAVE_STDARG_PROTOTYPES
511     va_start(vargs, format);
512 #else
513     va_start(vargs);
514 #endif
515     msg = PyUnicode_FromFormatV(format, vargs);
516     va_end(vargs);
517 
518     if (msg == NULL) {
519         return -1;
520     }
521 
522     ret = _PyUnicodeWriter_WriteStr(writer, msg);
523     Py_DECREF(msg);
524     return ret;
525 }
526 
527 #endif  /* Py_DEBUG */
528 /////////////////////////////////// Bitmap Node
529 
530 
531 static PyHamtNode *
hamt_node_bitmap_new(Py_ssize_t size)532 hamt_node_bitmap_new(Py_ssize_t size)
533 {
534     /* Create a new bitmap node of size 'size' */
535 
536     PyHamtNode_Bitmap *node;
537     Py_ssize_t i;
538 
539     assert(size >= 0);
540     assert(size % 2 == 0);
541 
542     if (size == 0 && _empty_bitmap_node != NULL) {
543         Py_INCREF(_empty_bitmap_node);
544         return (PyHamtNode *)_empty_bitmap_node;
545     }
546 
547     /* No freelist; allocate a new bitmap node */
548     node = PyObject_GC_NewVar(
549         PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
550     if (node == NULL) {
551         return NULL;
552     }
553 
554     Py_SIZE(node) = size;
555 
556     for (i = 0; i < size; i++) {
557         node->b_array[i] = NULL;
558     }
559 
560     node->b_bitmap = 0;
561 
562     _PyObject_GC_TRACK(node);
563 
564     if (size == 0 && _empty_bitmap_node == NULL) {
565         /* Since bitmap nodes are immutable, we can cache the instance
566            for size=0 and reuse it whenever we need an empty bitmap node.
567         */
568         _empty_bitmap_node = node;
569         Py_INCREF(_empty_bitmap_node);
570     }
571 
572     return (PyHamtNode *)node;
573 }
574 
575 static inline Py_ssize_t
hamt_node_bitmap_count(PyHamtNode_Bitmap * node)576 hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
577 {
578     return Py_SIZE(node) / 2;
579 }
580 
581 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone(PyHamtNode_Bitmap * node)582 hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
583 {
584     /* Clone a bitmap node; return a new one with the same child notes. */
585 
586     PyHamtNode_Bitmap *clone;
587     Py_ssize_t i;
588 
589     clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
590     if (clone == NULL) {
591         return NULL;
592     }
593 
594     for (i = 0; i < Py_SIZE(node); i++) {
595         Py_XINCREF(node->b_array[i]);
596         clone->b_array[i] = node->b_array[i];
597     }
598 
599     clone->b_bitmap = node->b_bitmap;
600     return clone;
601 }
602 
603 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone_without(PyHamtNode_Bitmap * o,uint32_t bit)604 hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
605 {
606     assert(bit & o->b_bitmap);
607     assert(hamt_node_bitmap_count(o) > 1);
608 
609     PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
610         Py_SIZE(o) - 2);
611     if (new == NULL) {
612         return NULL;
613     }
614 
615     uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
616     uint32_t key_idx = 2 * idx;
617     uint32_t val_idx = key_idx + 1;
618     uint32_t i;
619 
620     for (i = 0; i < key_idx; i++) {
621         Py_XINCREF(o->b_array[i]);
622         new->b_array[i] = o->b_array[i];
623     }
624 
625     assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32);
626     for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) {
627         Py_XINCREF(o->b_array[i]);
628         new->b_array[i - 2] = o->b_array[i];
629     }
630 
631     new->b_bitmap = o->b_bitmap & ~bit;
632     return new;
633 }
634 
635 static PyHamtNode *
hamt_node_new_bitmap_or_collision(uint32_t shift,PyObject * key1,PyObject * val1,int32_t key2_hash,PyObject * key2,PyObject * val2)636 hamt_node_new_bitmap_or_collision(uint32_t shift,
637                                   PyObject *key1, PyObject *val1,
638                                   int32_t key2_hash,
639                                   PyObject *key2, PyObject *val2)
640 {
641     /* Helper method.  Creates a new node for key1/val and key2/val2
642        pairs.
643 
644        If key1 hash is equal to the hash of key2, a Collision node
645        will be created.  If they are not equal, a Bitmap node is
646        created.
647     */
648 
649     int32_t key1_hash = hamt_hash(key1);
650     if (key1_hash == -1) {
651         return NULL;
652     }
653 
654     if (key1_hash == key2_hash) {
655         PyHamtNode_Collision *n;
656         n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
657         if (n == NULL) {
658             return NULL;
659         }
660 
661         Py_INCREF(key1);
662         n->c_array[0] = key1;
663         Py_INCREF(val1);
664         n->c_array[1] = val1;
665 
666         Py_INCREF(key2);
667         n->c_array[2] = key2;
668         Py_INCREF(val2);
669         n->c_array[3] = val2;
670 
671         return (PyHamtNode *)n;
672     }
673     else {
674         int added_leaf = 0;
675         PyHamtNode *n = hamt_node_bitmap_new(0);
676         if (n == NULL) {
677             return NULL;
678         }
679 
680         PyHamtNode *n2 = hamt_node_assoc(
681             n, shift, key1_hash, key1, val1, &added_leaf);
682         Py_DECREF(n);
683         if (n2 == NULL) {
684             return NULL;
685         }
686 
687         n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
688         Py_DECREF(n2);
689         if (n == NULL) {
690             return NULL;
691         }
692 
693         return n;
694     }
695 }
696 
697 static PyHamtNode *
hamt_node_bitmap_assoc(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)698 hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
699                        uint32_t shift, int32_t hash,
700                        PyObject *key, PyObject *val, int* added_leaf)
701 {
702     /* assoc operation for bitmap nodes.
703 
704        Return: a new node, or self if key/val already is in the
705        collection.
706 
707        'added_leaf' is later used in '_PyHamt_Assoc' to determine if
708        `hamt.set(key, val)` increased the size of the collection.
709     */
710 
711     uint32_t bit = hamt_bitpos(hash, shift);
712     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
713 
714     /* Bitmap node layout:
715 
716     +------+------+------+------+  ---  +------+------+
717     | key1 | val1 | key2 | val2 |  ...  | keyN | valN |
718     +------+------+------+------+  ---  +------+------+
719     where `N < Py_SIZE(node)`.
720 
721     The `node->b_bitmap` field is a bitmap.  For a given
722     `(shift, hash)` pair we can determine:
723 
724      - If this node has the corresponding key/val slots.
725      - The index of key/val slots.
726     */
727 
728     if (self->b_bitmap & bit) {
729         /* The key is set in this node */
730 
731         uint32_t key_idx = 2 * idx;
732         uint32_t val_idx = key_idx + 1;
733 
734         assert(val_idx < (size_t)Py_SIZE(self));
735 
736         PyObject *key_or_null = self->b_array[key_idx];
737         PyObject *val_or_node = self->b_array[val_idx];
738 
739         if (key_or_null == NULL) {
740             /* key is NULL.  This means that we have a few keys
741                that have the same (hash, shift) pair. */
742 
743             assert(val_or_node != NULL);
744 
745             PyHamtNode *sub_node = hamt_node_assoc(
746                 (PyHamtNode *)val_or_node,
747                 shift + 5, hash, key, val, added_leaf);
748             if (sub_node == NULL) {
749                 return NULL;
750             }
751 
752             if (val_or_node == (PyObject *)sub_node) {
753                 Py_DECREF(sub_node);
754                 Py_INCREF(self);
755                 return (PyHamtNode *)self;
756             }
757 
758             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
759             if (ret == NULL) {
760                 return NULL;
761             }
762             Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
763             return (PyHamtNode *)ret;
764         }
765 
766         assert(key != NULL);
767         /* key is not NULL.  This means that we have only one other
768            key in this collection that matches our hash for this shift. */
769 
770         int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
771         if (comp_err < 0) {  /* exception in __eq__ */
772             return NULL;
773         }
774         if (comp_err == 1) {  /* key == key_or_null */
775             if (val == val_or_node) {
776                 /* we already have the same key/val pair; return self. */
777                 Py_INCREF(self);
778                 return (PyHamtNode *)self;
779             }
780 
781             /* We're setting a new value for the key we had before.
782                Make a new bitmap node with a replaced value, and return it. */
783             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
784             if (ret == NULL) {
785                 return NULL;
786             }
787             Py_INCREF(val);
788             Py_SETREF(ret->b_array[val_idx], val);
789             return (PyHamtNode *)ret;
790         }
791 
792         /* It's a new key, and it has the same index as *one* another key.
793            We have a collision.  We need to create a new node which will
794            combine the existing key and the key we're adding.
795 
796            `hamt_node_new_bitmap_or_collision` will either create a new
797            Collision node if the keys have identical hashes, or
798            a new Bitmap node.
799         */
800         PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
801             shift + 5,
802             key_or_null, val_or_node,  /* existing key/val */
803             hash,
804             key, val  /* new key/val */
805         );
806         if (sub_node == NULL) {
807             return NULL;
808         }
809 
810         PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
811         if (ret == NULL) {
812             Py_DECREF(sub_node);
813             return NULL;
814         }
815         Py_SETREF(ret->b_array[key_idx], NULL);
816         Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
817 
818         *added_leaf = 1;
819         return (PyHamtNode *)ret;
820     }
821     else {
822         /* There was no key before with the same (shift,hash). */
823 
824         uint32_t n = hamt_bitcount(self->b_bitmap);
825 
826         if (n >= 16) {
827             /* When we have a situation where we want to store more
828                than 16 nodes at one level of the tree, we no longer
829                want to use the Bitmap node with bitmap encoding.
830 
831                Instead we start using an Array node, which has
832                simpler (faster) implementation at the expense of
833                having prealocated 32 pointers for its keys/values
834                pairs.
835 
836                Small hamt objects (<30 keys) usually don't have any
837                Array nodes at all.  Between ~30 and ~400 keys hamt
838                objects usually have one Array node, and usually it's
839                a root node.
840             */
841 
842             uint32_t jdx = hamt_mask(hash, shift);
843             /* 'jdx' is the index of where the new key should be added
844                in the new Array node we're about to create. */
845 
846             PyHamtNode *empty = NULL;
847             PyHamtNode_Array *new_node = NULL;
848             PyHamtNode *res = NULL;
849 
850             /* Create a new Array node. */
851             new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
852             if (new_node == NULL) {
853                 goto fin;
854             }
855 
856             /* Create an empty bitmap node for the next
857                hamt_node_assoc call. */
858             empty = hamt_node_bitmap_new(0);
859             if (empty == NULL) {
860                 goto fin;
861             }
862 
863             /* Make a new bitmap node for the key/val we're adding.
864                Set that bitmap node to new-array-node[jdx]. */
865             new_node->a_array[jdx] = hamt_node_assoc(
866                 empty, shift + 5, hash, key, val, added_leaf);
867             if (new_node->a_array[jdx] == NULL) {
868                 goto fin;
869             }
870 
871             /* Copy existing key/value pairs from the current Bitmap
872                node to the new Array node we've just created. */
873             Py_ssize_t i, j;
874             for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
875                 if (((self->b_bitmap >> i) & 1) != 0) {
876                     /* Ensure we don't accidentally override `jdx` element
877                        we set few lines above.
878                     */
879                     assert(new_node->a_array[i] == NULL);
880 
881                     if (self->b_array[j] == NULL) {
882                         new_node->a_array[i] =
883                             (PyHamtNode *)self->b_array[j + 1];
884                         Py_INCREF(new_node->a_array[i]);
885                     }
886                     else {
887                         int32_t rehash = hamt_hash(self->b_array[j]);
888                         if (rehash == -1) {
889                             goto fin;
890                         }
891 
892                         new_node->a_array[i] = hamt_node_assoc(
893                             empty, shift + 5,
894                             rehash,
895                             self->b_array[j],
896                             self->b_array[j + 1],
897                             added_leaf);
898 
899                         if (new_node->a_array[i] == NULL) {
900                             goto fin;
901                         }
902                     }
903                     j += 2;
904                 }
905             }
906 
907             VALIDATE_ARRAY_NODE(new_node)
908 
909             /* That's it! */
910             res = (PyHamtNode *)new_node;
911 
912         fin:
913             Py_XDECREF(empty);
914             if (res == NULL) {
915                 Py_XDECREF(new_node);
916             }
917             return res;
918         }
919         else {
920             /* We have less than 16 keys at this level; let's just
921                create a new bitmap node out of this node with the
922                new key/val pair added. */
923 
924             uint32_t key_idx = 2 * idx;
925             uint32_t val_idx = key_idx + 1;
926             uint32_t i;
927 
928             *added_leaf = 1;
929 
930             /* Allocate new Bitmap node which can have one more key/val
931                pair in addition to what we have already. */
932             PyHamtNode_Bitmap *new_node =
933                 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
934             if (new_node == NULL) {
935                 return NULL;
936             }
937 
938             /* Copy all keys/values that will be before the new key/value
939                we are adding. */
940             for (i = 0; i < key_idx; i++) {
941                 Py_XINCREF(self->b_array[i]);
942                 new_node->b_array[i] = self->b_array[i];
943             }
944 
945             /* Set the new key/value to the new Bitmap node. */
946             Py_INCREF(key);
947             new_node->b_array[key_idx] = key;
948             Py_INCREF(val);
949             new_node->b_array[val_idx] = val;
950 
951             /* Copy all keys/values that will be after the new key/value
952                we are adding. */
953             assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32);
954             for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) {
955                 Py_XINCREF(self->b_array[i]);
956                 new_node->b_array[i + 2] = self->b_array[i];
957             }
958 
959             new_node->b_bitmap = self->b_bitmap | bit;
960             return (PyHamtNode *)new_node;
961         }
962     }
963 }
964 
965 static hamt_without_t
hamt_node_bitmap_without(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)966 hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
967                          uint32_t shift, int32_t hash,
968                          PyObject *key,
969                          PyHamtNode **new_node)
970 {
971     uint32_t bit = hamt_bitpos(hash, shift);
972     if ((self->b_bitmap & bit) == 0) {
973         return W_NOT_FOUND;
974     }
975 
976     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
977 
978     uint32_t key_idx = 2 * idx;
979     uint32_t val_idx = key_idx + 1;
980 
981     PyObject *key_or_null = self->b_array[key_idx];
982     PyObject *val_or_node = self->b_array[val_idx];
983 
984     if (key_or_null == NULL) {
985         /* key == NULL means that 'value' is another tree node. */
986 
987         PyHamtNode *sub_node = NULL;
988 
989         hamt_without_t res = hamt_node_without(
990             (PyHamtNode *)val_or_node,
991             shift + 5, hash, key, &sub_node);
992 
993         switch (res) {
994             case W_EMPTY:
995                 /* It's impossible for us to receive a W_EMPTY here:
996 
997                     - Array nodes are converted to Bitmap nodes when
998                       we delete 16th item from them;
999 
1000                     - Collision nodes are converted to Bitmap when
1001                       there is one item in them;
1002 
1003                     - Bitmap node's without() inlines single-item
1004                       sub-nodes.
1005 
1006                    So in no situation we can have a single-item
1007                    Bitmap child of another Bitmap node.
1008                 */
1009                 Py_UNREACHABLE();
1010 
1011             case W_NEWNODE: {
1012                 assert(sub_node != NULL);
1013 
1014                 if (IS_BITMAP_NODE(sub_node)) {
1015                     PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1016                     if (hamt_node_bitmap_count(sub_tree) == 1 &&
1017                             sub_tree->b_array[0] != NULL)
1018                     {
1019                         /* A bitmap node with one key/value pair.  Just
1020                            merge it into this node.
1021 
1022                            Note that we don't inline Bitmap nodes that
1023                            have a NULL key -- those nodes point to another
1024                            tree level, and we cannot simply move tree levels
1025                            up or down.
1026                         */
1027 
1028                         PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1029                         if (clone == NULL) {
1030                             Py_DECREF(sub_node);
1031                             return W_ERROR;
1032                         }
1033 
1034                         PyObject *key = sub_tree->b_array[0];
1035                         PyObject *val = sub_tree->b_array[1];
1036 
1037                         Py_INCREF(key);
1038                         Py_XSETREF(clone->b_array[key_idx], key);
1039                         Py_INCREF(val);
1040                         Py_SETREF(clone->b_array[val_idx], val);
1041 
1042                         Py_DECREF(sub_tree);
1043 
1044                         *new_node = (PyHamtNode *)clone;
1045                         return W_NEWNODE;
1046                     }
1047                 }
1048 
1049 #ifdef Py_DEBUG
1050                 /* Ensure that Collision.without implementation
1051                    converts to Bitmap nodes itself.
1052                 */
1053                 if (IS_COLLISION_NODE(sub_node)) {
1054                     assert(hamt_node_collision_count(
1055                             (PyHamtNode_Collision*)sub_node) > 1);
1056                 }
1057 #endif
1058 
1059                 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1060                 if (clone == NULL) {
1061                     return W_ERROR;
1062                 }
1063 
1064                 Py_SETREF(clone->b_array[val_idx],
1065                           (PyObject *)sub_node);  /* borrow */
1066 
1067                 *new_node = (PyHamtNode *)clone;
1068                 return W_NEWNODE;
1069             }
1070 
1071             case W_ERROR:
1072             case W_NOT_FOUND:
1073                 assert(sub_node == NULL);
1074                 return res;
1075 
1076             default:
1077                 Py_UNREACHABLE();
1078         }
1079     }
1080     else {
1081         /* We have a regular key/value pair */
1082 
1083         int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1084         if (cmp < 0) {
1085             return W_ERROR;
1086         }
1087         if (cmp == 0) {
1088             return W_NOT_FOUND;
1089         }
1090 
1091         if (hamt_node_bitmap_count(self) == 1) {
1092             return W_EMPTY;
1093         }
1094 
1095         *new_node = (PyHamtNode *)
1096             hamt_node_bitmap_clone_without(self, bit);
1097         if (*new_node == NULL) {
1098             return W_ERROR;
1099         }
1100 
1101         return W_NEWNODE;
1102     }
1103 }
1104 
1105 static hamt_find_t
hamt_node_bitmap_find(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1106 hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1107                       uint32_t shift, int32_t hash,
1108                       PyObject *key, PyObject **val)
1109 {
1110     /* Lookup a key in a Bitmap node. */
1111 
1112     uint32_t bit = hamt_bitpos(hash, shift);
1113     uint32_t idx;
1114     uint32_t key_idx;
1115     uint32_t val_idx;
1116     PyObject *key_or_null;
1117     PyObject *val_or_node;
1118     int comp_err;
1119 
1120     if ((self->b_bitmap & bit) == 0) {
1121         return F_NOT_FOUND;
1122     }
1123 
1124     idx = hamt_bitindex(self->b_bitmap, bit);
1125     key_idx = idx * 2;
1126     val_idx = key_idx + 1;
1127 
1128     assert(val_idx < (size_t)Py_SIZE(self));
1129 
1130     key_or_null = self->b_array[key_idx];
1131     val_or_node = self->b_array[val_idx];
1132 
1133     if (key_or_null == NULL) {
1134         /* There are a few keys that have the same hash at the current shift
1135            that match our key.  Dispatch the lookup further down the tree. */
1136         assert(val_or_node != NULL);
1137         return hamt_node_find((PyHamtNode *)val_or_node,
1138                               shift + 5, hash, key, val);
1139     }
1140 
1141     /* We have only one key -- a potential match.  Let's compare if the
1142        key we are looking at is equal to the key we are looking for. */
1143     assert(key != NULL);
1144     comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1145     if (comp_err < 0) {  /* exception in __eq__ */
1146         return F_ERROR;
1147     }
1148     if (comp_err == 1) {  /* key == key_or_null */
1149         *val = val_or_node;
1150         return F_FOUND;
1151     }
1152 
1153     return F_NOT_FOUND;
1154 }
1155 
1156 static int
hamt_node_bitmap_traverse(PyHamtNode_Bitmap * self,visitproc visit,void * arg)1157 hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1158 {
1159     /* Bitmap's tp_traverse */
1160 
1161     Py_ssize_t i;
1162 
1163     for (i = Py_SIZE(self); --i >= 0; ) {
1164         Py_VISIT(self->b_array[i]);
1165     }
1166 
1167     return 0;
1168 }
1169 
1170 static void
hamt_node_bitmap_dealloc(PyHamtNode_Bitmap * self)1171 hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1172 {
1173     /* Bitmap's tp_dealloc */
1174 
1175     Py_ssize_t len = Py_SIZE(self);
1176     Py_ssize_t i;
1177 
1178     PyObject_GC_UnTrack(self);
1179     Py_TRASHCAN_BEGIN(self, hamt_node_bitmap_dealloc)
1180 
1181     if (len > 0) {
1182         i = len;
1183         while (--i >= 0) {
1184             Py_XDECREF(self->b_array[i]);
1185         }
1186     }
1187 
1188     Py_TYPE(self)->tp_free((PyObject *)self);
1189     Py_TRASHCAN_END
1190 }
1191 
1192 #ifdef Py_DEBUG
1193 static int
hamt_node_bitmap_dump(PyHamtNode_Bitmap * node,_PyUnicodeWriter * writer,int level)1194 hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1195                       _PyUnicodeWriter *writer, int level)
1196 {
1197     /* Debug build: __dump__() method implementation for Bitmap nodes. */
1198 
1199     Py_ssize_t i;
1200     PyObject *tmp1;
1201     PyObject *tmp2;
1202 
1203     if (_hamt_dump_ident(writer, level + 1)) {
1204         goto error;
1205     }
1206 
1207     if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1208                           Py_SIZE(node), Py_SIZE(node) / 2))
1209     {
1210         goto error;
1211     }
1212 
1213     tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1214     if (tmp1 == NULL) {
1215         goto error;
1216     }
1217     tmp2 = _PyLong_Format(tmp1, 2);
1218     Py_DECREF(tmp1);
1219     if (tmp2 == NULL) {
1220         goto error;
1221     }
1222     if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1223         Py_DECREF(tmp2);
1224         goto error;
1225     }
1226     Py_DECREF(tmp2);
1227 
1228     for (i = 0; i < Py_SIZE(node); i += 2) {
1229         PyObject *key_or_null = node->b_array[i];
1230         PyObject *val_or_node = node->b_array[i + 1];
1231 
1232         if (_hamt_dump_ident(writer, level + 2)) {
1233             goto error;
1234         }
1235 
1236         if (key_or_null == NULL) {
1237             if (_hamt_dump_format(writer, "NULL:\n")) {
1238                 goto error;
1239             }
1240 
1241             if (hamt_node_dump((PyHamtNode *)val_or_node,
1242                                writer, level + 2))
1243             {
1244                 goto error;
1245             }
1246         }
1247         else {
1248             if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1249                                   val_or_node))
1250             {
1251                 goto error;
1252             }
1253         }
1254 
1255         if (_hamt_dump_format(writer, "\n")) {
1256             goto error;
1257         }
1258     }
1259 
1260     return 0;
1261 error:
1262     return -1;
1263 }
1264 #endif  /* Py_DEBUG */
1265 
1266 
1267 /////////////////////////////////// Collision Node
1268 
1269 
1270 static PyHamtNode *
hamt_node_collision_new(int32_t hash,Py_ssize_t size)1271 hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1272 {
1273     /* Create a new Collision node. */
1274 
1275     PyHamtNode_Collision *node;
1276     Py_ssize_t i;
1277 
1278     assert(size >= 4);
1279     assert(size % 2 == 0);
1280 
1281     node = PyObject_GC_NewVar(
1282         PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1283     if (node == NULL) {
1284         return NULL;
1285     }
1286 
1287     for (i = 0; i < size; i++) {
1288         node->c_array[i] = NULL;
1289     }
1290 
1291     Py_SIZE(node) = size;
1292     node->c_hash = hash;
1293 
1294     _PyObject_GC_TRACK(node);
1295 
1296     return (PyHamtNode *)node;
1297 }
1298 
1299 static hamt_find_t
hamt_node_collision_find_index(PyHamtNode_Collision * self,PyObject * key,Py_ssize_t * idx)1300 hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1301                                Py_ssize_t *idx)
1302 {
1303     /* Lookup `key` in the Collision node `self`.  Set the index of the
1304        found key to 'idx'. */
1305 
1306     Py_ssize_t i;
1307     PyObject *el;
1308 
1309     for (i = 0; i < Py_SIZE(self); i += 2) {
1310         el = self->c_array[i];
1311 
1312         assert(el != NULL);
1313         int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1314         if (cmp < 0) {
1315             return F_ERROR;
1316         }
1317         if (cmp == 1) {
1318             *idx = i;
1319             return F_FOUND;
1320         }
1321     }
1322 
1323     return F_NOT_FOUND;
1324 }
1325 
1326 static PyHamtNode *
hamt_node_collision_assoc(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1327 hamt_node_collision_assoc(PyHamtNode_Collision *self,
1328                           uint32_t shift, int32_t hash,
1329                           PyObject *key, PyObject *val, int* added_leaf)
1330 {
1331     /* Set a new key to this level (currently a Collision node)
1332        of the tree. */
1333 
1334     if (hash == self->c_hash) {
1335         /* The hash of the 'key' we are adding matches the hash of
1336            other keys in this Collision node. */
1337 
1338         Py_ssize_t key_idx = -1;
1339         hamt_find_t found;
1340         PyHamtNode_Collision *new_node;
1341         Py_ssize_t i;
1342 
1343         /* Let's try to lookup the new 'key', maybe we already have it. */
1344         found = hamt_node_collision_find_index(self, key, &key_idx);
1345         switch (found) {
1346             case F_ERROR:
1347                 /* Exception. */
1348                 return NULL;
1349 
1350             case F_NOT_FOUND:
1351                 /* This is a totally new key.  Clone the current node,
1352                    add a new key/value to the cloned node. */
1353 
1354                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1355                     self->c_hash, Py_SIZE(self) + 2);
1356                 if (new_node == NULL) {
1357                     return NULL;
1358                 }
1359 
1360                 for (i = 0; i < Py_SIZE(self); i++) {
1361                     Py_INCREF(self->c_array[i]);
1362                     new_node->c_array[i] = self->c_array[i];
1363                 }
1364 
1365                 Py_INCREF(key);
1366                 new_node->c_array[i] = key;
1367                 Py_INCREF(val);
1368                 new_node->c_array[i + 1] = val;
1369 
1370                 *added_leaf = 1;
1371                 return (PyHamtNode *)new_node;
1372 
1373             case F_FOUND:
1374                 /* There's a key which is equal to the key we are adding. */
1375 
1376                 assert(key_idx >= 0);
1377                 assert(key_idx < Py_SIZE(self));
1378                 Py_ssize_t val_idx = key_idx + 1;
1379 
1380                 if (self->c_array[val_idx] == val) {
1381                     /* We're setting a key/value pair that's already set. */
1382                     Py_INCREF(self);
1383                     return (PyHamtNode *)self;
1384                 }
1385 
1386                 /* We need to replace old value for the key
1387                    with a new value.  Create a new Collision node.*/
1388                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1389                     self->c_hash, Py_SIZE(self));
1390                 if (new_node == NULL) {
1391                     return NULL;
1392                 }
1393 
1394                 /* Copy all elements of the old node to the new one. */
1395                 for (i = 0; i < Py_SIZE(self); i++) {
1396                     Py_INCREF(self->c_array[i]);
1397                     new_node->c_array[i] = self->c_array[i];
1398                 }
1399 
1400                 /* Replace the old value with the new value for the our key. */
1401                 Py_DECREF(new_node->c_array[val_idx]);
1402                 Py_INCREF(val);
1403                 new_node->c_array[val_idx] = val;
1404 
1405                 return (PyHamtNode *)new_node;
1406 
1407             default:
1408                 Py_UNREACHABLE();
1409         }
1410     }
1411     else {
1412         /* The hash of the new key is different from the hash that
1413            all keys of this Collision node have.
1414 
1415            Create a Bitmap node inplace with two children:
1416            key/value pair that we're adding, and the Collision node
1417            we're replacing on this tree level.
1418         */
1419 
1420         PyHamtNode_Bitmap *new_node;
1421         PyHamtNode *assoc_res;
1422 
1423         new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1424         if (new_node == NULL) {
1425             return NULL;
1426         }
1427         new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1428         Py_INCREF(self);
1429         new_node->b_array[1] = (PyObject*) self;
1430 
1431         assoc_res = hamt_node_bitmap_assoc(
1432             new_node, shift, hash, key, val, added_leaf);
1433         Py_DECREF(new_node);
1434         return assoc_res;
1435     }
1436 }
1437 
1438 static inline Py_ssize_t
hamt_node_collision_count(PyHamtNode_Collision * node)1439 hamt_node_collision_count(PyHamtNode_Collision *node)
1440 {
1441     return Py_SIZE(node) / 2;
1442 }
1443 
1444 static hamt_without_t
hamt_node_collision_without(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1445 hamt_node_collision_without(PyHamtNode_Collision *self,
1446                             uint32_t shift, int32_t hash,
1447                             PyObject *key,
1448                             PyHamtNode **new_node)
1449 {
1450     if (hash != self->c_hash) {
1451         return W_NOT_FOUND;
1452     }
1453 
1454     Py_ssize_t key_idx = -1;
1455     hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1456 
1457     switch (found) {
1458         case F_ERROR:
1459             return W_ERROR;
1460 
1461         case F_NOT_FOUND:
1462             return W_NOT_FOUND;
1463 
1464         case F_FOUND:
1465             assert(key_idx >= 0);
1466             assert(key_idx < Py_SIZE(self));
1467 
1468             Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1469 
1470             if (new_count == 0) {
1471                 /* The node has only one key/value pair and it's for the
1472                    key we're trying to delete.  So a new node will be empty
1473                    after the removal.
1474                 */
1475                 return W_EMPTY;
1476             }
1477 
1478             if (new_count == 1) {
1479                 /* The node has two keys, and after deletion the
1480                    new Collision node would have one.  Collision nodes
1481                    with one key shouldn't exist, so convert it to a
1482                    Bitmap node.
1483                 */
1484                 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1485                     hamt_node_bitmap_new(2);
1486                 if (node == NULL) {
1487                     return W_ERROR;
1488                 }
1489 
1490                 if (key_idx == 0) {
1491                     Py_INCREF(self->c_array[2]);
1492                     node->b_array[0] = self->c_array[2];
1493                     Py_INCREF(self->c_array[3]);
1494                     node->b_array[1] = self->c_array[3];
1495                 }
1496                 else {
1497                     assert(key_idx == 2);
1498                     Py_INCREF(self->c_array[0]);
1499                     node->b_array[0] = self->c_array[0];
1500                     Py_INCREF(self->c_array[1]);
1501                     node->b_array[1] = self->c_array[1];
1502                 }
1503 
1504                 node->b_bitmap = hamt_bitpos(hash, shift);
1505 
1506                 *new_node = (PyHamtNode *)node;
1507                 return W_NEWNODE;
1508             }
1509 
1510             /* Allocate a new Collision node with capacity for one
1511                less key/value pair */
1512             PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1513                 hamt_node_collision_new(
1514                     self->c_hash, Py_SIZE(self) - 2);
1515             if (new == NULL) {
1516                 return W_ERROR;
1517             }
1518 
1519             /* Copy all other keys from `self` to `new` */
1520             Py_ssize_t i;
1521             for (i = 0; i < key_idx; i++) {
1522                 Py_INCREF(self->c_array[i]);
1523                 new->c_array[i] = self->c_array[i];
1524             }
1525             for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1526                 Py_INCREF(self->c_array[i]);
1527                 new->c_array[i - 2] = self->c_array[i];
1528             }
1529 
1530             *new_node = (PyHamtNode*)new;
1531             return W_NEWNODE;
1532 
1533         default:
1534             Py_UNREACHABLE();
1535     }
1536 }
1537 
1538 static hamt_find_t
hamt_node_collision_find(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1539 hamt_node_collision_find(PyHamtNode_Collision *self,
1540                          uint32_t shift, int32_t hash,
1541                          PyObject *key, PyObject **val)
1542 {
1543     /* Lookup `key` in the Collision node `self`.  Set the value
1544        for the found key to 'val'. */
1545 
1546     Py_ssize_t idx = -1;
1547     hamt_find_t res;
1548 
1549     res = hamt_node_collision_find_index(self, key, &idx);
1550     if (res == F_ERROR || res == F_NOT_FOUND) {
1551         return res;
1552     }
1553 
1554     assert(idx >= 0);
1555     assert(idx + 1 < Py_SIZE(self));
1556 
1557     *val = self->c_array[idx + 1];
1558     assert(*val != NULL);
1559 
1560     return F_FOUND;
1561 }
1562 
1563 
1564 static int
hamt_node_collision_traverse(PyHamtNode_Collision * self,visitproc visit,void * arg)1565 hamt_node_collision_traverse(PyHamtNode_Collision *self,
1566                              visitproc visit, void *arg)
1567 {
1568     /* Collision's tp_traverse */
1569 
1570     Py_ssize_t i;
1571 
1572     for (i = Py_SIZE(self); --i >= 0; ) {
1573         Py_VISIT(self->c_array[i]);
1574     }
1575 
1576     return 0;
1577 }
1578 
1579 static void
hamt_node_collision_dealloc(PyHamtNode_Collision * self)1580 hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1581 {
1582     /* Collision's tp_dealloc */
1583 
1584     Py_ssize_t len = Py_SIZE(self);
1585 
1586     PyObject_GC_UnTrack(self);
1587     Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc)
1588 
1589     if (len > 0) {
1590 
1591         while (--len >= 0) {
1592             Py_XDECREF(self->c_array[len]);
1593         }
1594     }
1595 
1596     Py_TYPE(self)->tp_free((PyObject *)self);
1597     Py_TRASHCAN_END
1598 }
1599 
1600 #ifdef Py_DEBUG
1601 static int
hamt_node_collision_dump(PyHamtNode_Collision * node,_PyUnicodeWriter * writer,int level)1602 hamt_node_collision_dump(PyHamtNode_Collision *node,
1603                          _PyUnicodeWriter *writer, int level)
1604 {
1605     /* Debug build: __dump__() method implementation for Collision nodes. */
1606 
1607     Py_ssize_t i;
1608 
1609     if (_hamt_dump_ident(writer, level + 1)) {
1610         goto error;
1611     }
1612 
1613     if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1614                           Py_SIZE(node), node))
1615     {
1616         goto error;
1617     }
1618 
1619     for (i = 0; i < Py_SIZE(node); i += 2) {
1620         PyObject *key = node->c_array[i];
1621         PyObject *val = node->c_array[i + 1];
1622 
1623         if (_hamt_dump_ident(writer, level + 2)) {
1624             goto error;
1625         }
1626 
1627         if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1628             goto error;
1629         }
1630     }
1631 
1632     return 0;
1633 error:
1634     return -1;
1635 }
1636 #endif  /* Py_DEBUG */
1637 
1638 
1639 /////////////////////////////////// Array Node
1640 
1641 
1642 static PyHamtNode *
hamt_node_array_new(Py_ssize_t count)1643 hamt_node_array_new(Py_ssize_t count)
1644 {
1645     Py_ssize_t i;
1646 
1647     PyHamtNode_Array *node = PyObject_GC_New(
1648         PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1649     if (node == NULL) {
1650         return NULL;
1651     }
1652 
1653     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1654         node->a_array[i] = NULL;
1655     }
1656 
1657     node->a_count = count;
1658 
1659     _PyObject_GC_TRACK(node);
1660     return (PyHamtNode *)node;
1661 }
1662 
1663 static PyHamtNode_Array *
hamt_node_array_clone(PyHamtNode_Array * node)1664 hamt_node_array_clone(PyHamtNode_Array *node)
1665 {
1666     PyHamtNode_Array *clone;
1667     Py_ssize_t i;
1668 
1669     VALIDATE_ARRAY_NODE(node)
1670 
1671     /* Create a new Array node. */
1672     clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1673     if (clone == NULL) {
1674         return NULL;
1675     }
1676 
1677     /* Copy all elements from the current Array node to the new one. */
1678     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1679         Py_XINCREF(node->a_array[i]);
1680         clone->a_array[i] = node->a_array[i];
1681     }
1682 
1683     VALIDATE_ARRAY_NODE(clone)
1684     return clone;
1685 }
1686 
1687 static PyHamtNode *
hamt_node_array_assoc(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1688 hamt_node_array_assoc(PyHamtNode_Array *self,
1689                       uint32_t shift, int32_t hash,
1690                       PyObject *key, PyObject *val, int* added_leaf)
1691 {
1692     /* Set a new key to this level (currently a Collision node)
1693        of the tree.
1694 
1695        Array nodes don't store values, they can only point to
1696        other nodes.  They are simple arrays of 32 BaseNode pointers/
1697      */
1698 
1699     uint32_t idx = hamt_mask(hash, shift);
1700     PyHamtNode *node = self->a_array[idx];
1701     PyHamtNode *child_node;
1702     PyHamtNode_Array *new_node;
1703     Py_ssize_t i;
1704 
1705     if (node == NULL) {
1706         /* There's no child node for the given hash.  Create a new
1707            Bitmap node for this key. */
1708 
1709         PyHamtNode_Bitmap *empty = NULL;
1710 
1711         /* Get an empty Bitmap node to work with. */
1712         empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1713         if (empty == NULL) {
1714             return NULL;
1715         }
1716 
1717         /* Set key/val to the newly created empty Bitmap, thus
1718            creating a new Bitmap node with our key/value pair. */
1719         child_node = hamt_node_bitmap_assoc(
1720             empty,
1721             shift + 5, hash, key, val, added_leaf);
1722         Py_DECREF(empty);
1723         if (child_node == NULL) {
1724             return NULL;
1725         }
1726 
1727         /* Create a new Array node. */
1728         new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1729         if (new_node == NULL) {
1730             Py_DECREF(child_node);
1731             return NULL;
1732         }
1733 
1734         /* Copy all elements from the current Array node to the
1735            new one. */
1736         for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1737             Py_XINCREF(self->a_array[i]);
1738             new_node->a_array[i] = self->a_array[i];
1739         }
1740 
1741         assert(new_node->a_array[idx] == NULL);
1742         new_node->a_array[idx] = child_node;  /* borrow */
1743         VALIDATE_ARRAY_NODE(new_node)
1744     }
1745     else {
1746         /* There's a child node for the given hash.
1747            Set the key to it./ */
1748         child_node = hamt_node_assoc(
1749             node, shift + 5, hash, key, val, added_leaf);
1750         if (child_node == NULL) {
1751             return NULL;
1752         }
1753         else if (child_node == (PyHamtNode *)self) {
1754             Py_DECREF(child_node);
1755             return (PyHamtNode *)self;
1756         }
1757 
1758         new_node = hamt_node_array_clone(self);
1759         if (new_node == NULL) {
1760             Py_DECREF(child_node);
1761             return NULL;
1762         }
1763 
1764         Py_SETREF(new_node->a_array[idx], child_node);  /* borrow */
1765         VALIDATE_ARRAY_NODE(new_node)
1766     }
1767 
1768     return (PyHamtNode *)new_node;
1769 }
1770 
1771 static hamt_without_t
hamt_node_array_without(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1772 hamt_node_array_without(PyHamtNode_Array *self,
1773                         uint32_t shift, int32_t hash,
1774                         PyObject *key,
1775                         PyHamtNode **new_node)
1776 {
1777     uint32_t idx = hamt_mask(hash, shift);
1778     PyHamtNode *node = self->a_array[idx];
1779 
1780     if (node == NULL) {
1781         return W_NOT_FOUND;
1782     }
1783 
1784     PyHamtNode *sub_node = NULL;
1785     hamt_without_t res = hamt_node_without(
1786         (PyHamtNode *)node,
1787         shift + 5, hash, key, &sub_node);
1788 
1789     switch (res) {
1790         case W_NOT_FOUND:
1791         case W_ERROR:
1792             assert(sub_node == NULL);
1793             return res;
1794 
1795         case W_NEWNODE: {
1796             /* We need to replace a node at the `idx` index.
1797                Clone this node and replace.
1798             */
1799             assert(sub_node != NULL);
1800 
1801             PyHamtNode_Array *clone = hamt_node_array_clone(self);
1802             if (clone == NULL) {
1803                 Py_DECREF(sub_node);
1804                 return W_ERROR;
1805             }
1806 
1807             Py_SETREF(clone->a_array[idx], sub_node);  /* borrow */
1808             *new_node = (PyHamtNode*)clone;  /* borrow */
1809             return W_NEWNODE;
1810         }
1811 
1812         case W_EMPTY: {
1813             assert(sub_node == NULL);
1814             /* We need to remove a node at the `idx` index.
1815                Calculate the size of the replacement Array node.
1816             */
1817             Py_ssize_t new_count = self->a_count - 1;
1818 
1819             if (new_count == 0) {
1820                 return W_EMPTY;
1821             }
1822 
1823             if (new_count >= 16) {
1824                 /* We convert Bitmap nodes to Array nodes, when a
1825                    Bitmap node needs to store more than 15 key/value
1826                    pairs.  So we will create a new Array node if we
1827                    the number of key/values after deletion is still
1828                    greater than 15.
1829                 */
1830 
1831                 PyHamtNode_Array *new = hamt_node_array_clone(self);
1832                 if (new == NULL) {
1833                     return W_ERROR;
1834                 }
1835                 new->a_count = new_count;
1836                 Py_CLEAR(new->a_array[idx]);
1837 
1838                 *new_node = (PyHamtNode*)new;  /* borrow */
1839                 return W_NEWNODE;
1840             }
1841 
1842             /* New Array node would have less than 16 key/value
1843                pairs.  We need to create a replacement Bitmap node. */
1844 
1845             Py_ssize_t bitmap_size = new_count * 2;
1846             uint32_t bitmap = 0;
1847 
1848             PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1849                 hamt_node_bitmap_new(bitmap_size);
1850             if (new == NULL) {
1851                 return W_ERROR;
1852             }
1853 
1854             Py_ssize_t new_i = 0;
1855             for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1856                 if (i == idx) {
1857                     /* Skip the node we are deleting. */
1858                     continue;
1859                 }
1860 
1861                 PyHamtNode *node = self->a_array[i];
1862                 if (node == NULL) {
1863                     /* Skip any missing nodes. */
1864                     continue;
1865                 }
1866 
1867                 bitmap |= 1 << i;
1868 
1869                 if (IS_BITMAP_NODE(node)) {
1870                     PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1871 
1872                     if (hamt_node_bitmap_count(child) == 1 &&
1873                             child->b_array[0] != NULL)
1874                     {
1875                         /* node is a Bitmap with one key/value pair, just
1876                            merge it into the new Bitmap node we're building.
1877 
1878                            Note that we don't inline Bitmap nodes that
1879                            have a NULL key -- those nodes point to another
1880                            tree level, and we cannot simply move tree levels
1881                            up or down.
1882                         */
1883                         PyObject *key = child->b_array[0];
1884                         PyObject *val = child->b_array[1];
1885 
1886                         Py_INCREF(key);
1887                         new->b_array[new_i] = key;
1888                         Py_INCREF(val);
1889                         new->b_array[new_i + 1] = val;
1890                     }
1891                     else {
1892                         new->b_array[new_i] = NULL;
1893                         Py_INCREF(node);
1894                         new->b_array[new_i + 1] = (PyObject*)node;
1895                     }
1896                 }
1897                 else {
1898 
1899 #ifdef Py_DEBUG
1900                     if (IS_COLLISION_NODE(node)) {
1901                         Py_ssize_t child_count = hamt_node_collision_count(
1902                             (PyHamtNode_Collision*)node);
1903                         assert(child_count > 1);
1904                     }
1905                     else if (IS_ARRAY_NODE(node)) {
1906                         assert(((PyHamtNode_Array*)node)->a_count >= 16);
1907                     }
1908 #endif
1909 
1910                     /* Just copy the node into our new Bitmap */
1911                     new->b_array[new_i] = NULL;
1912                     Py_INCREF(node);
1913                     new->b_array[new_i + 1] = (PyObject*)node;
1914                 }
1915 
1916                 new_i += 2;
1917             }
1918 
1919             new->b_bitmap = bitmap;
1920             *new_node = (PyHamtNode*)new;  /* borrow */
1921             return W_NEWNODE;
1922         }
1923 
1924         default:
1925             Py_UNREACHABLE();
1926     }
1927 }
1928 
1929 static hamt_find_t
hamt_node_array_find(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1930 hamt_node_array_find(PyHamtNode_Array *self,
1931                      uint32_t shift, int32_t hash,
1932                      PyObject *key, PyObject **val)
1933 {
1934     /* Lookup `key` in the Array node `self`.  Set the value
1935        for the found key to 'val'. */
1936 
1937     uint32_t idx = hamt_mask(hash, shift);
1938     PyHamtNode *node;
1939 
1940     node = self->a_array[idx];
1941     if (node == NULL) {
1942         return F_NOT_FOUND;
1943     }
1944 
1945     /* Dispatch to the generic hamt_node_find */
1946     return hamt_node_find(node, shift + 5, hash, key, val);
1947 }
1948 
1949 static int
hamt_node_array_traverse(PyHamtNode_Array * self,visitproc visit,void * arg)1950 hamt_node_array_traverse(PyHamtNode_Array *self,
1951                          visitproc visit, void *arg)
1952 {
1953     /* Array's tp_traverse */
1954 
1955     Py_ssize_t i;
1956 
1957     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1958         Py_VISIT(self->a_array[i]);
1959     }
1960 
1961     return 0;
1962 }
1963 
1964 static void
hamt_node_array_dealloc(PyHamtNode_Array * self)1965 hamt_node_array_dealloc(PyHamtNode_Array *self)
1966 {
1967     /* Array's tp_dealloc */
1968 
1969     Py_ssize_t i;
1970 
1971     PyObject_GC_UnTrack(self);
1972     Py_TRASHCAN_BEGIN(self, hamt_node_array_dealloc)
1973 
1974     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1975         Py_XDECREF(self->a_array[i]);
1976     }
1977 
1978     Py_TYPE(self)->tp_free((PyObject *)self);
1979     Py_TRASHCAN_END
1980 }
1981 
1982 #ifdef Py_DEBUG
1983 static int
hamt_node_array_dump(PyHamtNode_Array * node,_PyUnicodeWriter * writer,int level)1984 hamt_node_array_dump(PyHamtNode_Array *node,
1985                      _PyUnicodeWriter *writer, int level)
1986 {
1987     /* Debug build: __dump__() method implementation for Array nodes. */
1988 
1989     Py_ssize_t i;
1990 
1991     if (_hamt_dump_ident(writer, level + 1)) {
1992         goto error;
1993     }
1994 
1995     if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1996         goto error;
1997     }
1998 
1999     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
2000         if (node->a_array[i] == NULL) {
2001             continue;
2002         }
2003 
2004         if (_hamt_dump_ident(writer, level + 2)) {
2005             goto error;
2006         }
2007 
2008         if (_hamt_dump_format(writer, "%zd::\n", i)) {
2009             goto error;
2010         }
2011 
2012         if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2013             goto error;
2014         }
2015 
2016         if (_hamt_dump_format(writer, "\n")) {
2017             goto error;
2018         }
2019     }
2020 
2021     return 0;
2022 error:
2023     return -1;
2024 }
2025 #endif  /* Py_DEBUG */
2026 
2027 
2028 /////////////////////////////////// Node Dispatch
2029 
2030 
2031 static PyHamtNode *
hamt_node_assoc(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)2032 hamt_node_assoc(PyHamtNode *node,
2033                 uint32_t shift, int32_t hash,
2034                 PyObject *key, PyObject *val, int* added_leaf)
2035 {
2036     /* Set key/value to the 'node' starting with the given shift/hash.
2037        Return a new node, or the same node if key/value already
2038        set.
2039 
2040        added_leaf will be set to 1 if key/value wasn't in the
2041        tree before.
2042 
2043        This method automatically dispatches to the suitable
2044        hamt_node_{nodetype}_assoc method.
2045     */
2046 
2047     if (IS_BITMAP_NODE(node)) {
2048         return hamt_node_bitmap_assoc(
2049             (PyHamtNode_Bitmap *)node,
2050             shift, hash, key, val, added_leaf);
2051     }
2052     else if (IS_ARRAY_NODE(node)) {
2053         return hamt_node_array_assoc(
2054             (PyHamtNode_Array *)node,
2055             shift, hash, key, val, added_leaf);
2056     }
2057     else {
2058         assert(IS_COLLISION_NODE(node));
2059         return hamt_node_collision_assoc(
2060             (PyHamtNode_Collision *)node,
2061             shift, hash, key, val, added_leaf);
2062     }
2063 }
2064 
2065 static hamt_without_t
hamt_node_without(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)2066 hamt_node_without(PyHamtNode *node,
2067                   uint32_t shift, int32_t hash,
2068                   PyObject *key,
2069                   PyHamtNode **new_node)
2070 {
2071     if (IS_BITMAP_NODE(node)) {
2072         return hamt_node_bitmap_without(
2073             (PyHamtNode_Bitmap *)node,
2074             shift, hash, key,
2075             new_node);
2076     }
2077     else if (IS_ARRAY_NODE(node)) {
2078         return hamt_node_array_without(
2079             (PyHamtNode_Array *)node,
2080             shift, hash, key,
2081             new_node);
2082     }
2083     else {
2084         assert(IS_COLLISION_NODE(node));
2085         return hamt_node_collision_without(
2086             (PyHamtNode_Collision *)node,
2087             shift, hash, key,
2088             new_node);
2089     }
2090 }
2091 
2092 static hamt_find_t
hamt_node_find(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)2093 hamt_node_find(PyHamtNode *node,
2094                uint32_t shift, int32_t hash,
2095                PyObject *key, PyObject **val)
2096 {
2097     /* Find the key in the node starting with the given shift/hash.
2098 
2099        If a value is found, the result will be set to F_FOUND, and
2100        *val will point to the found value object.
2101 
2102        If a value wasn't found, the result will be set to F_NOT_FOUND.
2103 
2104        If an exception occurs during the call, the result will be F_ERROR.
2105 
2106        This method automatically dispatches to the suitable
2107        hamt_node_{nodetype}_find method.
2108     */
2109 
2110     if (IS_BITMAP_NODE(node)) {
2111         return hamt_node_bitmap_find(
2112             (PyHamtNode_Bitmap *)node,
2113             shift, hash, key, val);
2114 
2115     }
2116     else if (IS_ARRAY_NODE(node)) {
2117         return hamt_node_array_find(
2118             (PyHamtNode_Array *)node,
2119             shift, hash, key, val);
2120     }
2121     else {
2122         assert(IS_COLLISION_NODE(node));
2123         return hamt_node_collision_find(
2124             (PyHamtNode_Collision *)node,
2125             shift, hash, key, val);
2126     }
2127 }
2128 
2129 #ifdef Py_DEBUG
2130 static int
hamt_node_dump(PyHamtNode * node,_PyUnicodeWriter * writer,int level)2131 hamt_node_dump(PyHamtNode *node,
2132                _PyUnicodeWriter *writer, int level)
2133 {
2134     /* Debug build: __dump__() method implementation for a node.
2135 
2136        This method automatically dispatches to the suitable
2137        hamt_node_{nodetype})_dump method.
2138     */
2139 
2140     if (IS_BITMAP_NODE(node)) {
2141         return hamt_node_bitmap_dump(
2142             (PyHamtNode_Bitmap *)node, writer, level);
2143     }
2144     else if (IS_ARRAY_NODE(node)) {
2145         return hamt_node_array_dump(
2146             (PyHamtNode_Array *)node, writer, level);
2147     }
2148     else {
2149         assert(IS_COLLISION_NODE(node));
2150         return hamt_node_collision_dump(
2151             (PyHamtNode_Collision *)node, writer, level);
2152     }
2153 }
2154 #endif  /* Py_DEBUG */
2155 
2156 
2157 /////////////////////////////////// Iterators: Machinery
2158 
2159 
2160 static hamt_iter_t
2161 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2162 
2163 
2164 static void
hamt_iterator_init(PyHamtIteratorState * iter,PyHamtNode * root)2165 hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2166 {
2167     for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2168         iter->i_nodes[i] = NULL;
2169         iter->i_pos[i] = 0;
2170     }
2171 
2172     iter->i_level = 0;
2173 
2174     /* Note: we don't incref/decref nodes in i_nodes. */
2175     iter->i_nodes[0] = root;
2176 }
2177 
2178 static hamt_iter_t
hamt_iterator_bitmap_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2179 hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2180                           PyObject **key, PyObject **val)
2181 {
2182     int8_t level = iter->i_level;
2183 
2184     PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2185     Py_ssize_t pos = iter->i_pos[level];
2186 
2187     if (pos + 1 >= Py_SIZE(node)) {
2188 #ifdef Py_DEBUG
2189         assert(iter->i_level >= 0);
2190         iter->i_nodes[iter->i_level] = NULL;
2191 #endif
2192         iter->i_level--;
2193         return hamt_iterator_next(iter, key, val);
2194     }
2195 
2196     if (node->b_array[pos] == NULL) {
2197         iter->i_pos[level] = pos + 2;
2198 
2199         int8_t next_level = level + 1;
2200         assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2201         iter->i_level = next_level;
2202         iter->i_pos[next_level] = 0;
2203         iter->i_nodes[next_level] = (PyHamtNode *)
2204             node->b_array[pos + 1];
2205 
2206         return hamt_iterator_next(iter, key, val);
2207     }
2208 
2209     *key = node->b_array[pos];
2210     *val = node->b_array[pos + 1];
2211     iter->i_pos[level] = pos + 2;
2212     return I_ITEM;
2213 }
2214 
2215 static hamt_iter_t
hamt_iterator_collision_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2216 hamt_iterator_collision_next(PyHamtIteratorState *iter,
2217                              PyObject **key, PyObject **val)
2218 {
2219     int8_t level = iter->i_level;
2220 
2221     PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2222     Py_ssize_t pos = iter->i_pos[level];
2223 
2224     if (pos + 1 >= Py_SIZE(node)) {
2225 #ifdef Py_DEBUG
2226         assert(iter->i_level >= 0);
2227         iter->i_nodes[iter->i_level] = NULL;
2228 #endif
2229         iter->i_level--;
2230         return hamt_iterator_next(iter, key, val);
2231     }
2232 
2233     *key = node->c_array[pos];
2234     *val = node->c_array[pos + 1];
2235     iter->i_pos[level] = pos + 2;
2236     return I_ITEM;
2237 }
2238 
2239 static hamt_iter_t
hamt_iterator_array_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2240 hamt_iterator_array_next(PyHamtIteratorState *iter,
2241                          PyObject **key, PyObject **val)
2242 {
2243     int8_t level = iter->i_level;
2244 
2245     PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2246     Py_ssize_t pos = iter->i_pos[level];
2247 
2248     if (pos >= HAMT_ARRAY_NODE_SIZE) {
2249 #ifdef Py_DEBUG
2250         assert(iter->i_level >= 0);
2251         iter->i_nodes[iter->i_level] = NULL;
2252 #endif
2253         iter->i_level--;
2254         return hamt_iterator_next(iter, key, val);
2255     }
2256 
2257     for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2258         if (node->a_array[i] != NULL) {
2259             iter->i_pos[level] = i + 1;
2260 
2261             int8_t next_level = level + 1;
2262             assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2263             iter->i_pos[next_level] = 0;
2264             iter->i_nodes[next_level] = node->a_array[i];
2265             iter->i_level = next_level;
2266 
2267             return hamt_iterator_next(iter, key, val);
2268         }
2269     }
2270 
2271 #ifdef Py_DEBUG
2272         assert(iter->i_level >= 0);
2273         iter->i_nodes[iter->i_level] = NULL;
2274 #endif
2275 
2276     iter->i_level--;
2277     return hamt_iterator_next(iter, key, val);
2278 }
2279 
2280 static hamt_iter_t
hamt_iterator_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2281 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2282 {
2283     if (iter->i_level < 0) {
2284         return I_END;
2285     }
2286 
2287     assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2288 
2289     PyHamtNode *current = iter->i_nodes[iter->i_level];
2290 
2291     if (IS_BITMAP_NODE(current)) {
2292         return hamt_iterator_bitmap_next(iter, key, val);
2293     }
2294     else if (IS_ARRAY_NODE(current)) {
2295         return hamt_iterator_array_next(iter, key, val);
2296     }
2297     else {
2298         assert(IS_COLLISION_NODE(current));
2299         return hamt_iterator_collision_next(iter, key, val);
2300     }
2301 }
2302 
2303 
2304 /////////////////////////////////// HAMT high-level functions
2305 
2306 
2307 PyHamtObject *
_PyHamt_Assoc(PyHamtObject * o,PyObject * key,PyObject * val)2308 _PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2309 {
2310     int32_t key_hash;
2311     int added_leaf = 0;
2312     PyHamtNode *new_root;
2313     PyHamtObject *new_o;
2314 
2315     key_hash = hamt_hash(key);
2316     if (key_hash == -1) {
2317         return NULL;
2318     }
2319 
2320     new_root = hamt_node_assoc(
2321         (PyHamtNode *)(o->h_root),
2322         0, key_hash, key, val, &added_leaf);
2323     if (new_root == NULL) {
2324         return NULL;
2325     }
2326 
2327     if (new_root == o->h_root) {
2328         Py_DECREF(new_root);
2329         Py_INCREF(o);
2330         return o;
2331     }
2332 
2333     new_o = hamt_alloc();
2334     if (new_o == NULL) {
2335         Py_DECREF(new_root);
2336         return NULL;
2337     }
2338 
2339     new_o->h_root = new_root;  /* borrow */
2340     new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2341 
2342     return new_o;
2343 }
2344 
2345 PyHamtObject *
_PyHamt_Without(PyHamtObject * o,PyObject * key)2346 _PyHamt_Without(PyHamtObject *o, PyObject *key)
2347 {
2348     int32_t key_hash = hamt_hash(key);
2349     if (key_hash == -1) {
2350         return NULL;
2351     }
2352 
2353     PyHamtNode *new_root = NULL;
2354 
2355     hamt_without_t res = hamt_node_without(
2356         (PyHamtNode *)(o->h_root),
2357         0, key_hash, key,
2358         &new_root);
2359 
2360     switch (res) {
2361         case W_ERROR:
2362             return NULL;
2363         case W_EMPTY:
2364             return _PyHamt_New();
2365         case W_NOT_FOUND:
2366             Py_INCREF(o);
2367             return o;
2368         case W_NEWNODE: {
2369             assert(new_root != NULL);
2370 
2371             PyHamtObject *new_o = hamt_alloc();
2372             if (new_o == NULL) {
2373                 Py_DECREF(new_root);
2374                 return NULL;
2375             }
2376 
2377             new_o->h_root = new_root;  /* borrow */
2378             new_o->h_count = o->h_count - 1;
2379             assert(new_o->h_count >= 0);
2380             return new_o;
2381         }
2382         default:
2383             Py_UNREACHABLE();
2384     }
2385 }
2386 
2387 static hamt_find_t
hamt_find(PyHamtObject * o,PyObject * key,PyObject ** val)2388 hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2389 {
2390     if (o->h_count == 0) {
2391         return F_NOT_FOUND;
2392     }
2393 
2394     int32_t key_hash = hamt_hash(key);
2395     if (key_hash == -1) {
2396         return F_ERROR;
2397     }
2398 
2399     return hamt_node_find(o->h_root, 0, key_hash, key, val);
2400 }
2401 
2402 
2403 int
_PyHamt_Find(PyHamtObject * o,PyObject * key,PyObject ** val)2404 _PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2405 {
2406     hamt_find_t res = hamt_find(o, key, val);
2407     switch (res) {
2408         case F_ERROR:
2409             return -1;
2410         case F_NOT_FOUND:
2411             return 0;
2412         case F_FOUND:
2413             return 1;
2414         default:
2415             Py_UNREACHABLE();
2416     }
2417 }
2418 
2419 
2420 int
_PyHamt_Eq(PyHamtObject * v,PyHamtObject * w)2421 _PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2422 {
2423     if (v == w) {
2424         return 1;
2425     }
2426 
2427     if (v->h_count != w->h_count) {
2428         return 0;
2429     }
2430 
2431     PyHamtIteratorState iter;
2432     hamt_iter_t iter_res;
2433     hamt_find_t find_res;
2434     PyObject *v_key;
2435     PyObject *v_val;
2436     PyObject *w_val;
2437 
2438     hamt_iterator_init(&iter, v->h_root);
2439 
2440     do {
2441         iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2442         if (iter_res == I_ITEM) {
2443             find_res = hamt_find(w, v_key, &w_val);
2444             switch (find_res) {
2445                 case F_ERROR:
2446                     return -1;
2447 
2448                 case F_NOT_FOUND:
2449                     return 0;
2450 
2451                 case F_FOUND: {
2452                     int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2453                     if (cmp < 0) {
2454                         return -1;
2455                     }
2456                     if (cmp == 0) {
2457                         return 0;
2458                     }
2459                 }
2460             }
2461         }
2462     } while (iter_res != I_END);
2463 
2464     return 1;
2465 }
2466 
2467 Py_ssize_t
_PyHamt_Len(PyHamtObject * o)2468 _PyHamt_Len(PyHamtObject *o)
2469 {
2470     return o->h_count;
2471 }
2472 
2473 static PyHamtObject *
hamt_alloc(void)2474 hamt_alloc(void)
2475 {
2476     PyHamtObject *o;
2477     o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2478     if (o == NULL) {
2479         return NULL;
2480     }
2481     o->h_count = 0;
2482     o->h_root = NULL;
2483     o->h_weakreflist = NULL;
2484     PyObject_GC_Track(o);
2485     return o;
2486 }
2487 
2488 PyHamtObject *
_PyHamt_New(void)2489 _PyHamt_New(void)
2490 {
2491     if (_empty_hamt != NULL) {
2492         /* HAMT is an immutable object so we can easily cache an
2493            empty instance. */
2494         Py_INCREF(_empty_hamt);
2495         return _empty_hamt;
2496     }
2497 
2498     PyHamtObject *o = hamt_alloc();
2499     if (o == NULL) {
2500         return NULL;
2501     }
2502 
2503     o->h_root = hamt_node_bitmap_new(0);
2504     if (o->h_root == NULL) {
2505         Py_DECREF(o);
2506         return NULL;
2507     }
2508 
2509     o->h_count = 0;
2510 
2511     if (_empty_hamt == NULL) {
2512         Py_INCREF(o);
2513         _empty_hamt = o;
2514     }
2515 
2516     return o;
2517 }
2518 
2519 #ifdef Py_DEBUG
2520 static PyObject *
hamt_dump(PyHamtObject * self)2521 hamt_dump(PyHamtObject *self)
2522 {
2523     _PyUnicodeWriter writer;
2524 
2525     _PyUnicodeWriter_Init(&writer);
2526 
2527     if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2528         goto error;
2529     }
2530 
2531     if (hamt_node_dump(self->h_root, &writer, 0)) {
2532         goto error;
2533     }
2534 
2535     return _PyUnicodeWriter_Finish(&writer);
2536 
2537 error:
2538     _PyUnicodeWriter_Dealloc(&writer);
2539     return NULL;
2540 }
2541 #endif  /* Py_DEBUG */
2542 
2543 
2544 /////////////////////////////////// Iterators: Shared Iterator Implementation
2545 
2546 
2547 static int
hamt_baseiter_tp_clear(PyHamtIterator * it)2548 hamt_baseiter_tp_clear(PyHamtIterator *it)
2549 {
2550     Py_CLEAR(it->hi_obj);
2551     return 0;
2552 }
2553 
2554 static void
hamt_baseiter_tp_dealloc(PyHamtIterator * it)2555 hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2556 {
2557     PyObject_GC_UnTrack(it);
2558     (void)hamt_baseiter_tp_clear(it);
2559     PyObject_GC_Del(it);
2560 }
2561 
2562 static int
hamt_baseiter_tp_traverse(PyHamtIterator * it,visitproc visit,void * arg)2563 hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2564 {
2565     Py_VISIT(it->hi_obj);
2566     return 0;
2567 }
2568 
2569 static PyObject *
hamt_baseiter_tp_iternext(PyHamtIterator * it)2570 hamt_baseiter_tp_iternext(PyHamtIterator *it)
2571 {
2572     PyObject *key;
2573     PyObject *val;
2574     hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2575 
2576     switch (res) {
2577         case I_END:
2578             PyErr_SetNone(PyExc_StopIteration);
2579             return NULL;
2580 
2581         case I_ITEM: {
2582             return (*(it->hi_yield))(key, val);
2583         }
2584 
2585         default: {
2586             Py_UNREACHABLE();
2587         }
2588     }
2589 }
2590 
2591 static Py_ssize_t
hamt_baseiter_tp_len(PyHamtIterator * it)2592 hamt_baseiter_tp_len(PyHamtIterator *it)
2593 {
2594     return it->hi_obj->h_count;
2595 }
2596 
2597 static PyMappingMethods PyHamtIterator_as_mapping = {
2598     (lenfunc)hamt_baseiter_tp_len,
2599 };
2600 
2601 static PyObject *
hamt_baseiter_new(PyTypeObject * type,binaryfunc yield,PyHamtObject * o)2602 hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2603 {
2604     PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2605     if (it == NULL) {
2606         return NULL;
2607     }
2608 
2609     Py_INCREF(o);
2610     it->hi_obj = o;
2611     it->hi_yield = yield;
2612 
2613     hamt_iterator_init(&it->hi_iter, o->h_root);
2614 
2615     return (PyObject*)it;
2616 }
2617 
2618 #define ITERATOR_TYPE_SHARED_SLOTS                              \
2619     .tp_basicsize = sizeof(PyHamtIterator),                     \
2620     .tp_itemsize = 0,                                           \
2621     .tp_as_mapping = &PyHamtIterator_as_mapping,                \
2622     .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc,         \
2623     .tp_getattro = PyObject_GenericGetAttr,                     \
2624     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,        \
2625     .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse,     \
2626     .tp_clear = (inquiry)hamt_baseiter_tp_clear,                \
2627     .tp_iter = PyObject_SelfIter,                               \
2628     .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2629 
2630 
2631 /////////////////////////////////// _PyHamtItems_Type
2632 
2633 
2634 PyTypeObject _PyHamtItems_Type = {
2635     PyVarObject_HEAD_INIT(NULL, 0)
2636     "items",
2637     ITERATOR_TYPE_SHARED_SLOTS
2638 };
2639 
2640 static PyObject *
hamt_iter_yield_items(PyObject * key,PyObject * val)2641 hamt_iter_yield_items(PyObject *key, PyObject *val)
2642 {
2643     return PyTuple_Pack(2, key, val);
2644 }
2645 
2646 PyObject *
_PyHamt_NewIterItems(PyHamtObject * o)2647 _PyHamt_NewIterItems(PyHamtObject *o)
2648 {
2649     return hamt_baseiter_new(
2650         &_PyHamtItems_Type, hamt_iter_yield_items, o);
2651 }
2652 
2653 
2654 /////////////////////////////////// _PyHamtKeys_Type
2655 
2656 
2657 PyTypeObject _PyHamtKeys_Type = {
2658     PyVarObject_HEAD_INIT(NULL, 0)
2659     "keys",
2660     ITERATOR_TYPE_SHARED_SLOTS
2661 };
2662 
2663 static PyObject *
hamt_iter_yield_keys(PyObject * key,PyObject * val)2664 hamt_iter_yield_keys(PyObject *key, PyObject *val)
2665 {
2666     Py_INCREF(key);
2667     return key;
2668 }
2669 
2670 PyObject *
_PyHamt_NewIterKeys(PyHamtObject * o)2671 _PyHamt_NewIterKeys(PyHamtObject *o)
2672 {
2673     return hamt_baseiter_new(
2674         &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2675 }
2676 
2677 
2678 /////////////////////////////////// _PyHamtValues_Type
2679 
2680 
2681 PyTypeObject _PyHamtValues_Type = {
2682     PyVarObject_HEAD_INIT(NULL, 0)
2683     "values",
2684     ITERATOR_TYPE_SHARED_SLOTS
2685 };
2686 
2687 static PyObject *
hamt_iter_yield_values(PyObject * key,PyObject * val)2688 hamt_iter_yield_values(PyObject *key, PyObject *val)
2689 {
2690     Py_INCREF(val);
2691     return val;
2692 }
2693 
2694 PyObject *
_PyHamt_NewIterValues(PyHamtObject * o)2695 _PyHamt_NewIterValues(PyHamtObject *o)
2696 {
2697     return hamt_baseiter_new(
2698         &_PyHamtValues_Type, hamt_iter_yield_values, o);
2699 }
2700 
2701 
2702 /////////////////////////////////// _PyHamt_Type
2703 
2704 
2705 #ifdef Py_DEBUG
2706 static PyObject *
2707 hamt_dump(PyHamtObject *self);
2708 #endif
2709 
2710 
2711 static PyObject *
hamt_tp_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2712 hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2713 {
2714     return (PyObject*)_PyHamt_New();
2715 }
2716 
2717 static int
hamt_tp_clear(PyHamtObject * self)2718 hamt_tp_clear(PyHamtObject *self)
2719 {
2720     Py_CLEAR(self->h_root);
2721     return 0;
2722 }
2723 
2724 
2725 static int
hamt_tp_traverse(PyHamtObject * self,visitproc visit,void * arg)2726 hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2727 {
2728     Py_VISIT(self->h_root);
2729     return 0;
2730 }
2731 
2732 static void
hamt_tp_dealloc(PyHamtObject * self)2733 hamt_tp_dealloc(PyHamtObject *self)
2734 {
2735     PyObject_GC_UnTrack(self);
2736     if (self->h_weakreflist != NULL) {
2737         PyObject_ClearWeakRefs((PyObject*)self);
2738     }
2739     (void)hamt_tp_clear(self);
2740     Py_TYPE(self)->tp_free(self);
2741 }
2742 
2743 
2744 static PyObject *
hamt_tp_richcompare(PyObject * v,PyObject * w,int op)2745 hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2746 {
2747     if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2748         Py_RETURN_NOTIMPLEMENTED;
2749     }
2750 
2751     int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2752     if (res < 0) {
2753         return NULL;
2754     }
2755 
2756     if (op == Py_NE) {
2757         res = !res;
2758     }
2759 
2760     if (res) {
2761         Py_RETURN_TRUE;
2762     }
2763     else {
2764         Py_RETURN_FALSE;
2765     }
2766 }
2767 
2768 static int
hamt_tp_contains(PyHamtObject * self,PyObject * key)2769 hamt_tp_contains(PyHamtObject *self, PyObject *key)
2770 {
2771     PyObject *val;
2772     return _PyHamt_Find(self, key, &val);
2773 }
2774 
2775 static PyObject *
hamt_tp_subscript(PyHamtObject * self,PyObject * key)2776 hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2777 {
2778     PyObject *val;
2779     hamt_find_t res = hamt_find(self, key, &val);
2780     switch (res) {
2781         case F_ERROR:
2782             return NULL;
2783         case F_FOUND:
2784             Py_INCREF(val);
2785             return val;
2786         case F_NOT_FOUND:
2787             PyErr_SetObject(PyExc_KeyError, key);
2788             return NULL;
2789         default:
2790             Py_UNREACHABLE();
2791     }
2792 }
2793 
2794 static Py_ssize_t
hamt_tp_len(PyHamtObject * self)2795 hamt_tp_len(PyHamtObject *self)
2796 {
2797     return _PyHamt_Len(self);
2798 }
2799 
2800 static PyObject *
hamt_tp_iter(PyHamtObject * self)2801 hamt_tp_iter(PyHamtObject *self)
2802 {
2803     return _PyHamt_NewIterKeys(self);
2804 }
2805 
2806 static PyObject *
hamt_py_set(PyHamtObject * self,PyObject * args)2807 hamt_py_set(PyHamtObject *self, PyObject *args)
2808 {
2809     PyObject *key;
2810     PyObject *val;
2811 
2812     if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2813         return NULL;
2814     }
2815 
2816     return (PyObject *)_PyHamt_Assoc(self, key, val);
2817 }
2818 
2819 static PyObject *
hamt_py_get(PyHamtObject * self,PyObject * args)2820 hamt_py_get(PyHamtObject *self, PyObject *args)
2821 {
2822     PyObject *key;
2823     PyObject *def = NULL;
2824 
2825     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2826         return NULL;
2827     }
2828 
2829     PyObject *val = NULL;
2830     hamt_find_t res = hamt_find(self, key, &val);
2831     switch (res) {
2832         case F_ERROR:
2833             return NULL;
2834         case F_FOUND:
2835             Py_INCREF(val);
2836             return val;
2837         case F_NOT_FOUND:
2838             if (def == NULL) {
2839                 Py_RETURN_NONE;
2840             }
2841             Py_INCREF(def);
2842             return def;
2843         default:
2844             Py_UNREACHABLE();
2845     }
2846 }
2847 
2848 static PyObject *
hamt_py_delete(PyHamtObject * self,PyObject * key)2849 hamt_py_delete(PyHamtObject *self, PyObject *key)
2850 {
2851     return (PyObject *)_PyHamt_Without(self, key);
2852 }
2853 
2854 static PyObject *
hamt_py_items(PyHamtObject * self,PyObject * args)2855 hamt_py_items(PyHamtObject *self, PyObject *args)
2856 {
2857     return _PyHamt_NewIterItems(self);
2858 }
2859 
2860 static PyObject *
hamt_py_values(PyHamtObject * self,PyObject * args)2861 hamt_py_values(PyHamtObject *self, PyObject *args)
2862 {
2863     return _PyHamt_NewIterValues(self);
2864 }
2865 
2866 static PyObject *
hamt_py_keys(PyHamtObject * self,PyObject * args)2867 hamt_py_keys(PyHamtObject *self, PyObject *args)
2868 {
2869     return _PyHamt_NewIterKeys(self);
2870 }
2871 
2872 #ifdef Py_DEBUG
2873 static PyObject *
hamt_py_dump(PyHamtObject * self,PyObject * args)2874 hamt_py_dump(PyHamtObject *self, PyObject *args)
2875 {
2876     return hamt_dump(self);
2877 }
2878 #endif
2879 
2880 
2881 static PyMethodDef PyHamt_methods[] = {
2882     {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2883     {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2884     {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2885     {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2886     {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2887     {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2888 #ifdef Py_DEBUG
2889     {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2890 #endif
2891     {NULL, NULL}
2892 };
2893 
2894 static PySequenceMethods PyHamt_as_sequence = {
2895     0,                                /* sq_length */
2896     0,                                /* sq_concat */
2897     0,                                /* sq_repeat */
2898     0,                                /* sq_item */
2899     0,                                /* sq_slice */
2900     0,                                /* sq_ass_item */
2901     0,                                /* sq_ass_slice */
2902     (objobjproc)hamt_tp_contains,     /* sq_contains */
2903     0,                                /* sq_inplace_concat */
2904     0,                                /* sq_inplace_repeat */
2905 };
2906 
2907 static PyMappingMethods PyHamt_as_mapping = {
2908     (lenfunc)hamt_tp_len,             /* mp_length */
2909     (binaryfunc)hamt_tp_subscript,    /* mp_subscript */
2910 };
2911 
2912 PyTypeObject _PyHamt_Type = {
2913     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2914     "hamt",
2915     sizeof(PyHamtObject),
2916     .tp_methods = PyHamt_methods,
2917     .tp_as_mapping = &PyHamt_as_mapping,
2918     .tp_as_sequence = &PyHamt_as_sequence,
2919     .tp_iter = (getiterfunc)hamt_tp_iter,
2920     .tp_dealloc = (destructor)hamt_tp_dealloc,
2921     .tp_getattro = PyObject_GenericGetAttr,
2922     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2923     .tp_richcompare = hamt_tp_richcompare,
2924     .tp_traverse = (traverseproc)hamt_tp_traverse,
2925     .tp_clear = (inquiry)hamt_tp_clear,
2926     .tp_new = hamt_tp_new,
2927     .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2928     .tp_hash = PyObject_HashNotImplemented,
2929 };
2930 
2931 
2932 /////////////////////////////////// Tree Node Types
2933 
2934 
2935 PyTypeObject _PyHamt_ArrayNode_Type = {
2936     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2937     "hamt_array_node",
2938     sizeof(PyHamtNode_Array),
2939     0,
2940     .tp_dealloc = (destructor)hamt_node_array_dealloc,
2941     .tp_getattro = PyObject_GenericGetAttr,
2942     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2943     .tp_traverse = (traverseproc)hamt_node_array_traverse,
2944     .tp_free = PyObject_GC_Del,
2945     .tp_hash = PyObject_HashNotImplemented,
2946 };
2947 
2948 PyTypeObject _PyHamt_BitmapNode_Type = {
2949     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2950     "hamt_bitmap_node",
2951     sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2952     sizeof(PyObject *),
2953     .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2954     .tp_getattro = PyObject_GenericGetAttr,
2955     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2956     .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2957     .tp_free = PyObject_GC_Del,
2958     .tp_hash = PyObject_HashNotImplemented,
2959 };
2960 
2961 PyTypeObject _PyHamt_CollisionNode_Type = {
2962     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2963     "hamt_collision_node",
2964     sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2965     sizeof(PyObject *),
2966     .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2967     .tp_getattro = PyObject_GenericGetAttr,
2968     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2969     .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2970     .tp_free = PyObject_GC_Del,
2971     .tp_hash = PyObject_HashNotImplemented,
2972 };
2973 
2974 
2975 int
_PyHamt_Init(void)2976 _PyHamt_Init(void)
2977 {
2978     if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2979         (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2980         (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2981         (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2982         (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2983         (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2984         (PyType_Ready(&_PyHamtItems_Type) < 0))
2985     {
2986         return 0;
2987     }
2988 
2989     return 1;
2990 }
2991 
2992 void
_PyHamt_Fini(void)2993 _PyHamt_Fini(void)
2994 {
2995     Py_CLEAR(_empty_hamt);
2996     Py_CLEAR(_empty_bitmap_node);
2997 }
2998