• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 
3 #include "pycore_bitutils.h"      // _Py_popcount32
4 #include "pycore_hamt.h"
5 #include "pycore_object.h"        // _PyObject_GC_TRACK()
6 #include <stddef.h>               // offsetof()
7 
8 /*
9 This file provides an implementation of an immutable mapping using the
10 Hash Array Mapped Trie (or HAMT) datastructure.
11 
12 This design allows to have:
13 
14 1. Efficient copy: immutable mappings can be copied by reference,
15    making it an O(1) operation.
16 
17 2. Efficient mutations: due to structural sharing, only a portion of
18    the trie needs to be copied when the collection is mutated.  The
19    cost of set/delete operations is O(log N).
20 
21 3. Efficient lookups: O(log N).
22 
23 (where N is number of key/value items in the immutable mapping.)
24 
25 
26 HAMT
27 ====
28 
29 The core idea of HAMT is that the shape of the trie is encoded into the
30 hashes of keys.
31 
32 Say we want to store a K/V pair in our mapping.  First, we calculate the
33 hash of K, let's say it's 19830128, or in binary:
34 
35     0b1001011101001010101110000 = 19830128
36 
37 Now let's partition this bit representation of the hash into blocks of
38 5 bits each:
39 
40     0b00_00000_10010_11101_00101_01011_10000 = 19830128
41           (6)   (5)   (4)   (3)   (2)   (1)
42 
43 Each block of 5 bits represents a number between 0 and 31.  So if we have
44 a tree that consists of nodes, each of which is an array of 32 pointers,
45 those 5-bit blocks will encode a position on a single tree level.
46 
47 For example, storing the key K with hash 19830128, results in the following
48 tree structure:
49 
50                      (array of 32 pointers)
51                      +---+ -- +----+----+----+ -- +----+
52   root node          | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b10000 = 16 (1)
53   (level 1)          +---+ -- +----+----+----+ -- +----+
54                                       |
55                      +---+ -- +----+----+----+ -- +----+
56   a 2nd level node   | 0 | .. | 10 | 11 | 12 | .. | 31 |   0b01011 = 11 (2)
57                      +---+ -- +----+----+----+ -- +----+
58                                       |
59                      +---+ -- +----+----+----+ -- +----+
60   a 3rd level node   | 0 | .. | 04 | 05 | 06 | .. | 31 |   0b00101 = 5  (3)
61                      +---+ -- +----+----+----+ -- +----+
62                                       |
63                      +---+ -- +----+----+----+----+
64   a 4th level node   | 0 | .. | 04 | 29 | 30 | 31 |        0b11101 = 29 (4)
65                      +---+ -- +----+----+----+----+
66                                       |
67                      +---+ -- +----+----+----+ -- +----+
68   a 5th level node   | 0 | .. | 17 | 18 | 19 | .. | 31 |   0b10010 = 18 (5)
69                      +---+ -- +----+----+----+ -- +----+
70                                       |
71                        +--------------+
72                        |
73                      +---+ -- +----+----+----+ -- +----+
74   a 6th level node   | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b00000 = 0  (6)
75                      +---+ -- +----+----+----+ -- +----+
76                        |
77                        V -- our value (or collision)
78 
79 To rehash: for a K/V pair, the hash of K encodes where in the tree V will
80 be stored.
81 
82 To optimize memory footprint and handle hash collisions, our implementation
83 uses three different types of nodes:
84 
85  * A Bitmap node;
86  * An Array node;
87  * A Collision node.
88 
89 Because we implement an immutable dictionary, our nodes are also
90 immutable.  Therefore, when we need to modify a node, we copy it, and
91 do that modification to the copy.
92 
93 
94 Array Nodes
95 -----------
96 
97 These nodes are very simple.  Essentially they are arrays of 32 pointers
98 we used to illustrate the high-level idea in the previous section.
99 
100 We use Array nodes only when we need to store more than 16 pointers
101 in a single node.
102 
103 Array nodes do not store key objects or value objects.  They are used
104 only as an indirection level - their pointers point to other nodes in
105 the tree.
106 
107 
108 Bitmap Node
109 -----------
110 
111 Allocating a new 32-pointers array for every node of our tree would be
112 very expensive.  Unless we store millions of keys, most of tree nodes would
113 be very sparse.
114 
115 When we have less than 16 elements in a node, we don't want to use the
116 Array node, that would mean that we waste a lot of memory.  Instead,
117 we can use bitmap compression and can have just as many pointers
118 as we need!
119 
120 Bitmap nodes consist of two fields:
121 
122 1. An array of pointers.  If a Bitmap node holds N elements, the
123    array will be of N pointers.
124 
125 2. A 32bit integer -- a bitmap field.  If an N-th bit is set in the
126    bitmap, it means that the node has an N-th element.
127 
128 For example, say we need to store a 3 elements sparse array:
129 
130    +---+  --  +---+  --  +----+  --  +----+
131    | 0 |  ..  | 4 |  ..  | 11 |  ..  | 17 |
132    +---+  --  +---+  --  +----+  --  +----+
133                 |          |           |
134                 o1         o2          o3
135 
136 We allocate a three-pointer Bitmap node.  Its bitmap field will be
137 then set to:
138 
139    0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
140 
141 To check if our Bitmap node has an I-th element we can do:
142 
143    bitmap & (1 << I)
144 
145 
146 And here's a formula to calculate a position in our pointer array
147 which would correspond to an I-th element:
148 
149    popcount(bitmap & ((1 << I) - 1))
150 
151 
152 Let's break it down:
153 
154  * `popcount` is a function that returns a number of bits set to 1;
155 
156  * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
157    set to the *right* of our bit.
158 
159 
160 So for our 17, 11, and 4 indexes:
161 
162  * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
163 
164  * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
165 
166  * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
167 
168 
169 To conclude: Bitmap nodes are just like Array nodes -- they can store
170 a number of pointers, but use bitmap compression to eliminate unused
171 pointers.
172 
173 
174 Bitmap nodes have two pointers for each item:
175 
176   +----+----+----+----+  --  +----+----+
177   | k1 | v1 | k2 | v2 |  ..  | kN | vN |
178   +----+----+----+----+  --  +----+----+
179 
180 When kI == NULL, vI points to another tree level.
181 
182 When kI != NULL, the actual key object is stored in kI, and its
183 value is stored in vI.
184 
185 
186 Collision Nodes
187 ---------------
188 
189 Collision nodes are simple arrays of pointers -- two pointers per
190 key/value.  When there's a hash collision, say for k1/v1 and k2/v2
191 we have `hash(k1)==hash(k2)`.  Then our collision node will be:
192 
193   +----+----+----+----+
194   | k1 | v1 | k2 | v2 |
195   +----+----+----+----+
196 
197 
198 Tree Structure
199 --------------
200 
201 All nodes are PyObjects.
202 
203 The `PyHamtObject` object has a pointer to the root node (h_root),
204 and has a length field (h_count).
205 
206 High-level functions accept a PyHamtObject object and dispatch to
207 lower-level functions depending on what kind of node h_root points to.
208 
209 
210 Operations
211 ==========
212 
213 There are three fundamental operations on an immutable dictionary:
214 
215 1. "o.assoc(k, v)" will return a new immutable dictionary, that will be
216    a copy of "o", but with the "k/v" item set.
217 
218    Functions in this file:
219 
220         hamt_node_assoc, hamt_node_bitmap_assoc,
221         hamt_node_array_assoc, hamt_node_collision_assoc
222 
223    `hamt_node_assoc` function accepts a node object, and calls
224    other functions depending on its actual type.
225 
226 2. "o.find(k)" will lookup key "k" in "o".
227 
228    Functions:
229 
230         hamt_node_find, hamt_node_bitmap_find,
231         hamt_node_array_find, hamt_node_collision_find
232 
233 3. "o.without(k)" will return a new immutable dictionary, that will be
234    a copy of "o", buth without the "k" key.
235 
236    Functions:
237 
238         hamt_node_without, hamt_node_bitmap_without,
239         hamt_node_array_without, hamt_node_collision_without
240 
241 
242 Further Reading
243 ===============
244 
245 1. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
246 
247 2. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
248 
249 3. Clojure's PersistentHashMap implementation:
250    https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
251 
252 
253 Debug
254 =====
255 
256 The HAMT datatype is accessible for testing purposes under the
257 `_testcapi` module:
258 
259     >>> from _testcapi import hamt
260     >>> h = hamt()
261     >>> h2 = h.set('a', 2)
262     >>> h3 = h2.set('b', 3)
263     >>> list(h3)
264     ['a', 'b']
265 
266 When CPython is built in debug mode, a '__dump__()' method is available
267 to introspect the tree:
268 
269     >>> print(h3.__dump__())
270     HAMT(len=2):
271         BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
272             'a': 2
273             'b': 3
274 */
275 
276 
277 #define IS_ARRAY_NODE(node)     Py_IS_TYPE(node, &_PyHamt_ArrayNode_Type)
278 #define IS_BITMAP_NODE(node)    Py_IS_TYPE(node, &_PyHamt_BitmapNode_Type)
279 #define IS_COLLISION_NODE(node) Py_IS_TYPE(node, &_PyHamt_CollisionNode_Type)
280 
281 
282 /* Return type for 'find' (lookup a key) functions.
283 
284    * F_ERROR - an error occurred;
285    * F_NOT_FOUND - the key was not found;
286    * F_FOUND - the key was found.
287 */
288 typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
289 
290 
291 /* Return type for 'without' (delete a key) functions.
292 
293    * W_ERROR - an error occurred;
294    * W_NOT_FOUND - the key was not found: there's nothing to delete;
295    * W_EMPTY - the key was found: the node/tree would be empty
296      if the key is deleted;
297    * W_NEWNODE - the key was found: a new node/tree is returned
298      without that key.
299 */
300 typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
301 
302 
303 /* Low-level iterator protocol type.
304 
305    * I_ITEM - a new item has been yielded;
306    * I_END - the whole tree was visited (similar to StopIteration).
307 */
308 typedef enum {I_ITEM, I_END} hamt_iter_t;
309 
310 
311 #define HAMT_ARRAY_NODE_SIZE 32
312 
313 
314 typedef struct {
315     PyObject_HEAD
316     PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
317     Py_ssize_t a_count;
318 } PyHamtNode_Array;
319 
320 
321 typedef struct {
322     PyObject_VAR_HEAD
323     uint32_t b_bitmap;
324     PyObject *b_array[1];
325 } PyHamtNode_Bitmap;
326 
327 
328 typedef struct {
329     PyObject_VAR_HEAD
330     int32_t c_hash;
331     PyObject *c_array[1];
332 } PyHamtNode_Collision;
333 
334 
335 static PyHamtNode_Bitmap *_empty_bitmap_node;
336 static PyHamtObject *_empty_hamt;
337 
338 
339 static PyHamtObject *
340 hamt_alloc(void);
341 
342 static PyHamtNode *
343 hamt_node_assoc(PyHamtNode *node,
344                 uint32_t shift, int32_t hash,
345                 PyObject *key, PyObject *val, int* added_leaf);
346 
347 static hamt_without_t
348 hamt_node_without(PyHamtNode *node,
349                   uint32_t shift, int32_t hash,
350                   PyObject *key,
351                   PyHamtNode **new_node);
352 
353 static hamt_find_t
354 hamt_node_find(PyHamtNode *node,
355                uint32_t shift, int32_t hash,
356                PyObject *key, PyObject **val);
357 
358 #ifdef Py_DEBUG
359 static int
360 hamt_node_dump(PyHamtNode *node,
361                _PyUnicodeWriter *writer, int level);
362 #endif
363 
364 static PyHamtNode *
365 hamt_node_array_new(Py_ssize_t);
366 
367 static PyHamtNode *
368 hamt_node_collision_new(int32_t hash, Py_ssize_t size);
369 
370 static inline Py_ssize_t
371 hamt_node_collision_count(PyHamtNode_Collision *node);
372 
373 
374 #ifdef Py_DEBUG
375 static void
_hamt_node_array_validate(void * obj_raw)376 _hamt_node_array_validate(void *obj_raw)
377 {
378     PyObject *obj = _PyObject_CAST(obj_raw);
379     assert(IS_ARRAY_NODE(obj));
380     PyHamtNode_Array *node = (PyHamtNode_Array*)obj;
381     Py_ssize_t i = 0, count = 0;
382     for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
383         if (node->a_array[i] != NULL) {
384             count++;
385         }
386     }
387     assert(count == node->a_count);
388 }
389 
390 #define VALIDATE_ARRAY_NODE(NODE) \
391     do { _hamt_node_array_validate(NODE); } while (0);
392 #else
393 #define VALIDATE_ARRAY_NODE(NODE)
394 #endif
395 
396 
397 /* Returns -1 on error */
398 static inline int32_t
hamt_hash(PyObject * o)399 hamt_hash(PyObject *o)
400 {
401     Py_hash_t hash = PyObject_Hash(o);
402 
403 #if SIZEOF_PY_HASH_T <= 4
404     return hash;
405 #else
406     if (hash == -1) {
407         /* exception */
408         return -1;
409     }
410 
411     /* While it's suboptimal to reduce Python's 64 bit hash to
412        32 bits via XOR, it seems that the resulting hash function
413        is good enough (this is also how Long type is hashed in Java.)
414        Storing 10, 100, 1000 Python strings results in a relatively
415        shallow and uniform tree structure.
416 
417        Please don't change this hashing algorithm, as there are many
418        tests that test some exact tree shape to cover all code paths.
419     */
420     int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
421     return xored == -1 ? -2 : xored;
422 #endif
423 }
424 
425 static inline uint32_t
hamt_mask(int32_t hash,uint32_t shift)426 hamt_mask(int32_t hash, uint32_t shift)
427 {
428     return (((uint32_t)hash >> shift) & 0x01f);
429 }
430 
431 static inline uint32_t
hamt_bitpos(int32_t hash,uint32_t shift)432 hamt_bitpos(int32_t hash, uint32_t shift)
433 {
434     return (uint32_t)1 << hamt_mask(hash, shift);
435 }
436 
437 static inline uint32_t
hamt_bitindex(uint32_t bitmap,uint32_t bit)438 hamt_bitindex(uint32_t bitmap, uint32_t bit)
439 {
440     return (uint32_t)_Py_popcount32(bitmap & (bit - 1));
441 }
442 
443 
444 /////////////////////////////////// Dump Helpers
445 #ifdef Py_DEBUG
446 
447 static int
_hamt_dump_ident(_PyUnicodeWriter * writer,int level)448 _hamt_dump_ident(_PyUnicodeWriter *writer, int level)
449 {
450     /* Write `'    ' * level` to the `writer` */
451     PyObject *str = NULL;
452     PyObject *num = NULL;
453     PyObject *res = NULL;
454     int ret = -1;
455 
456     str = PyUnicode_FromString("    ");
457     if (str == NULL) {
458         goto error;
459     }
460 
461     num = PyLong_FromLong((long)level);
462     if (num == NULL) {
463         goto error;
464     }
465 
466     res = PyNumber_Multiply(str, num);
467     if (res == NULL) {
468         goto error;
469     }
470 
471     ret = _PyUnicodeWriter_WriteStr(writer, res);
472 
473 error:
474     Py_XDECREF(res);
475     Py_XDECREF(str);
476     Py_XDECREF(num);
477     return ret;
478 }
479 
480 static int
_hamt_dump_format(_PyUnicodeWriter * writer,const char * format,...)481 _hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
482 {
483     /* A convenient helper combining _PyUnicodeWriter_WriteStr and
484        PyUnicode_FromFormatV.
485     */
486     PyObject* msg;
487     int ret;
488 
489     va_list vargs;
490 #ifdef HAVE_STDARG_PROTOTYPES
491     va_start(vargs, format);
492 #else
493     va_start(vargs);
494 #endif
495     msg = PyUnicode_FromFormatV(format, vargs);
496     va_end(vargs);
497 
498     if (msg == NULL) {
499         return -1;
500     }
501 
502     ret = _PyUnicodeWriter_WriteStr(writer, msg);
503     Py_DECREF(msg);
504     return ret;
505 }
506 
507 #endif  /* Py_DEBUG */
508 /////////////////////////////////// Bitmap Node
509 
510 
511 static PyHamtNode *
hamt_node_bitmap_new(Py_ssize_t size)512 hamt_node_bitmap_new(Py_ssize_t size)
513 {
514     /* Create a new bitmap node of size 'size' */
515 
516     PyHamtNode_Bitmap *node;
517     Py_ssize_t i;
518 
519     assert(size >= 0);
520     assert(size % 2 == 0);
521 
522     if (size == 0 && _empty_bitmap_node != NULL) {
523         Py_INCREF(_empty_bitmap_node);
524         return (PyHamtNode *)_empty_bitmap_node;
525     }
526 
527     /* No freelist; allocate a new bitmap node */
528     node = PyObject_GC_NewVar(
529         PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
530     if (node == NULL) {
531         return NULL;
532     }
533 
534     Py_SET_SIZE(node, size);
535 
536     for (i = 0; i < size; i++) {
537         node->b_array[i] = NULL;
538     }
539 
540     node->b_bitmap = 0;
541 
542     _PyObject_GC_TRACK(node);
543 
544     if (size == 0 && _empty_bitmap_node == NULL) {
545         /* Since bitmap nodes are immutable, we can cache the instance
546            for size=0 and reuse it whenever we need an empty bitmap node.
547         */
548         _empty_bitmap_node = node;
549         Py_INCREF(_empty_bitmap_node);
550     }
551 
552     return (PyHamtNode *)node;
553 }
554 
555 static inline Py_ssize_t
hamt_node_bitmap_count(PyHamtNode_Bitmap * node)556 hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
557 {
558     return Py_SIZE(node) / 2;
559 }
560 
561 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone(PyHamtNode_Bitmap * node)562 hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
563 {
564     /* Clone a bitmap node; return a new one with the same child notes. */
565 
566     PyHamtNode_Bitmap *clone;
567     Py_ssize_t i;
568 
569     clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
570     if (clone == NULL) {
571         return NULL;
572     }
573 
574     for (i = 0; i < Py_SIZE(node); i++) {
575         Py_XINCREF(node->b_array[i]);
576         clone->b_array[i] = node->b_array[i];
577     }
578 
579     clone->b_bitmap = node->b_bitmap;
580     return clone;
581 }
582 
583 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone_without(PyHamtNode_Bitmap * o,uint32_t bit)584 hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
585 {
586     assert(bit & o->b_bitmap);
587     assert(hamt_node_bitmap_count(o) > 1);
588 
589     PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
590         Py_SIZE(o) - 2);
591     if (new == NULL) {
592         return NULL;
593     }
594 
595     uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
596     uint32_t key_idx = 2 * idx;
597     uint32_t val_idx = key_idx + 1;
598     uint32_t i;
599 
600     for (i = 0; i < key_idx; i++) {
601         Py_XINCREF(o->b_array[i]);
602         new->b_array[i] = o->b_array[i];
603     }
604 
605     assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32);
606     for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) {
607         Py_XINCREF(o->b_array[i]);
608         new->b_array[i - 2] = o->b_array[i];
609     }
610 
611     new->b_bitmap = o->b_bitmap & ~bit;
612     return new;
613 }
614 
615 static PyHamtNode *
hamt_node_new_bitmap_or_collision(uint32_t shift,PyObject * key1,PyObject * val1,int32_t key2_hash,PyObject * key2,PyObject * val2)616 hamt_node_new_bitmap_or_collision(uint32_t shift,
617                                   PyObject *key1, PyObject *val1,
618                                   int32_t key2_hash,
619                                   PyObject *key2, PyObject *val2)
620 {
621     /* Helper method.  Creates a new node for key1/val and key2/val2
622        pairs.
623 
624        If key1 hash is equal to the hash of key2, a Collision node
625        will be created.  If they are not equal, a Bitmap node is
626        created.
627     */
628 
629     int32_t key1_hash = hamt_hash(key1);
630     if (key1_hash == -1) {
631         return NULL;
632     }
633 
634     if (key1_hash == key2_hash) {
635         PyHamtNode_Collision *n;
636         n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
637         if (n == NULL) {
638             return NULL;
639         }
640 
641         Py_INCREF(key1);
642         n->c_array[0] = key1;
643         Py_INCREF(val1);
644         n->c_array[1] = val1;
645 
646         Py_INCREF(key2);
647         n->c_array[2] = key2;
648         Py_INCREF(val2);
649         n->c_array[3] = val2;
650 
651         return (PyHamtNode *)n;
652     }
653     else {
654         int added_leaf = 0;
655         PyHamtNode *n = hamt_node_bitmap_new(0);
656         if (n == NULL) {
657             return NULL;
658         }
659 
660         PyHamtNode *n2 = hamt_node_assoc(
661             n, shift, key1_hash, key1, val1, &added_leaf);
662         Py_DECREF(n);
663         if (n2 == NULL) {
664             return NULL;
665         }
666 
667         n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
668         Py_DECREF(n2);
669         if (n == NULL) {
670             return NULL;
671         }
672 
673         return n;
674     }
675 }
676 
677 static PyHamtNode *
hamt_node_bitmap_assoc(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)678 hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
679                        uint32_t shift, int32_t hash,
680                        PyObject *key, PyObject *val, int* added_leaf)
681 {
682     /* assoc operation for bitmap nodes.
683 
684        Return: a new node, or self if key/val already is in the
685        collection.
686 
687        'added_leaf' is later used in '_PyHamt_Assoc' to determine if
688        `hamt.set(key, val)` increased the size of the collection.
689     */
690 
691     uint32_t bit = hamt_bitpos(hash, shift);
692     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
693 
694     /* Bitmap node layout:
695 
696     +------+------+------+------+  ---  +------+------+
697     | key1 | val1 | key2 | val2 |  ...  | keyN | valN |
698     +------+------+------+------+  ---  +------+------+
699     where `N < Py_SIZE(node)`.
700 
701     The `node->b_bitmap` field is a bitmap.  For a given
702     `(shift, hash)` pair we can determine:
703 
704      - If this node has the corresponding key/val slots.
705      - The index of key/val slots.
706     */
707 
708     if (self->b_bitmap & bit) {
709         /* The key is set in this node */
710 
711         uint32_t key_idx = 2 * idx;
712         uint32_t val_idx = key_idx + 1;
713 
714         assert(val_idx < (size_t)Py_SIZE(self));
715 
716         PyObject *key_or_null = self->b_array[key_idx];
717         PyObject *val_or_node = self->b_array[val_idx];
718 
719         if (key_or_null == NULL) {
720             /* key is NULL.  This means that we have a few keys
721                that have the same (hash, shift) pair. */
722 
723             assert(val_or_node != NULL);
724 
725             PyHamtNode *sub_node = hamt_node_assoc(
726                 (PyHamtNode *)val_or_node,
727                 shift + 5, hash, key, val, added_leaf);
728             if (sub_node == NULL) {
729                 return NULL;
730             }
731 
732             if (val_or_node == (PyObject *)sub_node) {
733                 Py_DECREF(sub_node);
734                 Py_INCREF(self);
735                 return (PyHamtNode *)self;
736             }
737 
738             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
739             if (ret == NULL) {
740                 return NULL;
741             }
742             Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
743             return (PyHamtNode *)ret;
744         }
745 
746         assert(key != NULL);
747         /* key is not NULL.  This means that we have only one other
748            key in this collection that matches our hash for this shift. */
749 
750         int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
751         if (comp_err < 0) {  /* exception in __eq__ */
752             return NULL;
753         }
754         if (comp_err == 1) {  /* key == key_or_null */
755             if (val == val_or_node) {
756                 /* we already have the same key/val pair; return self. */
757                 Py_INCREF(self);
758                 return (PyHamtNode *)self;
759             }
760 
761             /* We're setting a new value for the key we had before.
762                Make a new bitmap node with a replaced value, and return it. */
763             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
764             if (ret == NULL) {
765                 return NULL;
766             }
767             Py_INCREF(val);
768             Py_SETREF(ret->b_array[val_idx], val);
769             return (PyHamtNode *)ret;
770         }
771 
772         /* It's a new key, and it has the same index as *one* another key.
773            We have a collision.  We need to create a new node which will
774            combine the existing key and the key we're adding.
775 
776            `hamt_node_new_bitmap_or_collision` will either create a new
777            Collision node if the keys have identical hashes, or
778            a new Bitmap node.
779         */
780         PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
781             shift + 5,
782             key_or_null, val_or_node,  /* existing key/val */
783             hash,
784             key, val  /* new key/val */
785         );
786         if (sub_node == NULL) {
787             return NULL;
788         }
789 
790         PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
791         if (ret == NULL) {
792             Py_DECREF(sub_node);
793             return NULL;
794         }
795         Py_SETREF(ret->b_array[key_idx], NULL);
796         Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
797 
798         *added_leaf = 1;
799         return (PyHamtNode *)ret;
800     }
801     else {
802         /* There was no key before with the same (shift,hash). */
803 
804         uint32_t n = (uint32_t)_Py_popcount32(self->b_bitmap);
805 
806         if (n >= 16) {
807             /* When we have a situation where we want to store more
808                than 16 nodes at one level of the tree, we no longer
809                want to use the Bitmap node with bitmap encoding.
810 
811                Instead we start using an Array node, which has
812                simpler (faster) implementation at the expense of
813                having preallocated 32 pointers for its keys/values
814                pairs.
815 
816                Small hamt objects (<30 keys) usually don't have any
817                Array nodes at all.  Between ~30 and ~400 keys hamt
818                objects usually have one Array node, and usually it's
819                a root node.
820             */
821 
822             uint32_t jdx = hamt_mask(hash, shift);
823             /* 'jdx' is the index of where the new key should be added
824                in the new Array node we're about to create. */
825 
826             PyHamtNode *empty = NULL;
827             PyHamtNode_Array *new_node = NULL;
828             PyHamtNode *res = NULL;
829 
830             /* Create a new Array node. */
831             new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
832             if (new_node == NULL) {
833                 goto fin;
834             }
835 
836             /* Create an empty bitmap node for the next
837                hamt_node_assoc call. */
838             empty = hamt_node_bitmap_new(0);
839             if (empty == NULL) {
840                 goto fin;
841             }
842 
843             /* Make a new bitmap node for the key/val we're adding.
844                Set that bitmap node to new-array-node[jdx]. */
845             new_node->a_array[jdx] = hamt_node_assoc(
846                 empty, shift + 5, hash, key, val, added_leaf);
847             if (new_node->a_array[jdx] == NULL) {
848                 goto fin;
849             }
850 
851             /* Copy existing key/value pairs from the current Bitmap
852                node to the new Array node we've just created. */
853             Py_ssize_t i, j;
854             for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
855                 if (((self->b_bitmap >> i) & 1) != 0) {
856                     /* Ensure we don't accidentally override `jdx` element
857                        we set few lines above.
858                     */
859                     assert(new_node->a_array[i] == NULL);
860 
861                     if (self->b_array[j] == NULL) {
862                         new_node->a_array[i] =
863                             (PyHamtNode *)self->b_array[j + 1];
864                         Py_INCREF(new_node->a_array[i]);
865                     }
866                     else {
867                         int32_t rehash = hamt_hash(self->b_array[j]);
868                         if (rehash == -1) {
869                             goto fin;
870                         }
871 
872                         new_node->a_array[i] = hamt_node_assoc(
873                             empty, shift + 5,
874                             rehash,
875                             self->b_array[j],
876                             self->b_array[j + 1],
877                             added_leaf);
878 
879                         if (new_node->a_array[i] == NULL) {
880                             goto fin;
881                         }
882                     }
883                     j += 2;
884                 }
885             }
886 
887             VALIDATE_ARRAY_NODE(new_node)
888 
889             /* That's it! */
890             res = (PyHamtNode *)new_node;
891 
892         fin:
893             Py_XDECREF(empty);
894             if (res == NULL) {
895                 Py_XDECREF(new_node);
896             }
897             return res;
898         }
899         else {
900             /* We have less than 16 keys at this level; let's just
901                create a new bitmap node out of this node with the
902                new key/val pair added. */
903 
904             uint32_t key_idx = 2 * idx;
905             uint32_t val_idx = key_idx + 1;
906             uint32_t i;
907 
908             *added_leaf = 1;
909 
910             /* Allocate new Bitmap node which can have one more key/val
911                pair in addition to what we have already. */
912             PyHamtNode_Bitmap *new_node =
913                 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
914             if (new_node == NULL) {
915                 return NULL;
916             }
917 
918             /* Copy all keys/values that will be before the new key/value
919                we are adding. */
920             for (i = 0; i < key_idx; i++) {
921                 Py_XINCREF(self->b_array[i]);
922                 new_node->b_array[i] = self->b_array[i];
923             }
924 
925             /* Set the new key/value to the new Bitmap node. */
926             Py_INCREF(key);
927             new_node->b_array[key_idx] = key;
928             Py_INCREF(val);
929             new_node->b_array[val_idx] = val;
930 
931             /* Copy all keys/values that will be after the new key/value
932                we are adding. */
933             assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32);
934             for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) {
935                 Py_XINCREF(self->b_array[i]);
936                 new_node->b_array[i + 2] = self->b_array[i];
937             }
938 
939             new_node->b_bitmap = self->b_bitmap | bit;
940             return (PyHamtNode *)new_node;
941         }
942     }
943 }
944 
945 static hamt_without_t
hamt_node_bitmap_without(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)946 hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
947                          uint32_t shift, int32_t hash,
948                          PyObject *key,
949                          PyHamtNode **new_node)
950 {
951     uint32_t bit = hamt_bitpos(hash, shift);
952     if ((self->b_bitmap & bit) == 0) {
953         return W_NOT_FOUND;
954     }
955 
956     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
957 
958     uint32_t key_idx = 2 * idx;
959     uint32_t val_idx = key_idx + 1;
960 
961     PyObject *key_or_null = self->b_array[key_idx];
962     PyObject *val_or_node = self->b_array[val_idx];
963 
964     if (key_or_null == NULL) {
965         /* key == NULL means that 'value' is another tree node. */
966 
967         PyHamtNode *sub_node = NULL;
968 
969         hamt_without_t res = hamt_node_without(
970             (PyHamtNode *)val_or_node,
971             shift + 5, hash, key, &sub_node);
972 
973         switch (res) {
974             case W_EMPTY:
975                 /* It's impossible for us to receive a W_EMPTY here:
976 
977                     - Array nodes are converted to Bitmap nodes when
978                       we delete 16th item from them;
979 
980                     - Collision nodes are converted to Bitmap when
981                       there is one item in them;
982 
983                     - Bitmap node's without() inlines single-item
984                       sub-nodes.
985 
986                    So in no situation we can have a single-item
987                    Bitmap child of another Bitmap node.
988                 */
989                 Py_UNREACHABLE();
990 
991             case W_NEWNODE: {
992                 assert(sub_node != NULL);
993 
994                 if (IS_BITMAP_NODE(sub_node)) {
995                     PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
996                     if (hamt_node_bitmap_count(sub_tree) == 1 &&
997                             sub_tree->b_array[0] != NULL)
998                     {
999                         /* A bitmap node with one key/value pair.  Just
1000                            merge it into this node.
1001 
1002                            Note that we don't inline Bitmap nodes that
1003                            have a NULL key -- those nodes point to another
1004                            tree level, and we cannot simply move tree levels
1005                            up or down.
1006                         */
1007 
1008                         PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1009                         if (clone == NULL) {
1010                             Py_DECREF(sub_node);
1011                             return W_ERROR;
1012                         }
1013 
1014                         PyObject *key = sub_tree->b_array[0];
1015                         PyObject *val = sub_tree->b_array[1];
1016 
1017                         Py_INCREF(key);
1018                         Py_XSETREF(clone->b_array[key_idx], key);
1019                         Py_INCREF(val);
1020                         Py_SETREF(clone->b_array[val_idx], val);
1021 
1022                         Py_DECREF(sub_tree);
1023 
1024                         *new_node = (PyHamtNode *)clone;
1025                         return W_NEWNODE;
1026                     }
1027                 }
1028 
1029 #ifdef Py_DEBUG
1030                 /* Ensure that Collision.without implementation
1031                    converts to Bitmap nodes itself.
1032                 */
1033                 if (IS_COLLISION_NODE(sub_node)) {
1034                     assert(hamt_node_collision_count(
1035                             (PyHamtNode_Collision*)sub_node) > 1);
1036                 }
1037 #endif
1038 
1039                 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1040                 if (clone == NULL) {
1041                     return W_ERROR;
1042                 }
1043 
1044                 Py_SETREF(clone->b_array[val_idx],
1045                           (PyObject *)sub_node);  /* borrow */
1046 
1047                 *new_node = (PyHamtNode *)clone;
1048                 return W_NEWNODE;
1049             }
1050 
1051             case W_ERROR:
1052             case W_NOT_FOUND:
1053                 assert(sub_node == NULL);
1054                 return res;
1055 
1056             default:
1057                 Py_UNREACHABLE();
1058         }
1059     }
1060     else {
1061         /* We have a regular key/value pair */
1062 
1063         int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1064         if (cmp < 0) {
1065             return W_ERROR;
1066         }
1067         if (cmp == 0) {
1068             return W_NOT_FOUND;
1069         }
1070 
1071         if (hamt_node_bitmap_count(self) == 1) {
1072             return W_EMPTY;
1073         }
1074 
1075         *new_node = (PyHamtNode *)
1076             hamt_node_bitmap_clone_without(self, bit);
1077         if (*new_node == NULL) {
1078             return W_ERROR;
1079         }
1080 
1081         return W_NEWNODE;
1082     }
1083 }
1084 
1085 static hamt_find_t
hamt_node_bitmap_find(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1086 hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1087                       uint32_t shift, int32_t hash,
1088                       PyObject *key, PyObject **val)
1089 {
1090     /* Lookup a key in a Bitmap node. */
1091 
1092     uint32_t bit = hamt_bitpos(hash, shift);
1093     uint32_t idx;
1094     uint32_t key_idx;
1095     uint32_t val_idx;
1096     PyObject *key_or_null;
1097     PyObject *val_or_node;
1098     int comp_err;
1099 
1100     if ((self->b_bitmap & bit) == 0) {
1101         return F_NOT_FOUND;
1102     }
1103 
1104     idx = hamt_bitindex(self->b_bitmap, bit);
1105     key_idx = idx * 2;
1106     val_idx = key_idx + 1;
1107 
1108     assert(val_idx < (size_t)Py_SIZE(self));
1109 
1110     key_or_null = self->b_array[key_idx];
1111     val_or_node = self->b_array[val_idx];
1112 
1113     if (key_or_null == NULL) {
1114         /* There are a few keys that have the same hash at the current shift
1115            that match our key.  Dispatch the lookup further down the tree. */
1116         assert(val_or_node != NULL);
1117         return hamt_node_find((PyHamtNode *)val_or_node,
1118                               shift + 5, hash, key, val);
1119     }
1120 
1121     /* We have only one key -- a potential match.  Let's compare if the
1122        key we are looking at is equal to the key we are looking for. */
1123     assert(key != NULL);
1124     comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1125     if (comp_err < 0) {  /* exception in __eq__ */
1126         return F_ERROR;
1127     }
1128     if (comp_err == 1) {  /* key == key_or_null */
1129         *val = val_or_node;
1130         return F_FOUND;
1131     }
1132 
1133     return F_NOT_FOUND;
1134 }
1135 
1136 static int
hamt_node_bitmap_traverse(PyHamtNode_Bitmap * self,visitproc visit,void * arg)1137 hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1138 {
1139     /* Bitmap's tp_traverse */
1140 
1141     Py_ssize_t i;
1142 
1143     for (i = Py_SIZE(self); --i >= 0; ) {
1144         Py_VISIT(self->b_array[i]);
1145     }
1146 
1147     return 0;
1148 }
1149 
1150 static void
hamt_node_bitmap_dealloc(PyHamtNode_Bitmap * self)1151 hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1152 {
1153     /* Bitmap's tp_dealloc */
1154 
1155     Py_ssize_t len = Py_SIZE(self);
1156     Py_ssize_t i;
1157 
1158     PyObject_GC_UnTrack(self);
1159     Py_TRASHCAN_BEGIN(self, hamt_node_bitmap_dealloc)
1160 
1161     if (len > 0) {
1162         i = len;
1163         while (--i >= 0) {
1164             Py_XDECREF(self->b_array[i]);
1165         }
1166     }
1167 
1168     Py_TYPE(self)->tp_free((PyObject *)self);
1169     Py_TRASHCAN_END
1170 }
1171 
1172 #ifdef Py_DEBUG
1173 static int
hamt_node_bitmap_dump(PyHamtNode_Bitmap * node,_PyUnicodeWriter * writer,int level)1174 hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1175                       _PyUnicodeWriter *writer, int level)
1176 {
1177     /* Debug build: __dump__() method implementation for Bitmap nodes. */
1178 
1179     Py_ssize_t i;
1180     PyObject *tmp1;
1181     PyObject *tmp2;
1182 
1183     if (_hamt_dump_ident(writer, level + 1)) {
1184         goto error;
1185     }
1186 
1187     if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1188                           Py_SIZE(node), Py_SIZE(node) / 2))
1189     {
1190         goto error;
1191     }
1192 
1193     tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1194     if (tmp1 == NULL) {
1195         goto error;
1196     }
1197     tmp2 = _PyLong_Format(tmp1, 2);
1198     Py_DECREF(tmp1);
1199     if (tmp2 == NULL) {
1200         goto error;
1201     }
1202     if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1203         Py_DECREF(tmp2);
1204         goto error;
1205     }
1206     Py_DECREF(tmp2);
1207 
1208     for (i = 0; i < Py_SIZE(node); i += 2) {
1209         PyObject *key_or_null = node->b_array[i];
1210         PyObject *val_or_node = node->b_array[i + 1];
1211 
1212         if (_hamt_dump_ident(writer, level + 2)) {
1213             goto error;
1214         }
1215 
1216         if (key_or_null == NULL) {
1217             if (_hamt_dump_format(writer, "NULL:\n")) {
1218                 goto error;
1219             }
1220 
1221             if (hamt_node_dump((PyHamtNode *)val_or_node,
1222                                writer, level + 2))
1223             {
1224                 goto error;
1225             }
1226         }
1227         else {
1228             if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1229                                   val_or_node))
1230             {
1231                 goto error;
1232             }
1233         }
1234 
1235         if (_hamt_dump_format(writer, "\n")) {
1236             goto error;
1237         }
1238     }
1239 
1240     return 0;
1241 error:
1242     return -1;
1243 }
1244 #endif  /* Py_DEBUG */
1245 
1246 
1247 /////////////////////////////////// Collision Node
1248 
1249 
1250 static PyHamtNode *
hamt_node_collision_new(int32_t hash,Py_ssize_t size)1251 hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1252 {
1253     /* Create a new Collision node. */
1254 
1255     PyHamtNode_Collision *node;
1256     Py_ssize_t i;
1257 
1258     assert(size >= 4);
1259     assert(size % 2 == 0);
1260 
1261     node = PyObject_GC_NewVar(
1262         PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1263     if (node == NULL) {
1264         return NULL;
1265     }
1266 
1267     for (i = 0; i < size; i++) {
1268         node->c_array[i] = NULL;
1269     }
1270 
1271     Py_SET_SIZE(node, size);
1272     node->c_hash = hash;
1273 
1274     _PyObject_GC_TRACK(node);
1275 
1276     return (PyHamtNode *)node;
1277 }
1278 
1279 static hamt_find_t
hamt_node_collision_find_index(PyHamtNode_Collision * self,PyObject * key,Py_ssize_t * idx)1280 hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1281                                Py_ssize_t *idx)
1282 {
1283     /* Lookup `key` in the Collision node `self`.  Set the index of the
1284        found key to 'idx'. */
1285 
1286     Py_ssize_t i;
1287     PyObject *el;
1288 
1289     for (i = 0; i < Py_SIZE(self); i += 2) {
1290         el = self->c_array[i];
1291 
1292         assert(el != NULL);
1293         int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1294         if (cmp < 0) {
1295             return F_ERROR;
1296         }
1297         if (cmp == 1) {
1298             *idx = i;
1299             return F_FOUND;
1300         }
1301     }
1302 
1303     return F_NOT_FOUND;
1304 }
1305 
1306 static PyHamtNode *
hamt_node_collision_assoc(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1307 hamt_node_collision_assoc(PyHamtNode_Collision *self,
1308                           uint32_t shift, int32_t hash,
1309                           PyObject *key, PyObject *val, int* added_leaf)
1310 {
1311     /* Set a new key to this level (currently a Collision node)
1312        of the tree. */
1313 
1314     if (hash == self->c_hash) {
1315         /* The hash of the 'key' we are adding matches the hash of
1316            other keys in this Collision node. */
1317 
1318         Py_ssize_t key_idx = -1;
1319         hamt_find_t found;
1320         PyHamtNode_Collision *new_node;
1321         Py_ssize_t i;
1322 
1323         /* Let's try to lookup the new 'key', maybe we already have it. */
1324         found = hamt_node_collision_find_index(self, key, &key_idx);
1325         switch (found) {
1326             case F_ERROR:
1327                 /* Exception. */
1328                 return NULL;
1329 
1330             case F_NOT_FOUND:
1331                 /* This is a totally new key.  Clone the current node,
1332                    add a new key/value to the cloned node. */
1333 
1334                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1335                     self->c_hash, Py_SIZE(self) + 2);
1336                 if (new_node == NULL) {
1337                     return NULL;
1338                 }
1339 
1340                 for (i = 0; i < Py_SIZE(self); i++) {
1341                     Py_INCREF(self->c_array[i]);
1342                     new_node->c_array[i] = self->c_array[i];
1343                 }
1344 
1345                 Py_INCREF(key);
1346                 new_node->c_array[i] = key;
1347                 Py_INCREF(val);
1348                 new_node->c_array[i + 1] = val;
1349 
1350                 *added_leaf = 1;
1351                 return (PyHamtNode *)new_node;
1352 
1353             case F_FOUND:
1354                 /* There's a key which is equal to the key we are adding. */
1355 
1356                 assert(key_idx >= 0);
1357                 assert(key_idx < Py_SIZE(self));
1358                 Py_ssize_t val_idx = key_idx + 1;
1359 
1360                 if (self->c_array[val_idx] == val) {
1361                     /* We're setting a key/value pair that's already set. */
1362                     Py_INCREF(self);
1363                     return (PyHamtNode *)self;
1364                 }
1365 
1366                 /* We need to replace old value for the key
1367                    with a new value.  Create a new Collision node.*/
1368                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1369                     self->c_hash, Py_SIZE(self));
1370                 if (new_node == NULL) {
1371                     return NULL;
1372                 }
1373 
1374                 /* Copy all elements of the old node to the new one. */
1375                 for (i = 0; i < Py_SIZE(self); i++) {
1376                     Py_INCREF(self->c_array[i]);
1377                     new_node->c_array[i] = self->c_array[i];
1378                 }
1379 
1380                 /* Replace the old value with the new value for the our key. */
1381                 Py_DECREF(new_node->c_array[val_idx]);
1382                 Py_INCREF(val);
1383                 new_node->c_array[val_idx] = val;
1384 
1385                 return (PyHamtNode *)new_node;
1386 
1387             default:
1388                 Py_UNREACHABLE();
1389         }
1390     }
1391     else {
1392         /* The hash of the new key is different from the hash that
1393            all keys of this Collision node have.
1394 
1395            Create a Bitmap node inplace with two children:
1396            key/value pair that we're adding, and the Collision node
1397            we're replacing on this tree level.
1398         */
1399 
1400         PyHamtNode_Bitmap *new_node;
1401         PyHamtNode *assoc_res;
1402 
1403         new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1404         if (new_node == NULL) {
1405             return NULL;
1406         }
1407         new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1408         Py_INCREF(self);
1409         new_node->b_array[1] = (PyObject*) self;
1410 
1411         assoc_res = hamt_node_bitmap_assoc(
1412             new_node, shift, hash, key, val, added_leaf);
1413         Py_DECREF(new_node);
1414         return assoc_res;
1415     }
1416 }
1417 
1418 static inline Py_ssize_t
hamt_node_collision_count(PyHamtNode_Collision * node)1419 hamt_node_collision_count(PyHamtNode_Collision *node)
1420 {
1421     return Py_SIZE(node) / 2;
1422 }
1423 
1424 static hamt_without_t
hamt_node_collision_without(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1425 hamt_node_collision_without(PyHamtNode_Collision *self,
1426                             uint32_t shift, int32_t hash,
1427                             PyObject *key,
1428                             PyHamtNode **new_node)
1429 {
1430     if (hash != self->c_hash) {
1431         return W_NOT_FOUND;
1432     }
1433 
1434     Py_ssize_t key_idx = -1;
1435     hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1436 
1437     switch (found) {
1438         case F_ERROR:
1439             return W_ERROR;
1440 
1441         case F_NOT_FOUND:
1442             return W_NOT_FOUND;
1443 
1444         case F_FOUND:
1445             assert(key_idx >= 0);
1446             assert(key_idx < Py_SIZE(self));
1447 
1448             Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1449 
1450             if (new_count == 0) {
1451                 /* The node has only one key/value pair and it's for the
1452                    key we're trying to delete.  So a new node will be empty
1453                    after the removal.
1454                 */
1455                 return W_EMPTY;
1456             }
1457 
1458             if (new_count == 1) {
1459                 /* The node has two keys, and after deletion the
1460                    new Collision node would have one.  Collision nodes
1461                    with one key shouldn't exist, so convert it to a
1462                    Bitmap node.
1463                 */
1464                 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1465                     hamt_node_bitmap_new(2);
1466                 if (node == NULL) {
1467                     return W_ERROR;
1468                 }
1469 
1470                 if (key_idx == 0) {
1471                     Py_INCREF(self->c_array[2]);
1472                     node->b_array[0] = self->c_array[2];
1473                     Py_INCREF(self->c_array[3]);
1474                     node->b_array[1] = self->c_array[3];
1475                 }
1476                 else {
1477                     assert(key_idx == 2);
1478                     Py_INCREF(self->c_array[0]);
1479                     node->b_array[0] = self->c_array[0];
1480                     Py_INCREF(self->c_array[1]);
1481                     node->b_array[1] = self->c_array[1];
1482                 }
1483 
1484                 node->b_bitmap = hamt_bitpos(hash, shift);
1485 
1486                 *new_node = (PyHamtNode *)node;
1487                 return W_NEWNODE;
1488             }
1489 
1490             /* Allocate a new Collision node with capacity for one
1491                less key/value pair */
1492             PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1493                 hamt_node_collision_new(
1494                     self->c_hash, Py_SIZE(self) - 2);
1495             if (new == NULL) {
1496                 return W_ERROR;
1497             }
1498 
1499             /* Copy all other keys from `self` to `new` */
1500             Py_ssize_t i;
1501             for (i = 0; i < key_idx; i++) {
1502                 Py_INCREF(self->c_array[i]);
1503                 new->c_array[i] = self->c_array[i];
1504             }
1505             for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1506                 Py_INCREF(self->c_array[i]);
1507                 new->c_array[i - 2] = self->c_array[i];
1508             }
1509 
1510             *new_node = (PyHamtNode*)new;
1511             return W_NEWNODE;
1512 
1513         default:
1514             Py_UNREACHABLE();
1515     }
1516 }
1517 
1518 static hamt_find_t
hamt_node_collision_find(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1519 hamt_node_collision_find(PyHamtNode_Collision *self,
1520                          uint32_t shift, int32_t hash,
1521                          PyObject *key, PyObject **val)
1522 {
1523     /* Lookup `key` in the Collision node `self`.  Set the value
1524        for the found key to 'val'. */
1525 
1526     Py_ssize_t idx = -1;
1527     hamt_find_t res;
1528 
1529     res = hamt_node_collision_find_index(self, key, &idx);
1530     if (res == F_ERROR || res == F_NOT_FOUND) {
1531         return res;
1532     }
1533 
1534     assert(idx >= 0);
1535     assert(idx + 1 < Py_SIZE(self));
1536 
1537     *val = self->c_array[idx + 1];
1538     assert(*val != NULL);
1539 
1540     return F_FOUND;
1541 }
1542 
1543 
1544 static int
hamt_node_collision_traverse(PyHamtNode_Collision * self,visitproc visit,void * arg)1545 hamt_node_collision_traverse(PyHamtNode_Collision *self,
1546                              visitproc visit, void *arg)
1547 {
1548     /* Collision's tp_traverse */
1549 
1550     Py_ssize_t i;
1551 
1552     for (i = Py_SIZE(self); --i >= 0; ) {
1553         Py_VISIT(self->c_array[i]);
1554     }
1555 
1556     return 0;
1557 }
1558 
1559 static void
hamt_node_collision_dealloc(PyHamtNode_Collision * self)1560 hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1561 {
1562     /* Collision's tp_dealloc */
1563 
1564     Py_ssize_t len = Py_SIZE(self);
1565 
1566     PyObject_GC_UnTrack(self);
1567     Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc)
1568 
1569     if (len > 0) {
1570 
1571         while (--len >= 0) {
1572             Py_XDECREF(self->c_array[len]);
1573         }
1574     }
1575 
1576     Py_TYPE(self)->tp_free((PyObject *)self);
1577     Py_TRASHCAN_END
1578 }
1579 
1580 #ifdef Py_DEBUG
1581 static int
hamt_node_collision_dump(PyHamtNode_Collision * node,_PyUnicodeWriter * writer,int level)1582 hamt_node_collision_dump(PyHamtNode_Collision *node,
1583                          _PyUnicodeWriter *writer, int level)
1584 {
1585     /* Debug build: __dump__() method implementation for Collision nodes. */
1586 
1587     Py_ssize_t i;
1588 
1589     if (_hamt_dump_ident(writer, level + 1)) {
1590         goto error;
1591     }
1592 
1593     if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1594                           Py_SIZE(node), node))
1595     {
1596         goto error;
1597     }
1598 
1599     for (i = 0; i < Py_SIZE(node); i += 2) {
1600         PyObject *key = node->c_array[i];
1601         PyObject *val = node->c_array[i + 1];
1602 
1603         if (_hamt_dump_ident(writer, level + 2)) {
1604             goto error;
1605         }
1606 
1607         if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1608             goto error;
1609         }
1610     }
1611 
1612     return 0;
1613 error:
1614     return -1;
1615 }
1616 #endif  /* Py_DEBUG */
1617 
1618 
1619 /////////////////////////////////// Array Node
1620 
1621 
1622 static PyHamtNode *
hamt_node_array_new(Py_ssize_t count)1623 hamt_node_array_new(Py_ssize_t count)
1624 {
1625     Py_ssize_t i;
1626 
1627     PyHamtNode_Array *node = PyObject_GC_New(
1628         PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1629     if (node == NULL) {
1630         return NULL;
1631     }
1632 
1633     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1634         node->a_array[i] = NULL;
1635     }
1636 
1637     node->a_count = count;
1638 
1639     _PyObject_GC_TRACK(node);
1640     return (PyHamtNode *)node;
1641 }
1642 
1643 static PyHamtNode_Array *
hamt_node_array_clone(PyHamtNode_Array * node)1644 hamt_node_array_clone(PyHamtNode_Array *node)
1645 {
1646     PyHamtNode_Array *clone;
1647     Py_ssize_t i;
1648 
1649     VALIDATE_ARRAY_NODE(node)
1650 
1651     /* Create a new Array node. */
1652     clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1653     if (clone == NULL) {
1654         return NULL;
1655     }
1656 
1657     /* Copy all elements from the current Array node to the new one. */
1658     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1659         Py_XINCREF(node->a_array[i]);
1660         clone->a_array[i] = node->a_array[i];
1661     }
1662 
1663     VALIDATE_ARRAY_NODE(clone)
1664     return clone;
1665 }
1666 
1667 static PyHamtNode *
hamt_node_array_assoc(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1668 hamt_node_array_assoc(PyHamtNode_Array *self,
1669                       uint32_t shift, int32_t hash,
1670                       PyObject *key, PyObject *val, int* added_leaf)
1671 {
1672     /* Set a new key to this level (currently a Collision node)
1673        of the tree.
1674 
1675        Array nodes don't store values, they can only point to
1676        other nodes.  They are simple arrays of 32 BaseNode pointers/
1677      */
1678 
1679     uint32_t idx = hamt_mask(hash, shift);
1680     PyHamtNode *node = self->a_array[idx];
1681     PyHamtNode *child_node;
1682     PyHamtNode_Array *new_node;
1683     Py_ssize_t i;
1684 
1685     if (node == NULL) {
1686         /* There's no child node for the given hash.  Create a new
1687            Bitmap node for this key. */
1688 
1689         PyHamtNode_Bitmap *empty = NULL;
1690 
1691         /* Get an empty Bitmap node to work with. */
1692         empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1693         if (empty == NULL) {
1694             return NULL;
1695         }
1696 
1697         /* Set key/val to the newly created empty Bitmap, thus
1698            creating a new Bitmap node with our key/value pair. */
1699         child_node = hamt_node_bitmap_assoc(
1700             empty,
1701             shift + 5, hash, key, val, added_leaf);
1702         Py_DECREF(empty);
1703         if (child_node == NULL) {
1704             return NULL;
1705         }
1706 
1707         /* Create a new Array node. */
1708         new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1709         if (new_node == NULL) {
1710             Py_DECREF(child_node);
1711             return NULL;
1712         }
1713 
1714         /* Copy all elements from the current Array node to the
1715            new one. */
1716         for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1717             Py_XINCREF(self->a_array[i]);
1718             new_node->a_array[i] = self->a_array[i];
1719         }
1720 
1721         assert(new_node->a_array[idx] == NULL);
1722         new_node->a_array[idx] = child_node;  /* borrow */
1723         VALIDATE_ARRAY_NODE(new_node)
1724     }
1725     else {
1726         /* There's a child node for the given hash.
1727            Set the key to it./ */
1728         child_node = hamt_node_assoc(
1729             node, shift + 5, hash, key, val, added_leaf);
1730         if (child_node == NULL) {
1731             return NULL;
1732         }
1733         else if (child_node == (PyHamtNode *)self) {
1734             Py_DECREF(child_node);
1735             return (PyHamtNode *)self;
1736         }
1737 
1738         new_node = hamt_node_array_clone(self);
1739         if (new_node == NULL) {
1740             Py_DECREF(child_node);
1741             return NULL;
1742         }
1743 
1744         Py_SETREF(new_node->a_array[idx], child_node);  /* borrow */
1745         VALIDATE_ARRAY_NODE(new_node)
1746     }
1747 
1748     return (PyHamtNode *)new_node;
1749 }
1750 
1751 static hamt_without_t
hamt_node_array_without(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1752 hamt_node_array_without(PyHamtNode_Array *self,
1753                         uint32_t shift, int32_t hash,
1754                         PyObject *key,
1755                         PyHamtNode **new_node)
1756 {
1757     uint32_t idx = hamt_mask(hash, shift);
1758     PyHamtNode *node = self->a_array[idx];
1759 
1760     if (node == NULL) {
1761         return W_NOT_FOUND;
1762     }
1763 
1764     PyHamtNode *sub_node = NULL;
1765     hamt_without_t res = hamt_node_without(
1766         (PyHamtNode *)node,
1767         shift + 5, hash, key, &sub_node);
1768 
1769     switch (res) {
1770         case W_NOT_FOUND:
1771         case W_ERROR:
1772             assert(sub_node == NULL);
1773             return res;
1774 
1775         case W_NEWNODE: {
1776             /* We need to replace a node at the `idx` index.
1777                Clone this node and replace.
1778             */
1779             assert(sub_node != NULL);
1780 
1781             PyHamtNode_Array *clone = hamt_node_array_clone(self);
1782             if (clone == NULL) {
1783                 Py_DECREF(sub_node);
1784                 return W_ERROR;
1785             }
1786 
1787             Py_SETREF(clone->a_array[idx], sub_node);  /* borrow */
1788             *new_node = (PyHamtNode*)clone;  /* borrow */
1789             return W_NEWNODE;
1790         }
1791 
1792         case W_EMPTY: {
1793             assert(sub_node == NULL);
1794             /* We need to remove a node at the `idx` index.
1795                Calculate the size of the replacement Array node.
1796             */
1797             Py_ssize_t new_count = self->a_count - 1;
1798 
1799             if (new_count == 0) {
1800                 return W_EMPTY;
1801             }
1802 
1803             if (new_count >= 16) {
1804                 /* We convert Bitmap nodes to Array nodes, when a
1805                    Bitmap node needs to store more than 15 key/value
1806                    pairs.  So we will create a new Array node if we
1807                    the number of key/values after deletion is still
1808                    greater than 15.
1809                 */
1810 
1811                 PyHamtNode_Array *new = hamt_node_array_clone(self);
1812                 if (new == NULL) {
1813                     return W_ERROR;
1814                 }
1815                 new->a_count = new_count;
1816                 Py_CLEAR(new->a_array[idx]);
1817 
1818                 *new_node = (PyHamtNode*)new;  /* borrow */
1819                 return W_NEWNODE;
1820             }
1821 
1822             /* New Array node would have less than 16 key/value
1823                pairs.  We need to create a replacement Bitmap node. */
1824 
1825             Py_ssize_t bitmap_size = new_count * 2;
1826             uint32_t bitmap = 0;
1827 
1828             PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1829                 hamt_node_bitmap_new(bitmap_size);
1830             if (new == NULL) {
1831                 return W_ERROR;
1832             }
1833 
1834             Py_ssize_t new_i = 0;
1835             for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1836                 if (i == idx) {
1837                     /* Skip the node we are deleting. */
1838                     continue;
1839                 }
1840 
1841                 PyHamtNode *node = self->a_array[i];
1842                 if (node == NULL) {
1843                     /* Skip any missing nodes. */
1844                     continue;
1845                 }
1846 
1847                 bitmap |= 1U << i;
1848 
1849                 if (IS_BITMAP_NODE(node)) {
1850                     PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1851 
1852                     if (hamt_node_bitmap_count(child) == 1 &&
1853                             child->b_array[0] != NULL)
1854                     {
1855                         /* node is a Bitmap with one key/value pair, just
1856                            merge it into the new Bitmap node we're building.
1857 
1858                            Note that we don't inline Bitmap nodes that
1859                            have a NULL key -- those nodes point to another
1860                            tree level, and we cannot simply move tree levels
1861                            up or down.
1862                         */
1863                         PyObject *key = child->b_array[0];
1864                         PyObject *val = child->b_array[1];
1865 
1866                         Py_INCREF(key);
1867                         new->b_array[new_i] = key;
1868                         Py_INCREF(val);
1869                         new->b_array[new_i + 1] = val;
1870                     }
1871                     else {
1872                         new->b_array[new_i] = NULL;
1873                         Py_INCREF(node);
1874                         new->b_array[new_i + 1] = (PyObject*)node;
1875                     }
1876                 }
1877                 else {
1878 
1879 #ifdef Py_DEBUG
1880                     if (IS_COLLISION_NODE(node)) {
1881                         Py_ssize_t child_count = hamt_node_collision_count(
1882                             (PyHamtNode_Collision*)node);
1883                         assert(child_count > 1);
1884                     }
1885                     else if (IS_ARRAY_NODE(node)) {
1886                         assert(((PyHamtNode_Array*)node)->a_count >= 16);
1887                     }
1888 #endif
1889 
1890                     /* Just copy the node into our new Bitmap */
1891                     new->b_array[new_i] = NULL;
1892                     Py_INCREF(node);
1893                     new->b_array[new_i + 1] = (PyObject*)node;
1894                 }
1895 
1896                 new_i += 2;
1897             }
1898 
1899             new->b_bitmap = bitmap;
1900             *new_node = (PyHamtNode*)new;  /* borrow */
1901             return W_NEWNODE;
1902         }
1903 
1904         default:
1905             Py_UNREACHABLE();
1906     }
1907 }
1908 
1909 static hamt_find_t
hamt_node_array_find(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1910 hamt_node_array_find(PyHamtNode_Array *self,
1911                      uint32_t shift, int32_t hash,
1912                      PyObject *key, PyObject **val)
1913 {
1914     /* Lookup `key` in the Array node `self`.  Set the value
1915        for the found key to 'val'. */
1916 
1917     uint32_t idx = hamt_mask(hash, shift);
1918     PyHamtNode *node;
1919 
1920     node = self->a_array[idx];
1921     if (node == NULL) {
1922         return F_NOT_FOUND;
1923     }
1924 
1925     /* Dispatch to the generic hamt_node_find */
1926     return hamt_node_find(node, shift + 5, hash, key, val);
1927 }
1928 
1929 static int
hamt_node_array_traverse(PyHamtNode_Array * self,visitproc visit,void * arg)1930 hamt_node_array_traverse(PyHamtNode_Array *self,
1931                          visitproc visit, void *arg)
1932 {
1933     /* Array's tp_traverse */
1934 
1935     Py_ssize_t i;
1936 
1937     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1938         Py_VISIT(self->a_array[i]);
1939     }
1940 
1941     return 0;
1942 }
1943 
1944 static void
hamt_node_array_dealloc(PyHamtNode_Array * self)1945 hamt_node_array_dealloc(PyHamtNode_Array *self)
1946 {
1947     /* Array's tp_dealloc */
1948 
1949     Py_ssize_t i;
1950 
1951     PyObject_GC_UnTrack(self);
1952     Py_TRASHCAN_BEGIN(self, hamt_node_array_dealloc)
1953 
1954     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1955         Py_XDECREF(self->a_array[i]);
1956     }
1957 
1958     Py_TYPE(self)->tp_free((PyObject *)self);
1959     Py_TRASHCAN_END
1960 }
1961 
1962 #ifdef Py_DEBUG
1963 static int
hamt_node_array_dump(PyHamtNode_Array * node,_PyUnicodeWriter * writer,int level)1964 hamt_node_array_dump(PyHamtNode_Array *node,
1965                      _PyUnicodeWriter *writer, int level)
1966 {
1967     /* Debug build: __dump__() method implementation for Array nodes. */
1968 
1969     Py_ssize_t i;
1970 
1971     if (_hamt_dump_ident(writer, level + 1)) {
1972         goto error;
1973     }
1974 
1975     if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1976         goto error;
1977     }
1978 
1979     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1980         if (node->a_array[i] == NULL) {
1981             continue;
1982         }
1983 
1984         if (_hamt_dump_ident(writer, level + 2)) {
1985             goto error;
1986         }
1987 
1988         if (_hamt_dump_format(writer, "%zd::\n", i)) {
1989             goto error;
1990         }
1991 
1992         if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
1993             goto error;
1994         }
1995 
1996         if (_hamt_dump_format(writer, "\n")) {
1997             goto error;
1998         }
1999     }
2000 
2001     return 0;
2002 error:
2003     return -1;
2004 }
2005 #endif  /* Py_DEBUG */
2006 
2007 
2008 /////////////////////////////////// Node Dispatch
2009 
2010 
2011 static PyHamtNode *
hamt_node_assoc(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)2012 hamt_node_assoc(PyHamtNode *node,
2013                 uint32_t shift, int32_t hash,
2014                 PyObject *key, PyObject *val, int* added_leaf)
2015 {
2016     /* Set key/value to the 'node' starting with the given shift/hash.
2017        Return a new node, or the same node if key/value already
2018        set.
2019 
2020        added_leaf will be set to 1 if key/value wasn't in the
2021        tree before.
2022 
2023        This method automatically dispatches to the suitable
2024        hamt_node_{nodetype}_assoc method.
2025     */
2026 
2027     if (IS_BITMAP_NODE(node)) {
2028         return hamt_node_bitmap_assoc(
2029             (PyHamtNode_Bitmap *)node,
2030             shift, hash, key, val, added_leaf);
2031     }
2032     else if (IS_ARRAY_NODE(node)) {
2033         return hamt_node_array_assoc(
2034             (PyHamtNode_Array *)node,
2035             shift, hash, key, val, added_leaf);
2036     }
2037     else {
2038         assert(IS_COLLISION_NODE(node));
2039         return hamt_node_collision_assoc(
2040             (PyHamtNode_Collision *)node,
2041             shift, hash, key, val, added_leaf);
2042     }
2043 }
2044 
2045 static hamt_without_t
hamt_node_without(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)2046 hamt_node_without(PyHamtNode *node,
2047                   uint32_t shift, int32_t hash,
2048                   PyObject *key,
2049                   PyHamtNode **new_node)
2050 {
2051     if (IS_BITMAP_NODE(node)) {
2052         return hamt_node_bitmap_without(
2053             (PyHamtNode_Bitmap *)node,
2054             shift, hash, key,
2055             new_node);
2056     }
2057     else if (IS_ARRAY_NODE(node)) {
2058         return hamt_node_array_without(
2059             (PyHamtNode_Array *)node,
2060             shift, hash, key,
2061             new_node);
2062     }
2063     else {
2064         assert(IS_COLLISION_NODE(node));
2065         return hamt_node_collision_without(
2066             (PyHamtNode_Collision *)node,
2067             shift, hash, key,
2068             new_node);
2069     }
2070 }
2071 
2072 static hamt_find_t
hamt_node_find(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)2073 hamt_node_find(PyHamtNode *node,
2074                uint32_t shift, int32_t hash,
2075                PyObject *key, PyObject **val)
2076 {
2077     /* Find the key in the node starting with the given shift/hash.
2078 
2079        If a value is found, the result will be set to F_FOUND, and
2080        *val will point to the found value object.
2081 
2082        If a value wasn't found, the result will be set to F_NOT_FOUND.
2083 
2084        If an exception occurs during the call, the result will be F_ERROR.
2085 
2086        This method automatically dispatches to the suitable
2087        hamt_node_{nodetype}_find method.
2088     */
2089 
2090     if (IS_BITMAP_NODE(node)) {
2091         return hamt_node_bitmap_find(
2092             (PyHamtNode_Bitmap *)node,
2093             shift, hash, key, val);
2094 
2095     }
2096     else if (IS_ARRAY_NODE(node)) {
2097         return hamt_node_array_find(
2098             (PyHamtNode_Array *)node,
2099             shift, hash, key, val);
2100     }
2101     else {
2102         assert(IS_COLLISION_NODE(node));
2103         return hamt_node_collision_find(
2104             (PyHamtNode_Collision *)node,
2105             shift, hash, key, val);
2106     }
2107 }
2108 
2109 #ifdef Py_DEBUG
2110 static int
hamt_node_dump(PyHamtNode * node,_PyUnicodeWriter * writer,int level)2111 hamt_node_dump(PyHamtNode *node,
2112                _PyUnicodeWriter *writer, int level)
2113 {
2114     /* Debug build: __dump__() method implementation for a node.
2115 
2116        This method automatically dispatches to the suitable
2117        hamt_node_{nodetype})_dump method.
2118     */
2119 
2120     if (IS_BITMAP_NODE(node)) {
2121         return hamt_node_bitmap_dump(
2122             (PyHamtNode_Bitmap *)node, writer, level);
2123     }
2124     else if (IS_ARRAY_NODE(node)) {
2125         return hamt_node_array_dump(
2126             (PyHamtNode_Array *)node, writer, level);
2127     }
2128     else {
2129         assert(IS_COLLISION_NODE(node));
2130         return hamt_node_collision_dump(
2131             (PyHamtNode_Collision *)node, writer, level);
2132     }
2133 }
2134 #endif  /* Py_DEBUG */
2135 
2136 
2137 /////////////////////////////////// Iterators: Machinery
2138 
2139 
2140 static hamt_iter_t
2141 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2142 
2143 
2144 static void
hamt_iterator_init(PyHamtIteratorState * iter,PyHamtNode * root)2145 hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2146 {
2147     for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2148         iter->i_nodes[i] = NULL;
2149         iter->i_pos[i] = 0;
2150     }
2151 
2152     iter->i_level = 0;
2153 
2154     /* Note: we don't incref/decref nodes in i_nodes. */
2155     iter->i_nodes[0] = root;
2156 }
2157 
2158 static hamt_iter_t
hamt_iterator_bitmap_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2159 hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2160                           PyObject **key, PyObject **val)
2161 {
2162     int8_t level = iter->i_level;
2163 
2164     PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2165     Py_ssize_t pos = iter->i_pos[level];
2166 
2167     if (pos + 1 >= Py_SIZE(node)) {
2168 #ifdef Py_DEBUG
2169         assert(iter->i_level >= 0);
2170         iter->i_nodes[iter->i_level] = NULL;
2171 #endif
2172         iter->i_level--;
2173         return hamt_iterator_next(iter, key, val);
2174     }
2175 
2176     if (node->b_array[pos] == NULL) {
2177         iter->i_pos[level] = pos + 2;
2178 
2179         int8_t next_level = level + 1;
2180         assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2181         iter->i_level = next_level;
2182         iter->i_pos[next_level] = 0;
2183         iter->i_nodes[next_level] = (PyHamtNode *)
2184             node->b_array[pos + 1];
2185 
2186         return hamt_iterator_next(iter, key, val);
2187     }
2188 
2189     *key = node->b_array[pos];
2190     *val = node->b_array[pos + 1];
2191     iter->i_pos[level] = pos + 2;
2192     return I_ITEM;
2193 }
2194 
2195 static hamt_iter_t
hamt_iterator_collision_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2196 hamt_iterator_collision_next(PyHamtIteratorState *iter,
2197                              PyObject **key, PyObject **val)
2198 {
2199     int8_t level = iter->i_level;
2200 
2201     PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2202     Py_ssize_t pos = iter->i_pos[level];
2203 
2204     if (pos + 1 >= Py_SIZE(node)) {
2205 #ifdef Py_DEBUG
2206         assert(iter->i_level >= 0);
2207         iter->i_nodes[iter->i_level] = NULL;
2208 #endif
2209         iter->i_level--;
2210         return hamt_iterator_next(iter, key, val);
2211     }
2212 
2213     *key = node->c_array[pos];
2214     *val = node->c_array[pos + 1];
2215     iter->i_pos[level] = pos + 2;
2216     return I_ITEM;
2217 }
2218 
2219 static hamt_iter_t
hamt_iterator_array_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2220 hamt_iterator_array_next(PyHamtIteratorState *iter,
2221                          PyObject **key, PyObject **val)
2222 {
2223     int8_t level = iter->i_level;
2224 
2225     PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2226     Py_ssize_t pos = iter->i_pos[level];
2227 
2228     if (pos >= HAMT_ARRAY_NODE_SIZE) {
2229 #ifdef Py_DEBUG
2230         assert(iter->i_level >= 0);
2231         iter->i_nodes[iter->i_level] = NULL;
2232 #endif
2233         iter->i_level--;
2234         return hamt_iterator_next(iter, key, val);
2235     }
2236 
2237     for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2238         if (node->a_array[i] != NULL) {
2239             iter->i_pos[level] = i + 1;
2240 
2241             int8_t next_level = level + 1;
2242             assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2243             iter->i_pos[next_level] = 0;
2244             iter->i_nodes[next_level] = node->a_array[i];
2245             iter->i_level = next_level;
2246 
2247             return hamt_iterator_next(iter, key, val);
2248         }
2249     }
2250 
2251 #ifdef Py_DEBUG
2252         assert(iter->i_level >= 0);
2253         iter->i_nodes[iter->i_level] = NULL;
2254 #endif
2255 
2256     iter->i_level--;
2257     return hamt_iterator_next(iter, key, val);
2258 }
2259 
2260 static hamt_iter_t
hamt_iterator_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2261 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2262 {
2263     if (iter->i_level < 0) {
2264         return I_END;
2265     }
2266 
2267     assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2268 
2269     PyHamtNode *current = iter->i_nodes[iter->i_level];
2270 
2271     if (IS_BITMAP_NODE(current)) {
2272         return hamt_iterator_bitmap_next(iter, key, val);
2273     }
2274     else if (IS_ARRAY_NODE(current)) {
2275         return hamt_iterator_array_next(iter, key, val);
2276     }
2277     else {
2278         assert(IS_COLLISION_NODE(current));
2279         return hamt_iterator_collision_next(iter, key, val);
2280     }
2281 }
2282 
2283 
2284 /////////////////////////////////// HAMT high-level functions
2285 
2286 
2287 PyHamtObject *
_PyHamt_Assoc(PyHamtObject * o,PyObject * key,PyObject * val)2288 _PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2289 {
2290     int32_t key_hash;
2291     int added_leaf = 0;
2292     PyHamtNode *new_root;
2293     PyHamtObject *new_o;
2294 
2295     key_hash = hamt_hash(key);
2296     if (key_hash == -1) {
2297         return NULL;
2298     }
2299 
2300     new_root = hamt_node_assoc(
2301         (PyHamtNode *)(o->h_root),
2302         0, key_hash, key, val, &added_leaf);
2303     if (new_root == NULL) {
2304         return NULL;
2305     }
2306 
2307     if (new_root == o->h_root) {
2308         Py_DECREF(new_root);
2309         Py_INCREF(o);
2310         return o;
2311     }
2312 
2313     new_o = hamt_alloc();
2314     if (new_o == NULL) {
2315         Py_DECREF(new_root);
2316         return NULL;
2317     }
2318 
2319     new_o->h_root = new_root;  /* borrow */
2320     new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2321 
2322     return new_o;
2323 }
2324 
2325 PyHamtObject *
_PyHamt_Without(PyHamtObject * o,PyObject * key)2326 _PyHamt_Without(PyHamtObject *o, PyObject *key)
2327 {
2328     int32_t key_hash = hamt_hash(key);
2329     if (key_hash == -1) {
2330         return NULL;
2331     }
2332 
2333     PyHamtNode *new_root = NULL;
2334 
2335     hamt_without_t res = hamt_node_without(
2336         (PyHamtNode *)(o->h_root),
2337         0, key_hash, key,
2338         &new_root);
2339 
2340     switch (res) {
2341         case W_ERROR:
2342             return NULL;
2343         case W_EMPTY:
2344             return _PyHamt_New();
2345         case W_NOT_FOUND:
2346             Py_INCREF(o);
2347             return o;
2348         case W_NEWNODE: {
2349             assert(new_root != NULL);
2350 
2351             PyHamtObject *new_o = hamt_alloc();
2352             if (new_o == NULL) {
2353                 Py_DECREF(new_root);
2354                 return NULL;
2355             }
2356 
2357             new_o->h_root = new_root;  /* borrow */
2358             new_o->h_count = o->h_count - 1;
2359             assert(new_o->h_count >= 0);
2360             return new_o;
2361         }
2362         default:
2363             Py_UNREACHABLE();
2364     }
2365 }
2366 
2367 static hamt_find_t
hamt_find(PyHamtObject * o,PyObject * key,PyObject ** val)2368 hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2369 {
2370     if (o->h_count == 0) {
2371         return F_NOT_FOUND;
2372     }
2373 
2374     int32_t key_hash = hamt_hash(key);
2375     if (key_hash == -1) {
2376         return F_ERROR;
2377     }
2378 
2379     return hamt_node_find(o->h_root, 0, key_hash, key, val);
2380 }
2381 
2382 
2383 int
_PyHamt_Find(PyHamtObject * o,PyObject * key,PyObject ** val)2384 _PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2385 {
2386     hamt_find_t res = hamt_find(o, key, val);
2387     switch (res) {
2388         case F_ERROR:
2389             return -1;
2390         case F_NOT_FOUND:
2391             return 0;
2392         case F_FOUND:
2393             return 1;
2394         default:
2395             Py_UNREACHABLE();
2396     }
2397 }
2398 
2399 
2400 int
_PyHamt_Eq(PyHamtObject * v,PyHamtObject * w)2401 _PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2402 {
2403     if (v == w) {
2404         return 1;
2405     }
2406 
2407     if (v->h_count != w->h_count) {
2408         return 0;
2409     }
2410 
2411     PyHamtIteratorState iter;
2412     hamt_iter_t iter_res;
2413     hamt_find_t find_res;
2414     PyObject *v_key;
2415     PyObject *v_val;
2416     PyObject *w_val;
2417 
2418     hamt_iterator_init(&iter, v->h_root);
2419 
2420     do {
2421         iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2422         if (iter_res == I_ITEM) {
2423             find_res = hamt_find(w, v_key, &w_val);
2424             switch (find_res) {
2425                 case F_ERROR:
2426                     return -1;
2427 
2428                 case F_NOT_FOUND:
2429                     return 0;
2430 
2431                 case F_FOUND: {
2432                     int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2433                     if (cmp < 0) {
2434                         return -1;
2435                     }
2436                     if (cmp == 0) {
2437                         return 0;
2438                     }
2439                 }
2440             }
2441         }
2442     } while (iter_res != I_END);
2443 
2444     return 1;
2445 }
2446 
2447 Py_ssize_t
_PyHamt_Len(PyHamtObject * o)2448 _PyHamt_Len(PyHamtObject *o)
2449 {
2450     return o->h_count;
2451 }
2452 
2453 static PyHamtObject *
hamt_alloc(void)2454 hamt_alloc(void)
2455 {
2456     PyHamtObject *o;
2457     o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2458     if (o == NULL) {
2459         return NULL;
2460     }
2461     o->h_count = 0;
2462     o->h_root = NULL;
2463     o->h_weakreflist = NULL;
2464     PyObject_GC_Track(o);
2465     return o;
2466 }
2467 
2468 PyHamtObject *
_PyHamt_New(void)2469 _PyHamt_New(void)
2470 {
2471     if (_empty_hamt != NULL) {
2472         /* HAMT is an immutable object so we can easily cache an
2473            empty instance. */
2474         Py_INCREF(_empty_hamt);
2475         return _empty_hamt;
2476     }
2477 
2478     PyHamtObject *o = hamt_alloc();
2479     if (o == NULL) {
2480         return NULL;
2481     }
2482 
2483     o->h_root = hamt_node_bitmap_new(0);
2484     if (o->h_root == NULL) {
2485         Py_DECREF(o);
2486         return NULL;
2487     }
2488 
2489     o->h_count = 0;
2490 
2491     if (_empty_hamt == NULL) {
2492         Py_INCREF(o);
2493         _empty_hamt = o;
2494     }
2495 
2496     return o;
2497 }
2498 
2499 #ifdef Py_DEBUG
2500 static PyObject *
hamt_dump(PyHamtObject * self)2501 hamt_dump(PyHamtObject *self)
2502 {
2503     _PyUnicodeWriter writer;
2504 
2505     _PyUnicodeWriter_Init(&writer);
2506 
2507     if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2508         goto error;
2509     }
2510 
2511     if (hamt_node_dump(self->h_root, &writer, 0)) {
2512         goto error;
2513     }
2514 
2515     return _PyUnicodeWriter_Finish(&writer);
2516 
2517 error:
2518     _PyUnicodeWriter_Dealloc(&writer);
2519     return NULL;
2520 }
2521 #endif  /* Py_DEBUG */
2522 
2523 
2524 /////////////////////////////////// Iterators: Shared Iterator Implementation
2525 
2526 
2527 static int
hamt_baseiter_tp_clear(PyHamtIterator * it)2528 hamt_baseiter_tp_clear(PyHamtIterator *it)
2529 {
2530     Py_CLEAR(it->hi_obj);
2531     return 0;
2532 }
2533 
2534 static void
hamt_baseiter_tp_dealloc(PyHamtIterator * it)2535 hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2536 {
2537     PyObject_GC_UnTrack(it);
2538     (void)hamt_baseiter_tp_clear(it);
2539     PyObject_GC_Del(it);
2540 }
2541 
2542 static int
hamt_baseiter_tp_traverse(PyHamtIterator * it,visitproc visit,void * arg)2543 hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2544 {
2545     Py_VISIT(it->hi_obj);
2546     return 0;
2547 }
2548 
2549 static PyObject *
hamt_baseiter_tp_iternext(PyHamtIterator * it)2550 hamt_baseiter_tp_iternext(PyHamtIterator *it)
2551 {
2552     PyObject *key;
2553     PyObject *val;
2554     hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2555 
2556     switch (res) {
2557         case I_END:
2558             PyErr_SetNone(PyExc_StopIteration);
2559             return NULL;
2560 
2561         case I_ITEM: {
2562             return (*(it->hi_yield))(key, val);
2563         }
2564 
2565         default: {
2566             Py_UNREACHABLE();
2567         }
2568     }
2569 }
2570 
2571 static Py_ssize_t
hamt_baseiter_tp_len(PyHamtIterator * it)2572 hamt_baseiter_tp_len(PyHamtIterator *it)
2573 {
2574     return it->hi_obj->h_count;
2575 }
2576 
2577 static PyMappingMethods PyHamtIterator_as_mapping = {
2578     (lenfunc)hamt_baseiter_tp_len,
2579 };
2580 
2581 static PyObject *
hamt_baseiter_new(PyTypeObject * type,binaryfunc yield,PyHamtObject * o)2582 hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2583 {
2584     PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2585     if (it == NULL) {
2586         return NULL;
2587     }
2588 
2589     Py_INCREF(o);
2590     it->hi_obj = o;
2591     it->hi_yield = yield;
2592 
2593     hamt_iterator_init(&it->hi_iter, o->h_root);
2594 
2595     return (PyObject*)it;
2596 }
2597 
2598 #define ITERATOR_TYPE_SHARED_SLOTS                              \
2599     .tp_basicsize = sizeof(PyHamtIterator),                     \
2600     .tp_itemsize = 0,                                           \
2601     .tp_as_mapping = &PyHamtIterator_as_mapping,                \
2602     .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc,         \
2603     .tp_getattro = PyObject_GenericGetAttr,                     \
2604     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,        \
2605     .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse,     \
2606     .tp_clear = (inquiry)hamt_baseiter_tp_clear,                \
2607     .tp_iter = PyObject_SelfIter,                               \
2608     .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2609 
2610 
2611 /////////////////////////////////// _PyHamtItems_Type
2612 
2613 
2614 PyTypeObject _PyHamtItems_Type = {
2615     PyVarObject_HEAD_INIT(NULL, 0)
2616     "items",
2617     ITERATOR_TYPE_SHARED_SLOTS
2618 };
2619 
2620 static PyObject *
hamt_iter_yield_items(PyObject * key,PyObject * val)2621 hamt_iter_yield_items(PyObject *key, PyObject *val)
2622 {
2623     return PyTuple_Pack(2, key, val);
2624 }
2625 
2626 PyObject *
_PyHamt_NewIterItems(PyHamtObject * o)2627 _PyHamt_NewIterItems(PyHamtObject *o)
2628 {
2629     return hamt_baseiter_new(
2630         &_PyHamtItems_Type, hamt_iter_yield_items, o);
2631 }
2632 
2633 
2634 /////////////////////////////////// _PyHamtKeys_Type
2635 
2636 
2637 PyTypeObject _PyHamtKeys_Type = {
2638     PyVarObject_HEAD_INIT(NULL, 0)
2639     "keys",
2640     ITERATOR_TYPE_SHARED_SLOTS
2641 };
2642 
2643 static PyObject *
hamt_iter_yield_keys(PyObject * key,PyObject * val)2644 hamt_iter_yield_keys(PyObject *key, PyObject *val)
2645 {
2646     Py_INCREF(key);
2647     return key;
2648 }
2649 
2650 PyObject *
_PyHamt_NewIterKeys(PyHamtObject * o)2651 _PyHamt_NewIterKeys(PyHamtObject *o)
2652 {
2653     return hamt_baseiter_new(
2654         &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2655 }
2656 
2657 
2658 /////////////////////////////////// _PyHamtValues_Type
2659 
2660 
2661 PyTypeObject _PyHamtValues_Type = {
2662     PyVarObject_HEAD_INIT(NULL, 0)
2663     "values",
2664     ITERATOR_TYPE_SHARED_SLOTS
2665 };
2666 
2667 static PyObject *
hamt_iter_yield_values(PyObject * key,PyObject * val)2668 hamt_iter_yield_values(PyObject *key, PyObject *val)
2669 {
2670     Py_INCREF(val);
2671     return val;
2672 }
2673 
2674 PyObject *
_PyHamt_NewIterValues(PyHamtObject * o)2675 _PyHamt_NewIterValues(PyHamtObject *o)
2676 {
2677     return hamt_baseiter_new(
2678         &_PyHamtValues_Type, hamt_iter_yield_values, o);
2679 }
2680 
2681 
2682 /////////////////////////////////// _PyHamt_Type
2683 
2684 
2685 #ifdef Py_DEBUG
2686 static PyObject *
2687 hamt_dump(PyHamtObject *self);
2688 #endif
2689 
2690 
2691 static PyObject *
hamt_tp_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2692 hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2693 {
2694     return (PyObject*)_PyHamt_New();
2695 }
2696 
2697 static int
hamt_tp_clear(PyHamtObject * self)2698 hamt_tp_clear(PyHamtObject *self)
2699 {
2700     Py_CLEAR(self->h_root);
2701     return 0;
2702 }
2703 
2704 
2705 static int
hamt_tp_traverse(PyHamtObject * self,visitproc visit,void * arg)2706 hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2707 {
2708     Py_VISIT(self->h_root);
2709     return 0;
2710 }
2711 
2712 static void
hamt_tp_dealloc(PyHamtObject * self)2713 hamt_tp_dealloc(PyHamtObject *self)
2714 {
2715     PyObject_GC_UnTrack(self);
2716     if (self->h_weakreflist != NULL) {
2717         PyObject_ClearWeakRefs((PyObject*)self);
2718     }
2719     (void)hamt_tp_clear(self);
2720     Py_TYPE(self)->tp_free(self);
2721 }
2722 
2723 
2724 static PyObject *
hamt_tp_richcompare(PyObject * v,PyObject * w,int op)2725 hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2726 {
2727     if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2728         Py_RETURN_NOTIMPLEMENTED;
2729     }
2730 
2731     int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2732     if (res < 0) {
2733         return NULL;
2734     }
2735 
2736     if (op == Py_NE) {
2737         res = !res;
2738     }
2739 
2740     if (res) {
2741         Py_RETURN_TRUE;
2742     }
2743     else {
2744         Py_RETURN_FALSE;
2745     }
2746 }
2747 
2748 static int
hamt_tp_contains(PyHamtObject * self,PyObject * key)2749 hamt_tp_contains(PyHamtObject *self, PyObject *key)
2750 {
2751     PyObject *val;
2752     return _PyHamt_Find(self, key, &val);
2753 }
2754 
2755 static PyObject *
hamt_tp_subscript(PyHamtObject * self,PyObject * key)2756 hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2757 {
2758     PyObject *val;
2759     hamt_find_t res = hamt_find(self, key, &val);
2760     switch (res) {
2761         case F_ERROR:
2762             return NULL;
2763         case F_FOUND:
2764             Py_INCREF(val);
2765             return val;
2766         case F_NOT_FOUND:
2767             PyErr_SetObject(PyExc_KeyError, key);
2768             return NULL;
2769         default:
2770             Py_UNREACHABLE();
2771     }
2772 }
2773 
2774 static Py_ssize_t
hamt_tp_len(PyHamtObject * self)2775 hamt_tp_len(PyHamtObject *self)
2776 {
2777     return _PyHamt_Len(self);
2778 }
2779 
2780 static PyObject *
hamt_tp_iter(PyHamtObject * self)2781 hamt_tp_iter(PyHamtObject *self)
2782 {
2783     return _PyHamt_NewIterKeys(self);
2784 }
2785 
2786 static PyObject *
hamt_py_set(PyHamtObject * self,PyObject * args)2787 hamt_py_set(PyHamtObject *self, PyObject *args)
2788 {
2789     PyObject *key;
2790     PyObject *val;
2791 
2792     if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2793         return NULL;
2794     }
2795 
2796     return (PyObject *)_PyHamt_Assoc(self, key, val);
2797 }
2798 
2799 static PyObject *
hamt_py_get(PyHamtObject * self,PyObject * args)2800 hamt_py_get(PyHamtObject *self, PyObject *args)
2801 {
2802     PyObject *key;
2803     PyObject *def = NULL;
2804 
2805     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2806         return NULL;
2807     }
2808 
2809     PyObject *val = NULL;
2810     hamt_find_t res = hamt_find(self, key, &val);
2811     switch (res) {
2812         case F_ERROR:
2813             return NULL;
2814         case F_FOUND:
2815             Py_INCREF(val);
2816             return val;
2817         case F_NOT_FOUND:
2818             if (def == NULL) {
2819                 Py_RETURN_NONE;
2820             }
2821             Py_INCREF(def);
2822             return def;
2823         default:
2824             Py_UNREACHABLE();
2825     }
2826 }
2827 
2828 static PyObject *
hamt_py_delete(PyHamtObject * self,PyObject * key)2829 hamt_py_delete(PyHamtObject *self, PyObject *key)
2830 {
2831     return (PyObject *)_PyHamt_Without(self, key);
2832 }
2833 
2834 static PyObject *
hamt_py_items(PyHamtObject * self,PyObject * args)2835 hamt_py_items(PyHamtObject *self, PyObject *args)
2836 {
2837     return _PyHamt_NewIterItems(self);
2838 }
2839 
2840 static PyObject *
hamt_py_values(PyHamtObject * self,PyObject * args)2841 hamt_py_values(PyHamtObject *self, PyObject *args)
2842 {
2843     return _PyHamt_NewIterValues(self);
2844 }
2845 
2846 static PyObject *
hamt_py_keys(PyHamtObject * self,PyObject * args)2847 hamt_py_keys(PyHamtObject *self, PyObject *args)
2848 {
2849     return _PyHamt_NewIterKeys(self);
2850 }
2851 
2852 #ifdef Py_DEBUG
2853 static PyObject *
hamt_py_dump(PyHamtObject * self,PyObject * args)2854 hamt_py_dump(PyHamtObject *self, PyObject *args)
2855 {
2856     return hamt_dump(self);
2857 }
2858 #endif
2859 
2860 
2861 static PyMethodDef PyHamt_methods[] = {
2862     {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2863     {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2864     {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2865     {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2866     {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2867     {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2868 #ifdef Py_DEBUG
2869     {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2870 #endif
2871     {NULL, NULL}
2872 };
2873 
2874 static PySequenceMethods PyHamt_as_sequence = {
2875     0,                                /* sq_length */
2876     0,                                /* sq_concat */
2877     0,                                /* sq_repeat */
2878     0,                                /* sq_item */
2879     0,                                /* sq_slice */
2880     0,                                /* sq_ass_item */
2881     0,                                /* sq_ass_slice */
2882     (objobjproc)hamt_tp_contains,     /* sq_contains */
2883     0,                                /* sq_inplace_concat */
2884     0,                                /* sq_inplace_repeat */
2885 };
2886 
2887 static PyMappingMethods PyHamt_as_mapping = {
2888     (lenfunc)hamt_tp_len,             /* mp_length */
2889     (binaryfunc)hamt_tp_subscript,    /* mp_subscript */
2890 };
2891 
2892 PyTypeObject _PyHamt_Type = {
2893     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2894     "hamt",
2895     sizeof(PyHamtObject),
2896     .tp_methods = PyHamt_methods,
2897     .tp_as_mapping = &PyHamt_as_mapping,
2898     .tp_as_sequence = &PyHamt_as_sequence,
2899     .tp_iter = (getiterfunc)hamt_tp_iter,
2900     .tp_dealloc = (destructor)hamt_tp_dealloc,
2901     .tp_getattro = PyObject_GenericGetAttr,
2902     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2903     .tp_richcompare = hamt_tp_richcompare,
2904     .tp_traverse = (traverseproc)hamt_tp_traverse,
2905     .tp_clear = (inquiry)hamt_tp_clear,
2906     .tp_new = hamt_tp_new,
2907     .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2908     .tp_hash = PyObject_HashNotImplemented,
2909 };
2910 
2911 
2912 /////////////////////////////////// Tree Node Types
2913 
2914 
2915 PyTypeObject _PyHamt_ArrayNode_Type = {
2916     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2917     "hamt_array_node",
2918     sizeof(PyHamtNode_Array),
2919     0,
2920     .tp_dealloc = (destructor)hamt_node_array_dealloc,
2921     .tp_getattro = PyObject_GenericGetAttr,
2922     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2923     .tp_traverse = (traverseproc)hamt_node_array_traverse,
2924     .tp_free = PyObject_GC_Del,
2925     .tp_hash = PyObject_HashNotImplemented,
2926 };
2927 
2928 PyTypeObject _PyHamt_BitmapNode_Type = {
2929     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2930     "hamt_bitmap_node",
2931     sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2932     sizeof(PyObject *),
2933     .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2934     .tp_getattro = PyObject_GenericGetAttr,
2935     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2936     .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2937     .tp_free = PyObject_GC_Del,
2938     .tp_hash = PyObject_HashNotImplemented,
2939 };
2940 
2941 PyTypeObject _PyHamt_CollisionNode_Type = {
2942     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2943     "hamt_collision_node",
2944     sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2945     sizeof(PyObject *),
2946     .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2947     .tp_getattro = PyObject_GenericGetAttr,
2948     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2949     .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2950     .tp_free = PyObject_GC_Del,
2951     .tp_hash = PyObject_HashNotImplemented,
2952 };
2953 
2954 
2955 int
_PyHamt_Init(void)2956 _PyHamt_Init(void)
2957 {
2958     if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2959         (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2960         (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2961         (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2962         (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2963         (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2964         (PyType_Ready(&_PyHamtItems_Type) < 0))
2965     {
2966         return 0;
2967     }
2968 
2969     return 1;
2970 }
2971 
2972 void
_PyHamt_Fini(void)2973 _PyHamt_Fini(void)
2974 {
2975     Py_CLEAR(_empty_hamt);
2976     Py_CLEAR(_empty_bitmap_node);
2977 }
2978