• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 
3 #include "structmember.h"
4 #include "internal/pystate.h"
5 #include "internal/hamt.h"
6 
7 /*
8 This file provides an implemention of an immutable mapping using the
9 Hash Array Mapped Trie (or HAMT) datastructure.
10 
11 This design allows to have:
12 
13 1. Efficient copy: immutable mappings can be copied by reference,
14    making it an O(1) operation.
15 
16 2. Efficient mutations: due to structural sharing, only a portion of
17    the trie needs to be copied when the collection is mutated.  The
18    cost of set/delete operations is O(log N).
19 
20 3. Efficient lookups: O(log N).
21 
22 (where N is number of key/value items in the immutable mapping.)
23 
24 
25 HAMT
26 ====
27 
28 The core idea of HAMT is that the shape of the trie is encoded into the
29 hashes of keys.
30 
31 Say we want to store a K/V pair in our mapping.  First, we calculate the
32 hash of K, let's say it's 19830128, or in binary:
33 
34     0b1001011101001010101110000 = 19830128
35 
36 Now let's partition this bit representation of the hash into blocks of
37 5 bits each:
38 
39     0b00_00000_10010_11101_00101_01011_10000 = 19830128
40           (6)   (5)   (4)   (3)   (2)   (1)
41 
42 Each block of 5 bits represents a number between 0 and 31.  So if we have
43 a tree that consists of nodes, each of which is an array of 32 pointers,
44 those 5-bit blocks will encode a position on a single tree level.
45 
46 For example, storing the key K with hash 19830128, results in the following
47 tree structure:
48 
49                      (array of 32 pointers)
50                      +---+ -- +----+----+----+ -- +----+
51   root node          | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b10000 = 16 (1)
52   (level 1)          +---+ -- +----+----+----+ -- +----+
53                                       |
54                      +---+ -- +----+----+----+ -- +----+
55   a 2nd level node   | 0 | .. | 10 | 11 | 12 | .. | 31 |   0b01011 = 11 (2)
56                      +---+ -- +----+----+----+ -- +----+
57                                       |
58                      +---+ -- +----+----+----+ -- +----+
59   a 3rd level node   | 0 | .. | 04 | 05 | 06 | .. | 31 |   0b00101 = 5  (3)
60                      +---+ -- +----+----+----+ -- +----+
61                                       |
62                      +---+ -- +----+----+----+----+
63   a 4th level node   | 0 | .. | 04 | 29 | 30 | 31 |        0b11101 = 29 (4)
64                      +---+ -- +----+----+----+----+
65                                       |
66                      +---+ -- +----+----+----+ -- +----+
67   a 5th level node   | 0 | .. | 17 | 18 | 19 | .. | 31 |   0b10010 = 18 (5)
68                      +---+ -- +----+----+----+ -- +----+
69                                       |
70                        +--------------+
71                        |
72                      +---+ -- +----+----+----+ -- +----+
73   a 6th level node   | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b00000 = 0  (6)
74                      +---+ -- +----+----+----+ -- +----+
75                        |
76                        V -- our value (or collision)
77 
78 To rehash: for a K/V pair, the hash of K encodes where in the tree V will
79 be stored.
80 
81 To optimize memory footprint and handle hash collisions, our implementation
82 uses three different types of nodes:
83 
84  * A Bitmap node;
85  * An Array node;
86  * A Collision node.
87 
88 Because we implement an immutable dictionary, our nodes are also
89 immutable.  Therefore, when we need to modify a node, we copy it, and
90 do that modification to the copy.
91 
92 
93 Array Nodes
94 -----------
95 
96 These nodes are very simple.  Essentially they are arrays of 32 pointers
97 we used to illustrate the high-level idea in the previous section.
98 
99 We use Array nodes only when we need to store more than 16 pointers
100 in a single node.
101 
102 Array nodes do not store key objects or value objects.  They are used
103 only as an indirection level - their pointers point to other nodes in
104 the tree.
105 
106 
107 Bitmap Node
108 -----------
109 
110 Allocating a new 32-pointers array for every node of our tree would be
111 very expensive.  Unless we store millions of keys, most of tree nodes would
112 be very sparse.
113 
114 When we have less than 16 elements in a node, we don't want to use the
115 Array node, that would mean that we waste a lot of memory.  Instead,
116 we can use bitmap compression and can have just as many pointers
117 as we need!
118 
119 Bitmap nodes consist of two fields:
120 
121 1. An array of pointers.  If a Bitmap node holds N elements, the
122    array will be of N pointers.
123 
124 2. A 32bit integer -- a bitmap field.  If an N-th bit is set in the
125    bitmap, it means that the node has an N-th element.
126 
127 For example, say we need to store a 3 elements sparse array:
128 
129    +---+  --  +---+  --  +----+  --  +----+
130    | 0 |  ..  | 4 |  ..  | 11 |  ..  | 17 |
131    +---+  --  +---+  --  +----+  --  +----+
132                 |          |           |
133                 o1         o2          o3
134 
135 We allocate a three-pointer Bitmap node.  Its bitmap field will be
136 then set to:
137 
138    0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
139 
140 To check if our Bitmap node has an I-th element we can do:
141 
142    bitmap & (1 << I)
143 
144 
145 And here's a formula to calculate a position in our pointer array
146 which would correspond to an I-th element:
147 
148    popcount(bitmap & ((1 << I) - 1))
149 
150 
151 Let's break it down:
152 
153  * `popcount` is a function that returns a number of bits set to 1;
154 
155  * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
156    set to the *right* of our bit.
157 
158 
159 So for our 17, 11, and 4 indexes:
160 
161  * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
162 
163  * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
164 
165  * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
166 
167 
168 To conclude: Bitmap nodes are just like Array nodes -- they can store
169 a number of pointers, but use bitmap compression to eliminate unused
170 pointers.
171 
172 
173 Bitmap nodes have two pointers for each item:
174 
175   +----+----+----+----+  --  +----+----+
176   | k1 | v1 | k2 | v2 |  ..  | kN | vN |
177   +----+----+----+----+  --  +----+----+
178 
179 When kI == NULL, vI points to another tree level.
180 
181 When kI != NULL, the actual key object is stored in kI, and its
182 value is stored in vI.
183 
184 
185 Collision Nodes
186 ---------------
187 
188 Collision nodes are simple arrays of pointers -- two pointers per
189 key/value.  When there's a hash collision, say for k1/v1 and k2/v2
190 we have `hash(k1)==hash(k2)`.  Then our collision node will be:
191 
192   +----+----+----+----+
193   | k1 | v1 | k2 | v2 |
194   +----+----+----+----+
195 
196 
197 Tree Structure
198 --------------
199 
200 All nodes are PyObjects.
201 
202 The `PyHamtObject` object has a pointer to the root node (h_root),
203 and has a length field (h_count).
204 
205 High-level functions accept a PyHamtObject object and dispatch to
206 lower-level functions depending on what kind of node h_root points to.
207 
208 
209 Operations
210 ==========
211 
212 There are three fundamental operations on an immutable dictionary:
213 
214 1. "o.assoc(k, v)" will return a new immutable dictionary, that will be
215    a copy of "o", but with the "k/v" item set.
216 
217    Functions in this file:
218 
219         hamt_node_assoc, hamt_node_bitmap_assoc,
220         hamt_node_array_assoc, hamt_node_collision_assoc
221 
222    `hamt_node_assoc` function accepts a node object, and calls
223    other functions depending on its actual type.
224 
225 2. "o.find(k)" will lookup key "k" in "o".
226 
227    Functions:
228 
229         hamt_node_find, hamt_node_bitmap_find,
230         hamt_node_array_find, hamt_node_collision_find
231 
232 3. "o.without(k)" will return a new immutable dictionary, that will be
233    a copy of "o", buth without the "k" key.
234 
235    Functions:
236 
237         hamt_node_without, hamt_node_bitmap_without,
238         hamt_node_array_without, hamt_node_collision_without
239 
240 
241 Further Reading
242 ===============
243 
244 1. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
245 
246 2. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
247 
248 3. Clojure's PersistentHashMap implementation:
249    https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
250 
251 
252 Debug
253 =====
254 
255 The HAMT datatype is accessible for testing purposes under the
256 `_testcapi` module:
257 
258     >>> from _testcapi import hamt
259     >>> h = hamt()
260     >>> h2 = h.set('a', 2)
261     >>> h3 = h2.set('b', 3)
262     >>> list(h3)
263     ['a', 'b']
264 
265 When CPython is built in debug mode, a '__dump__()' method is available
266 to introspect the tree:
267 
268     >>> print(h3.__dump__())
269     HAMT(len=2):
270         BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
271             'a': 2
272             'b': 3
273 */
274 
275 
276 #define IS_ARRAY_NODE(node)     (Py_TYPE(node) == &_PyHamt_ArrayNode_Type)
277 #define IS_BITMAP_NODE(node)    (Py_TYPE(node) == &_PyHamt_BitmapNode_Type)
278 #define IS_COLLISION_NODE(node) (Py_TYPE(node) == &_PyHamt_CollisionNode_Type)
279 
280 
281 /* Return type for 'find' (lookup a key) functions.
282 
283    * F_ERROR - an error occurred;
284    * F_NOT_FOUND - the key was not found;
285    * F_FOUND - the key was found.
286 */
287 typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
288 
289 
290 /* Return type for 'without' (delete a key) functions.
291 
292    * W_ERROR - an error occurred;
293    * W_NOT_FOUND - the key was not found: there's nothing to delete;
294    * W_EMPTY - the key was found: the node/tree would be empty
295      if the key is deleted;
296    * W_NEWNODE - the key was found: a new node/tree is returned
297      without that key.
298 */
299 typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
300 
301 
302 /* Low-level iterator protocol type.
303 
304    * I_ITEM - a new item has been yielded;
305    * I_END - the whole tree was visited (similar to StopIteration).
306 */
307 typedef enum {I_ITEM, I_END} hamt_iter_t;
308 
309 
310 #define HAMT_ARRAY_NODE_SIZE 32
311 
312 
313 typedef struct {
314     PyObject_HEAD
315     PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
316     Py_ssize_t a_count;
317 } PyHamtNode_Array;
318 
319 
320 typedef struct {
321     PyObject_VAR_HEAD
322     uint32_t b_bitmap;
323     PyObject *b_array[1];
324 } PyHamtNode_Bitmap;
325 
326 
327 typedef struct {
328     PyObject_VAR_HEAD
329     int32_t c_hash;
330     PyObject *c_array[1];
331 } PyHamtNode_Collision;
332 
333 
334 static PyHamtNode_Bitmap *_empty_bitmap_node;
335 static PyHamtObject *_empty_hamt;
336 
337 
338 static PyHamtObject *
339 hamt_alloc(void);
340 
341 static PyHamtNode *
342 hamt_node_assoc(PyHamtNode *node,
343                 uint32_t shift, int32_t hash,
344                 PyObject *key, PyObject *val, int* added_leaf);
345 
346 static hamt_without_t
347 hamt_node_without(PyHamtNode *node,
348                   uint32_t shift, int32_t hash,
349                   PyObject *key,
350                   PyHamtNode **new_node);
351 
352 static hamt_find_t
353 hamt_node_find(PyHamtNode *node,
354                uint32_t shift, int32_t hash,
355                PyObject *key, PyObject **val);
356 
357 #ifdef Py_DEBUG
358 static int
359 hamt_node_dump(PyHamtNode *node,
360                _PyUnicodeWriter *writer, int level);
361 #endif
362 
363 static PyHamtNode *
364 hamt_node_array_new(Py_ssize_t);
365 
366 static PyHamtNode *
367 hamt_node_collision_new(int32_t hash, Py_ssize_t size);
368 
369 static inline Py_ssize_t
370 hamt_node_collision_count(PyHamtNode_Collision *node);
371 
372 
373 #ifdef Py_DEBUG
374 static void
_hamt_node_array_validate(void * o)375 _hamt_node_array_validate(void *o)
376 {
377     assert(IS_ARRAY_NODE(o));
378     PyHamtNode_Array *node = (PyHamtNode_Array*)(o);
379     Py_ssize_t i = 0, count = 0;
380     for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
381         if (node->a_array[i] != NULL) {
382             count++;
383         }
384     }
385     assert(count == node->a_count);
386 }
387 
388 #define VALIDATE_ARRAY_NODE(NODE) \
389     do { _hamt_node_array_validate(NODE); } while (0);
390 #else
391 #define VALIDATE_ARRAY_NODE(NODE)
392 #endif
393 
394 
395 /* Returns -1 on error */
396 static inline int32_t
hamt_hash(PyObject * o)397 hamt_hash(PyObject *o)
398 {
399     Py_hash_t hash = PyObject_Hash(o);
400 
401 #if SIZEOF_PY_HASH_T <= 4
402     return hash;
403 #else
404     if (hash == -1) {
405         /* exception */
406         return -1;
407     }
408 
409     /* While it's suboptimal to reduce Python's 64 bit hash to
410        32 bits via XOR, it seems that the resulting hash function
411        is good enough (this is also how Long type is hashed in Java.)
412        Storing 10, 100, 1000 Python strings results in a relatively
413        shallow and uniform tree structure.
414 
415        Please don't change this hashing algorithm, as there are many
416        tests that test some exact tree shape to cover all code paths.
417     */
418     int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
419     return xored == -1 ? -2 : xored;
420 #endif
421 }
422 
423 static inline uint32_t
hamt_mask(int32_t hash,uint32_t shift)424 hamt_mask(int32_t hash, uint32_t shift)
425 {
426     return (((uint32_t)hash >> shift) & 0x01f);
427 }
428 
429 static inline uint32_t
hamt_bitpos(int32_t hash,uint32_t shift)430 hamt_bitpos(int32_t hash, uint32_t shift)
431 {
432     return (uint32_t)1 << hamt_mask(hash, shift);
433 }
434 
435 static inline uint32_t
hamt_bitcount(uint32_t i)436 hamt_bitcount(uint32_t i)
437 {
438     /* We could use native popcount instruction but that would
439        require to either add configure flags to enable SSE4.2
440        support or to detect it dynamically.  Otherwise, we have
441        a risk of CPython not working properly on older hardware.
442 
443        In practice, there's no observable difference in
444        performance between using a popcount instruction or the
445        following fallback code.
446 
447        The algorithm is copied from:
448        https://graphics.stanford.edu/~seander/bithacks.html
449     */
450     i = i - ((i >> 1) & 0x55555555);
451     i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
452     return (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
453 }
454 
455 static inline uint32_t
hamt_bitindex(uint32_t bitmap,uint32_t bit)456 hamt_bitindex(uint32_t bitmap, uint32_t bit)
457 {
458     return hamt_bitcount(bitmap & (bit - 1));
459 }
460 
461 
462 /////////////////////////////////// Dump Helpers
463 #ifdef Py_DEBUG
464 
465 static int
_hamt_dump_ident(_PyUnicodeWriter * writer,int level)466 _hamt_dump_ident(_PyUnicodeWriter *writer, int level)
467 {
468     /* Write `'    ' * level` to the `writer` */
469     PyObject *str = NULL;
470     PyObject *num = NULL;
471     PyObject *res = NULL;
472     int ret = -1;
473 
474     str = PyUnicode_FromString("    ");
475     if (str == NULL) {
476         goto error;
477     }
478 
479     num = PyLong_FromLong((long)level);
480     if (num == NULL) {
481         goto error;
482     }
483 
484     res = PyNumber_Multiply(str, num);
485     if (res == NULL) {
486         goto error;
487     }
488 
489     ret = _PyUnicodeWriter_WriteStr(writer, res);
490 
491 error:
492     Py_XDECREF(res);
493     Py_XDECREF(str);
494     Py_XDECREF(num);
495     return ret;
496 }
497 
498 static int
_hamt_dump_format(_PyUnicodeWriter * writer,const char * format,...)499 _hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
500 {
501     /* A convenient helper combining _PyUnicodeWriter_WriteStr and
502        PyUnicode_FromFormatV.
503     */
504     PyObject* msg;
505     int ret;
506 
507     va_list vargs;
508 #ifdef HAVE_STDARG_PROTOTYPES
509     va_start(vargs, format);
510 #else
511     va_start(vargs);
512 #endif
513     msg = PyUnicode_FromFormatV(format, vargs);
514     va_end(vargs);
515 
516     if (msg == NULL) {
517         return -1;
518     }
519 
520     ret = _PyUnicodeWriter_WriteStr(writer, msg);
521     Py_DECREF(msg);
522     return ret;
523 }
524 
525 #endif  /* Py_DEBUG */
526 /////////////////////////////////// Bitmap Node
527 
528 
529 static PyHamtNode *
hamt_node_bitmap_new(Py_ssize_t size)530 hamt_node_bitmap_new(Py_ssize_t size)
531 {
532     /* Create a new bitmap node of size 'size' */
533 
534     PyHamtNode_Bitmap *node;
535     Py_ssize_t i;
536 
537     assert(size >= 0);
538     assert(size % 2 == 0);
539 
540     if (size == 0 && _empty_bitmap_node != NULL) {
541         Py_INCREF(_empty_bitmap_node);
542         return (PyHamtNode *)_empty_bitmap_node;
543     }
544 
545     /* No freelist; allocate a new bitmap node */
546     node = PyObject_GC_NewVar(
547         PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
548     if (node == NULL) {
549         return NULL;
550     }
551 
552     Py_SIZE(node) = size;
553 
554     for (i = 0; i < size; i++) {
555         node->b_array[i] = NULL;
556     }
557 
558     node->b_bitmap = 0;
559 
560     _PyObject_GC_TRACK(node);
561 
562     if (size == 0 && _empty_bitmap_node == NULL) {
563         /* Since bitmap nodes are immutable, we can cache the instance
564            for size=0 and reuse it whenever we need an empty bitmap node.
565         */
566         _empty_bitmap_node = node;
567         Py_INCREF(_empty_bitmap_node);
568     }
569 
570     return (PyHamtNode *)node;
571 }
572 
573 static inline Py_ssize_t
hamt_node_bitmap_count(PyHamtNode_Bitmap * node)574 hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
575 {
576     return Py_SIZE(node) / 2;
577 }
578 
579 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone(PyHamtNode_Bitmap * node)580 hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
581 {
582     /* Clone a bitmap node; return a new one with the same child notes. */
583 
584     PyHamtNode_Bitmap *clone;
585     Py_ssize_t i;
586 
587     clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
588     if (clone == NULL) {
589         return NULL;
590     }
591 
592     for (i = 0; i < Py_SIZE(node); i++) {
593         Py_XINCREF(node->b_array[i]);
594         clone->b_array[i] = node->b_array[i];
595     }
596 
597     clone->b_bitmap = node->b_bitmap;
598     return clone;
599 }
600 
601 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone_without(PyHamtNode_Bitmap * o,uint32_t bit)602 hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
603 {
604     assert(bit & o->b_bitmap);
605     assert(hamt_node_bitmap_count(o) > 1);
606 
607     PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
608         Py_SIZE(o) - 2);
609     if (new == NULL) {
610         return NULL;
611     }
612 
613     uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
614     uint32_t key_idx = 2 * idx;
615     uint32_t val_idx = key_idx + 1;
616     uint32_t i;
617 
618     for (i = 0; i < key_idx; i++) {
619         Py_XINCREF(o->b_array[i]);
620         new->b_array[i] = o->b_array[i];
621     }
622 
623     assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32);
624     for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) {
625         Py_XINCREF(o->b_array[i]);
626         new->b_array[i - 2] = o->b_array[i];
627     }
628 
629     new->b_bitmap = o->b_bitmap & ~bit;
630     return new;
631 }
632 
633 static PyHamtNode *
hamt_node_new_bitmap_or_collision(uint32_t shift,PyObject * key1,PyObject * val1,int32_t key2_hash,PyObject * key2,PyObject * val2)634 hamt_node_new_bitmap_or_collision(uint32_t shift,
635                                   PyObject *key1, PyObject *val1,
636                                   int32_t key2_hash,
637                                   PyObject *key2, PyObject *val2)
638 {
639     /* Helper method.  Creates a new node for key1/val and key2/val2
640        pairs.
641 
642        If key1 hash is equal to the hash of key2, a Collision node
643        will be created.  If they are not equal, a Bitmap node is
644        created.
645     */
646 
647     int32_t key1_hash = hamt_hash(key1);
648     if (key1_hash == -1) {
649         return NULL;
650     }
651 
652     if (key1_hash == key2_hash) {
653         PyHamtNode_Collision *n;
654         n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
655         if (n == NULL) {
656             return NULL;
657         }
658 
659         Py_INCREF(key1);
660         n->c_array[0] = key1;
661         Py_INCREF(val1);
662         n->c_array[1] = val1;
663 
664         Py_INCREF(key2);
665         n->c_array[2] = key2;
666         Py_INCREF(val2);
667         n->c_array[3] = val2;
668 
669         return (PyHamtNode *)n;
670     }
671     else {
672         int added_leaf = 0;
673         PyHamtNode *n = hamt_node_bitmap_new(0);
674         if (n == NULL) {
675             return NULL;
676         }
677 
678         PyHamtNode *n2 = hamt_node_assoc(
679             n, shift, key1_hash, key1, val1, &added_leaf);
680         Py_DECREF(n);
681         if (n2 == NULL) {
682             return NULL;
683         }
684 
685         n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
686         Py_DECREF(n2);
687         if (n == NULL) {
688             return NULL;
689         }
690 
691         return n;
692     }
693 }
694 
695 static PyHamtNode *
hamt_node_bitmap_assoc(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)696 hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
697                        uint32_t shift, int32_t hash,
698                        PyObject *key, PyObject *val, int* added_leaf)
699 {
700     /* assoc operation for bitmap nodes.
701 
702        Return: a new node, or self if key/val already is in the
703        collection.
704 
705        'added_leaf' is later used in '_PyHamt_Assoc' to determine if
706        `hamt.set(key, val)` increased the size of the collection.
707     */
708 
709     uint32_t bit = hamt_bitpos(hash, shift);
710     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
711 
712     /* Bitmap node layout:
713 
714     +------+------+------+------+  ---  +------+------+
715     | key1 | val1 | key2 | val2 |  ...  | keyN | valN |
716     +------+------+------+------+  ---  +------+------+
717     where `N < Py_SIZE(node)`.
718 
719     The `node->b_bitmap` field is a bitmap.  For a given
720     `(shift, hash)` pair we can determine:
721 
722      - If this node has the corresponding key/val slots.
723      - The index of key/val slots.
724     */
725 
726     if (self->b_bitmap & bit) {
727         /* The key is set in this node */
728 
729         uint32_t key_idx = 2 * idx;
730         uint32_t val_idx = key_idx + 1;
731 
732         assert(val_idx < (size_t)Py_SIZE(self));
733 
734         PyObject *key_or_null = self->b_array[key_idx];
735         PyObject *val_or_node = self->b_array[val_idx];
736 
737         if (key_or_null == NULL) {
738             /* key is NULL.  This means that we have a few keys
739                that have the same (hash, shift) pair. */
740 
741             assert(val_or_node != NULL);
742 
743             PyHamtNode *sub_node = hamt_node_assoc(
744                 (PyHamtNode *)val_or_node,
745                 shift + 5, hash, key, val, added_leaf);
746             if (sub_node == NULL) {
747                 return NULL;
748             }
749 
750             if (val_or_node == (PyObject *)sub_node) {
751                 Py_DECREF(sub_node);
752                 Py_INCREF(self);
753                 return (PyHamtNode *)self;
754             }
755 
756             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
757             if (ret == NULL) {
758                 return NULL;
759             }
760             Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
761             return (PyHamtNode *)ret;
762         }
763 
764         assert(key != NULL);
765         /* key is not NULL.  This means that we have only one other
766            key in this collection that matches our hash for this shift. */
767 
768         int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
769         if (comp_err < 0) {  /* exception in __eq__ */
770             return NULL;
771         }
772         if (comp_err == 1) {  /* key == key_or_null */
773             if (val == val_or_node) {
774                 /* we already have the same key/val pair; return self. */
775                 Py_INCREF(self);
776                 return (PyHamtNode *)self;
777             }
778 
779             /* We're setting a new value for the key we had before.
780                Make a new bitmap node with a replaced value, and return it. */
781             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
782             if (ret == NULL) {
783                 return NULL;
784             }
785             Py_INCREF(val);
786             Py_SETREF(ret->b_array[val_idx], val);
787             return (PyHamtNode *)ret;
788         }
789 
790         /* It's a new key, and it has the same index as *one* another key.
791            We have a collision.  We need to create a new node which will
792            combine the existing key and the key we're adding.
793 
794            `hamt_node_new_bitmap_or_collision` will either create a new
795            Collision node if the keys have identical hashes, or
796            a new Bitmap node.
797         */
798         PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
799             shift + 5,
800             key_or_null, val_or_node,  /* existing key/val */
801             hash,
802             key, val  /* new key/val */
803         );
804         if (sub_node == NULL) {
805             return NULL;
806         }
807 
808         PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
809         if (ret == NULL) {
810             Py_DECREF(sub_node);
811             return NULL;
812         }
813         Py_SETREF(ret->b_array[key_idx], NULL);
814         Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
815 
816         *added_leaf = 1;
817         return (PyHamtNode *)ret;
818     }
819     else {
820         /* There was no key before with the same (shift,hash). */
821 
822         uint32_t n = hamt_bitcount(self->b_bitmap);
823 
824         if (n >= 16) {
825             /* When we have a situation where we want to store more
826                than 16 nodes at one level of the tree, we no longer
827                want to use the Bitmap node with bitmap encoding.
828 
829                Instead we start using an Array node, which has
830                simpler (faster) implementation at the expense of
831                having prealocated 32 pointers for its keys/values
832                pairs.
833 
834                Small hamt objects (<30 keys) usually don't have any
835                Array nodes at all.  Between ~30 and ~400 keys hamt
836                objects usually have one Array node, and usually it's
837                a root node.
838             */
839 
840             uint32_t jdx = hamt_mask(hash, shift);
841             /* 'jdx' is the index of where the new key should be added
842                in the new Array node we're about to create. */
843 
844             PyHamtNode *empty = NULL;
845             PyHamtNode_Array *new_node = NULL;
846             PyHamtNode *res = NULL;
847 
848             /* Create a new Array node. */
849             new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
850             if (new_node == NULL) {
851                 goto fin;
852             }
853 
854             /* Create an empty bitmap node for the next
855                hamt_node_assoc call. */
856             empty = hamt_node_bitmap_new(0);
857             if (empty == NULL) {
858                 goto fin;
859             }
860 
861             /* Make a new bitmap node for the key/val we're adding.
862                Set that bitmap node to new-array-node[jdx]. */
863             new_node->a_array[jdx] = hamt_node_assoc(
864                 empty, shift + 5, hash, key, val, added_leaf);
865             if (new_node->a_array[jdx] == NULL) {
866                 goto fin;
867             }
868 
869             /* Copy existing key/value pairs from the current Bitmap
870                node to the new Array node we've just created. */
871             Py_ssize_t i, j;
872             for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
873                 if (((self->b_bitmap >> i) & 1) != 0) {
874                     /* Ensure we don't accidentally override `jdx` element
875                        we set few lines above.
876                     */
877                     assert(new_node->a_array[i] == NULL);
878 
879                     if (self->b_array[j] == NULL) {
880                         new_node->a_array[i] =
881                             (PyHamtNode *)self->b_array[j + 1];
882                         Py_INCREF(new_node->a_array[i]);
883                     }
884                     else {
885                         int32_t rehash = hamt_hash(self->b_array[j]);
886                         if (rehash == -1) {
887                             goto fin;
888                         }
889 
890                         new_node->a_array[i] = hamt_node_assoc(
891                             empty, shift + 5,
892                             rehash,
893                             self->b_array[j],
894                             self->b_array[j + 1],
895                             added_leaf);
896 
897                         if (new_node->a_array[i] == NULL) {
898                             goto fin;
899                         }
900                     }
901                     j += 2;
902                 }
903             }
904 
905             VALIDATE_ARRAY_NODE(new_node)
906 
907             /* That's it! */
908             res = (PyHamtNode *)new_node;
909 
910         fin:
911             Py_XDECREF(empty);
912             if (res == NULL) {
913                 Py_XDECREF(new_node);
914             }
915             return res;
916         }
917         else {
918             /* We have less than 16 keys at this level; let's just
919                create a new bitmap node out of this node with the
920                new key/val pair added. */
921 
922             uint32_t key_idx = 2 * idx;
923             uint32_t val_idx = key_idx + 1;
924             uint32_t i;
925 
926             *added_leaf = 1;
927 
928             /* Allocate new Bitmap node which can have one more key/val
929                pair in addition to what we have already. */
930             PyHamtNode_Bitmap *new_node =
931                 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
932             if (new_node == NULL) {
933                 return NULL;
934             }
935 
936             /* Copy all keys/values that will be before the new key/value
937                we are adding. */
938             for (i = 0; i < key_idx; i++) {
939                 Py_XINCREF(self->b_array[i]);
940                 new_node->b_array[i] = self->b_array[i];
941             }
942 
943             /* Set the new key/value to the new Bitmap node. */
944             Py_INCREF(key);
945             new_node->b_array[key_idx] = key;
946             Py_INCREF(val);
947             new_node->b_array[val_idx] = val;
948 
949             /* Copy all keys/values that will be after the new key/value
950                we are adding. */
951             assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32);
952             for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) {
953                 Py_XINCREF(self->b_array[i]);
954                 new_node->b_array[i + 2] = self->b_array[i];
955             }
956 
957             new_node->b_bitmap = self->b_bitmap | bit;
958             return (PyHamtNode *)new_node;
959         }
960     }
961 }
962 
963 static hamt_without_t
hamt_node_bitmap_without(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)964 hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
965                          uint32_t shift, int32_t hash,
966                          PyObject *key,
967                          PyHamtNode **new_node)
968 {
969     uint32_t bit = hamt_bitpos(hash, shift);
970     if ((self->b_bitmap & bit) == 0) {
971         return W_NOT_FOUND;
972     }
973 
974     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
975 
976     uint32_t key_idx = 2 * idx;
977     uint32_t val_idx = key_idx + 1;
978 
979     PyObject *key_or_null = self->b_array[key_idx];
980     PyObject *val_or_node = self->b_array[val_idx];
981 
982     if (key_or_null == NULL) {
983         /* key == NULL means that 'value' is another tree node. */
984 
985         PyHamtNode *sub_node = NULL;
986 
987         hamt_without_t res = hamt_node_without(
988             (PyHamtNode *)val_or_node,
989             shift + 5, hash, key, &sub_node);
990 
991         switch (res) {
992             case W_EMPTY:
993                 /* It's impossible for us to receive a W_EMPTY here:
994 
995                     - Array nodes are converted to Bitmap nodes when
996                       we delete 16th item from them;
997 
998                     - Collision nodes are converted to Bitmap when
999                       there is one item in them;
1000 
1001                     - Bitmap node's without() inlines single-item
1002                       sub-nodes.
1003 
1004                    So in no situation we can have a single-item
1005                    Bitmap child of another Bitmap node.
1006                 */
1007                 Py_UNREACHABLE();
1008 
1009             case W_NEWNODE: {
1010                 assert(sub_node != NULL);
1011 
1012                 if (IS_BITMAP_NODE(sub_node)) {
1013                     PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1014                     if (hamt_node_bitmap_count(sub_tree) == 1 &&
1015                             sub_tree->b_array[0] != NULL)
1016                     {
1017                         /* A bitmap node with one key/value pair.  Just
1018                            merge it into this node.
1019 
1020                            Note that we don't inline Bitmap nodes that
1021                            have a NULL key -- those nodes point to another
1022                            tree level, and we cannot simply move tree levels
1023                            up or down.
1024                         */
1025 
1026                         PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1027                         if (clone == NULL) {
1028                             Py_DECREF(sub_node);
1029                             return W_ERROR;
1030                         }
1031 
1032                         PyObject *key = sub_tree->b_array[0];
1033                         PyObject *val = sub_tree->b_array[1];
1034 
1035                         Py_INCREF(key);
1036                         Py_XSETREF(clone->b_array[key_idx], key);
1037                         Py_INCREF(val);
1038                         Py_SETREF(clone->b_array[val_idx], val);
1039 
1040                         Py_DECREF(sub_tree);
1041 
1042                         *new_node = (PyHamtNode *)clone;
1043                         return W_NEWNODE;
1044                     }
1045                 }
1046 
1047 #ifdef Py_DEBUG
1048                 /* Ensure that Collision.without implementation
1049                    converts to Bitmap nodes itself.
1050                 */
1051                 if (IS_COLLISION_NODE(sub_node)) {
1052                     assert(hamt_node_collision_count(
1053                             (PyHamtNode_Collision*)sub_node) > 1);
1054                 }
1055 #endif
1056 
1057                 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1058                 if (clone == NULL) {
1059                     return W_ERROR;
1060                 }
1061 
1062                 Py_SETREF(clone->b_array[val_idx],
1063                           (PyObject *)sub_node);  /* borrow */
1064 
1065                 *new_node = (PyHamtNode *)clone;
1066                 return W_NEWNODE;
1067             }
1068 
1069             case W_ERROR:
1070             case W_NOT_FOUND:
1071                 assert(sub_node == NULL);
1072                 return res;
1073 
1074             default:
1075                 Py_UNREACHABLE();
1076         }
1077     }
1078     else {
1079         /* We have a regular key/value pair */
1080 
1081         int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1082         if (cmp < 0) {
1083             return W_ERROR;
1084         }
1085         if (cmp == 0) {
1086             return W_NOT_FOUND;
1087         }
1088 
1089         if (hamt_node_bitmap_count(self) == 1) {
1090             return W_EMPTY;
1091         }
1092 
1093         *new_node = (PyHamtNode *)
1094             hamt_node_bitmap_clone_without(self, bit);
1095         if (*new_node == NULL) {
1096             return W_ERROR;
1097         }
1098 
1099         return W_NEWNODE;
1100     }
1101 }
1102 
1103 static hamt_find_t
hamt_node_bitmap_find(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1104 hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1105                       uint32_t shift, int32_t hash,
1106                       PyObject *key, PyObject **val)
1107 {
1108     /* Lookup a key in a Bitmap node. */
1109 
1110     uint32_t bit = hamt_bitpos(hash, shift);
1111     uint32_t idx;
1112     uint32_t key_idx;
1113     uint32_t val_idx;
1114     PyObject *key_or_null;
1115     PyObject *val_or_node;
1116     int comp_err;
1117 
1118     if ((self->b_bitmap & bit) == 0) {
1119         return F_NOT_FOUND;
1120     }
1121 
1122     idx = hamt_bitindex(self->b_bitmap, bit);
1123     key_idx = idx * 2;
1124     val_idx = key_idx + 1;
1125 
1126     assert(val_idx < (size_t)Py_SIZE(self));
1127 
1128     key_or_null = self->b_array[key_idx];
1129     val_or_node = self->b_array[val_idx];
1130 
1131     if (key_or_null == NULL) {
1132         /* There are a few keys that have the same hash at the current shift
1133            that match our key.  Dispatch the lookup further down the tree. */
1134         assert(val_or_node != NULL);
1135         return hamt_node_find((PyHamtNode *)val_or_node,
1136                               shift + 5, hash, key, val);
1137     }
1138 
1139     /* We have only one key -- a potential match.  Let's compare if the
1140        key we are looking at is equal to the key we are looking for. */
1141     assert(key != NULL);
1142     comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1143     if (comp_err < 0) {  /* exception in __eq__ */
1144         return F_ERROR;
1145     }
1146     if (comp_err == 1) {  /* key == key_or_null */
1147         *val = val_or_node;
1148         return F_FOUND;
1149     }
1150 
1151     return F_NOT_FOUND;
1152 }
1153 
1154 static int
hamt_node_bitmap_traverse(PyHamtNode_Bitmap * self,visitproc visit,void * arg)1155 hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1156 {
1157     /* Bitmap's tp_traverse */
1158 
1159     Py_ssize_t i;
1160 
1161     for (i = Py_SIZE(self); --i >= 0; ) {
1162         Py_VISIT(self->b_array[i]);
1163     }
1164 
1165     return 0;
1166 }
1167 
1168 static void
hamt_node_bitmap_dealloc(PyHamtNode_Bitmap * self)1169 hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1170 {
1171     /* Bitmap's tp_dealloc */
1172 
1173     Py_ssize_t len = Py_SIZE(self);
1174     Py_ssize_t i;
1175 
1176     PyObject_GC_UnTrack(self);
1177     Py_TRASHCAN_SAFE_BEGIN(self)
1178 
1179     if (len > 0) {
1180         i = len;
1181         while (--i >= 0) {
1182             Py_XDECREF(self->b_array[i]);
1183         }
1184     }
1185 
1186     Py_TYPE(self)->tp_free((PyObject *)self);
1187     Py_TRASHCAN_SAFE_END(self)
1188 }
1189 
1190 #ifdef Py_DEBUG
1191 static int
hamt_node_bitmap_dump(PyHamtNode_Bitmap * node,_PyUnicodeWriter * writer,int level)1192 hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1193                       _PyUnicodeWriter *writer, int level)
1194 {
1195     /* Debug build: __dump__() method implementation for Bitmap nodes. */
1196 
1197     Py_ssize_t i;
1198     PyObject *tmp1;
1199     PyObject *tmp2;
1200 
1201     if (_hamt_dump_ident(writer, level + 1)) {
1202         goto error;
1203     }
1204 
1205     if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1206                           Py_SIZE(node), Py_SIZE(node) / 2))
1207     {
1208         goto error;
1209     }
1210 
1211     tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1212     if (tmp1 == NULL) {
1213         goto error;
1214     }
1215     tmp2 = _PyLong_Format(tmp1, 2);
1216     Py_DECREF(tmp1);
1217     if (tmp2 == NULL) {
1218         goto error;
1219     }
1220     if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1221         Py_DECREF(tmp2);
1222         goto error;
1223     }
1224     Py_DECREF(tmp2);
1225 
1226     for (i = 0; i < Py_SIZE(node); i += 2) {
1227         PyObject *key_or_null = node->b_array[i];
1228         PyObject *val_or_node = node->b_array[i + 1];
1229 
1230         if (_hamt_dump_ident(writer, level + 2)) {
1231             goto error;
1232         }
1233 
1234         if (key_or_null == NULL) {
1235             if (_hamt_dump_format(writer, "NULL:\n")) {
1236                 goto error;
1237             }
1238 
1239             if (hamt_node_dump((PyHamtNode *)val_or_node,
1240                                writer, level + 2))
1241             {
1242                 goto error;
1243             }
1244         }
1245         else {
1246             if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1247                                   val_or_node))
1248             {
1249                 goto error;
1250             }
1251         }
1252 
1253         if (_hamt_dump_format(writer, "\n")) {
1254             goto error;
1255         }
1256     }
1257 
1258     return 0;
1259 error:
1260     return -1;
1261 }
1262 #endif  /* Py_DEBUG */
1263 
1264 
1265 /////////////////////////////////// Collision Node
1266 
1267 
1268 static PyHamtNode *
hamt_node_collision_new(int32_t hash,Py_ssize_t size)1269 hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1270 {
1271     /* Create a new Collision node. */
1272 
1273     PyHamtNode_Collision *node;
1274     Py_ssize_t i;
1275 
1276     assert(size >= 4);
1277     assert(size % 2 == 0);
1278 
1279     node = PyObject_GC_NewVar(
1280         PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1281     if (node == NULL) {
1282         return NULL;
1283     }
1284 
1285     for (i = 0; i < size; i++) {
1286         node->c_array[i] = NULL;
1287     }
1288 
1289     Py_SIZE(node) = size;
1290     node->c_hash = hash;
1291 
1292     _PyObject_GC_TRACK(node);
1293 
1294     return (PyHamtNode *)node;
1295 }
1296 
1297 static hamt_find_t
hamt_node_collision_find_index(PyHamtNode_Collision * self,PyObject * key,Py_ssize_t * idx)1298 hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1299                                Py_ssize_t *idx)
1300 {
1301     /* Lookup `key` in the Collision node `self`.  Set the index of the
1302        found key to 'idx'. */
1303 
1304     Py_ssize_t i;
1305     PyObject *el;
1306 
1307     for (i = 0; i < Py_SIZE(self); i += 2) {
1308         el = self->c_array[i];
1309 
1310         assert(el != NULL);
1311         int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1312         if (cmp < 0) {
1313             return F_ERROR;
1314         }
1315         if (cmp == 1) {
1316             *idx = i;
1317             return F_FOUND;
1318         }
1319     }
1320 
1321     return F_NOT_FOUND;
1322 }
1323 
1324 static PyHamtNode *
hamt_node_collision_assoc(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1325 hamt_node_collision_assoc(PyHamtNode_Collision *self,
1326                           uint32_t shift, int32_t hash,
1327                           PyObject *key, PyObject *val, int* added_leaf)
1328 {
1329     /* Set a new key to this level (currently a Collision node)
1330        of the tree. */
1331 
1332     if (hash == self->c_hash) {
1333         /* The hash of the 'key' we are adding matches the hash of
1334            other keys in this Collision node. */
1335 
1336         Py_ssize_t key_idx = -1;
1337         hamt_find_t found;
1338         PyHamtNode_Collision *new_node;
1339         Py_ssize_t i;
1340 
1341         /* Let's try to lookup the new 'key', maybe we already have it. */
1342         found = hamt_node_collision_find_index(self, key, &key_idx);
1343         switch (found) {
1344             case F_ERROR:
1345                 /* Exception. */
1346                 return NULL;
1347 
1348             case F_NOT_FOUND:
1349                 /* This is a totally new key.  Clone the current node,
1350                    add a new key/value to the cloned node. */
1351 
1352                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1353                     self->c_hash, Py_SIZE(self) + 2);
1354                 if (new_node == NULL) {
1355                     return NULL;
1356                 }
1357 
1358                 for (i = 0; i < Py_SIZE(self); i++) {
1359                     Py_INCREF(self->c_array[i]);
1360                     new_node->c_array[i] = self->c_array[i];
1361                 }
1362 
1363                 Py_INCREF(key);
1364                 new_node->c_array[i] = key;
1365                 Py_INCREF(val);
1366                 new_node->c_array[i + 1] = val;
1367 
1368                 *added_leaf = 1;
1369                 return (PyHamtNode *)new_node;
1370 
1371             case F_FOUND:
1372                 /* There's a key which is equal to the key we are adding. */
1373 
1374                 assert(key_idx >= 0);
1375                 assert(key_idx < Py_SIZE(self));
1376                 Py_ssize_t val_idx = key_idx + 1;
1377 
1378                 if (self->c_array[val_idx] == val) {
1379                     /* We're setting a key/value pair that's already set. */
1380                     Py_INCREF(self);
1381                     return (PyHamtNode *)self;
1382                 }
1383 
1384                 /* We need to replace old value for the key
1385                    with a new value.  Create a new Collision node.*/
1386                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1387                     self->c_hash, Py_SIZE(self));
1388                 if (new_node == NULL) {
1389                     return NULL;
1390                 }
1391 
1392                 /* Copy all elements of the old node to the new one. */
1393                 for (i = 0; i < Py_SIZE(self); i++) {
1394                     Py_INCREF(self->c_array[i]);
1395                     new_node->c_array[i] = self->c_array[i];
1396                 }
1397 
1398                 /* Replace the old value with the new value for the our key. */
1399                 Py_DECREF(new_node->c_array[val_idx]);
1400                 Py_INCREF(val);
1401                 new_node->c_array[val_idx] = val;
1402 
1403                 return (PyHamtNode *)new_node;
1404 
1405             default:
1406                 Py_UNREACHABLE();
1407         }
1408     }
1409     else {
1410         /* The hash of the new key is different from the hash that
1411            all keys of this Collision node have.
1412 
1413            Create a Bitmap node inplace with two children:
1414            key/value pair that we're adding, and the Collision node
1415            we're replacing on this tree level.
1416         */
1417 
1418         PyHamtNode_Bitmap *new_node;
1419         PyHamtNode *assoc_res;
1420 
1421         new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1422         if (new_node == NULL) {
1423             return NULL;
1424         }
1425         new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1426         Py_INCREF(self);
1427         new_node->b_array[1] = (PyObject*) self;
1428 
1429         assoc_res = hamt_node_bitmap_assoc(
1430             new_node, shift, hash, key, val, added_leaf);
1431         Py_DECREF(new_node);
1432         return assoc_res;
1433     }
1434 }
1435 
1436 static inline Py_ssize_t
hamt_node_collision_count(PyHamtNode_Collision * node)1437 hamt_node_collision_count(PyHamtNode_Collision *node)
1438 {
1439     return Py_SIZE(node) / 2;
1440 }
1441 
1442 static hamt_without_t
hamt_node_collision_without(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1443 hamt_node_collision_without(PyHamtNode_Collision *self,
1444                             uint32_t shift, int32_t hash,
1445                             PyObject *key,
1446                             PyHamtNode **new_node)
1447 {
1448     if (hash != self->c_hash) {
1449         return W_NOT_FOUND;
1450     }
1451 
1452     Py_ssize_t key_idx = -1;
1453     hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1454 
1455     switch (found) {
1456         case F_ERROR:
1457             return W_ERROR;
1458 
1459         case F_NOT_FOUND:
1460             return W_NOT_FOUND;
1461 
1462         case F_FOUND:
1463             assert(key_idx >= 0);
1464             assert(key_idx < Py_SIZE(self));
1465 
1466             Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1467 
1468             if (new_count == 0) {
1469                 /* The node has only one key/value pair and it's for the
1470                    key we're trying to delete.  So a new node will be empty
1471                    after the removal.
1472                 */
1473                 return W_EMPTY;
1474             }
1475 
1476             if (new_count == 1) {
1477                 /* The node has two keys, and after deletion the
1478                    new Collision node would have one.  Collision nodes
1479                    with one key shouldn't exist, so convert it to a
1480                    Bitmap node.
1481                 */
1482                 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1483                     hamt_node_bitmap_new(2);
1484                 if (node == NULL) {
1485                     return W_ERROR;
1486                 }
1487 
1488                 if (key_idx == 0) {
1489                     Py_INCREF(self->c_array[2]);
1490                     node->b_array[0] = self->c_array[2];
1491                     Py_INCREF(self->c_array[3]);
1492                     node->b_array[1] = self->c_array[3];
1493                 }
1494                 else {
1495                     assert(key_idx == 2);
1496                     Py_INCREF(self->c_array[0]);
1497                     node->b_array[0] = self->c_array[0];
1498                     Py_INCREF(self->c_array[1]);
1499                     node->b_array[1] = self->c_array[1];
1500                 }
1501 
1502                 node->b_bitmap = hamt_bitpos(hash, shift);
1503 
1504                 *new_node = (PyHamtNode *)node;
1505                 return W_NEWNODE;
1506             }
1507 
1508             /* Allocate a new Collision node with capacity for one
1509                less key/value pair */
1510             PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1511                 hamt_node_collision_new(
1512                     self->c_hash, Py_SIZE(self) - 2);
1513             if (new == NULL) {
1514                 return W_ERROR;
1515             }
1516 
1517             /* Copy all other keys from `self` to `new` */
1518             Py_ssize_t i;
1519             for (i = 0; i < key_idx; i++) {
1520                 Py_INCREF(self->c_array[i]);
1521                 new->c_array[i] = self->c_array[i];
1522             }
1523             for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1524                 Py_INCREF(self->c_array[i]);
1525                 new->c_array[i - 2] = self->c_array[i];
1526             }
1527 
1528             *new_node = (PyHamtNode*)new;
1529             return W_NEWNODE;
1530 
1531         default:
1532             Py_UNREACHABLE();
1533     }
1534 }
1535 
1536 static hamt_find_t
hamt_node_collision_find(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1537 hamt_node_collision_find(PyHamtNode_Collision *self,
1538                          uint32_t shift, int32_t hash,
1539                          PyObject *key, PyObject **val)
1540 {
1541     /* Lookup `key` in the Collision node `self`.  Set the value
1542        for the found key to 'val'. */
1543 
1544     Py_ssize_t idx = -1;
1545     hamt_find_t res;
1546 
1547     res = hamt_node_collision_find_index(self, key, &idx);
1548     if (res == F_ERROR || res == F_NOT_FOUND) {
1549         return res;
1550     }
1551 
1552     assert(idx >= 0);
1553     assert(idx + 1 < Py_SIZE(self));
1554 
1555     *val = self->c_array[idx + 1];
1556     assert(*val != NULL);
1557 
1558     return F_FOUND;
1559 }
1560 
1561 
1562 static int
hamt_node_collision_traverse(PyHamtNode_Collision * self,visitproc visit,void * arg)1563 hamt_node_collision_traverse(PyHamtNode_Collision *self,
1564                              visitproc visit, void *arg)
1565 {
1566     /* Collision's tp_traverse */
1567 
1568     Py_ssize_t i;
1569 
1570     for (i = Py_SIZE(self); --i >= 0; ) {
1571         Py_VISIT(self->c_array[i]);
1572     }
1573 
1574     return 0;
1575 }
1576 
1577 static void
hamt_node_collision_dealloc(PyHamtNode_Collision * self)1578 hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1579 {
1580     /* Collision's tp_dealloc */
1581 
1582     Py_ssize_t len = Py_SIZE(self);
1583 
1584     PyObject_GC_UnTrack(self);
1585     Py_TRASHCAN_SAFE_BEGIN(self)
1586 
1587     if (len > 0) {
1588 
1589         while (--len >= 0) {
1590             Py_XDECREF(self->c_array[len]);
1591         }
1592     }
1593 
1594     Py_TYPE(self)->tp_free((PyObject *)self);
1595     Py_TRASHCAN_SAFE_END(self)
1596 }
1597 
1598 #ifdef Py_DEBUG
1599 static int
hamt_node_collision_dump(PyHamtNode_Collision * node,_PyUnicodeWriter * writer,int level)1600 hamt_node_collision_dump(PyHamtNode_Collision *node,
1601                          _PyUnicodeWriter *writer, int level)
1602 {
1603     /* Debug build: __dump__() method implementation for Collision nodes. */
1604 
1605     Py_ssize_t i;
1606 
1607     if (_hamt_dump_ident(writer, level + 1)) {
1608         goto error;
1609     }
1610 
1611     if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1612                           Py_SIZE(node), node))
1613     {
1614         goto error;
1615     }
1616 
1617     for (i = 0; i < Py_SIZE(node); i += 2) {
1618         PyObject *key = node->c_array[i];
1619         PyObject *val = node->c_array[i + 1];
1620 
1621         if (_hamt_dump_ident(writer, level + 2)) {
1622             goto error;
1623         }
1624 
1625         if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1626             goto error;
1627         }
1628     }
1629 
1630     return 0;
1631 error:
1632     return -1;
1633 }
1634 #endif  /* Py_DEBUG */
1635 
1636 
1637 /////////////////////////////////// Array Node
1638 
1639 
1640 static PyHamtNode *
hamt_node_array_new(Py_ssize_t count)1641 hamt_node_array_new(Py_ssize_t count)
1642 {
1643     Py_ssize_t i;
1644 
1645     PyHamtNode_Array *node = PyObject_GC_New(
1646         PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1647     if (node == NULL) {
1648         return NULL;
1649     }
1650 
1651     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1652         node->a_array[i] = NULL;
1653     }
1654 
1655     node->a_count = count;
1656 
1657     _PyObject_GC_TRACK(node);
1658     return (PyHamtNode *)node;
1659 }
1660 
1661 static PyHamtNode_Array *
hamt_node_array_clone(PyHamtNode_Array * node)1662 hamt_node_array_clone(PyHamtNode_Array *node)
1663 {
1664     PyHamtNode_Array *clone;
1665     Py_ssize_t i;
1666 
1667     VALIDATE_ARRAY_NODE(node)
1668 
1669     /* Create a new Array node. */
1670     clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1671     if (clone == NULL) {
1672         return NULL;
1673     }
1674 
1675     /* Copy all elements from the current Array node to the new one. */
1676     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1677         Py_XINCREF(node->a_array[i]);
1678         clone->a_array[i] = node->a_array[i];
1679     }
1680 
1681     VALIDATE_ARRAY_NODE(clone)
1682     return clone;
1683 }
1684 
1685 static PyHamtNode *
hamt_node_array_assoc(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1686 hamt_node_array_assoc(PyHamtNode_Array *self,
1687                       uint32_t shift, int32_t hash,
1688                       PyObject *key, PyObject *val, int* added_leaf)
1689 {
1690     /* Set a new key to this level (currently a Collision node)
1691        of the tree.
1692 
1693        Array nodes don't store values, they can only point to
1694        other nodes.  They are simple arrays of 32 BaseNode pointers/
1695      */
1696 
1697     uint32_t idx = hamt_mask(hash, shift);
1698     PyHamtNode *node = self->a_array[idx];
1699     PyHamtNode *child_node;
1700     PyHamtNode_Array *new_node;
1701     Py_ssize_t i;
1702 
1703     if (node == NULL) {
1704         /* There's no child node for the given hash.  Create a new
1705            Bitmap node for this key. */
1706 
1707         PyHamtNode_Bitmap *empty = NULL;
1708 
1709         /* Get an empty Bitmap node to work with. */
1710         empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1711         if (empty == NULL) {
1712             return NULL;
1713         }
1714 
1715         /* Set key/val to the newly created empty Bitmap, thus
1716            creating a new Bitmap node with our key/value pair. */
1717         child_node = hamt_node_bitmap_assoc(
1718             empty,
1719             shift + 5, hash, key, val, added_leaf);
1720         Py_DECREF(empty);
1721         if (child_node == NULL) {
1722             return NULL;
1723         }
1724 
1725         /* Create a new Array node. */
1726         new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1727         if (new_node == NULL) {
1728             Py_DECREF(child_node);
1729             return NULL;
1730         }
1731 
1732         /* Copy all elements from the current Array node to the
1733            new one. */
1734         for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1735             Py_XINCREF(self->a_array[i]);
1736             new_node->a_array[i] = self->a_array[i];
1737         }
1738 
1739         assert(new_node->a_array[idx] == NULL);
1740         new_node->a_array[idx] = child_node;  /* borrow */
1741         VALIDATE_ARRAY_NODE(new_node)
1742     }
1743     else {
1744         /* There's a child node for the given hash.
1745            Set the key to it./ */
1746         child_node = hamt_node_assoc(
1747             node, shift + 5, hash, key, val, added_leaf);
1748         if (child_node == NULL) {
1749             return NULL;
1750         }
1751         else if (child_node == (PyHamtNode *)self) {
1752             Py_DECREF(child_node);
1753             return (PyHamtNode *)self;
1754         }
1755 
1756         new_node = hamt_node_array_clone(self);
1757         if (new_node == NULL) {
1758             Py_DECREF(child_node);
1759             return NULL;
1760         }
1761 
1762         Py_SETREF(new_node->a_array[idx], child_node);  /* borrow */
1763         VALIDATE_ARRAY_NODE(new_node)
1764     }
1765 
1766     return (PyHamtNode *)new_node;
1767 }
1768 
1769 static hamt_without_t
hamt_node_array_without(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1770 hamt_node_array_without(PyHamtNode_Array *self,
1771                         uint32_t shift, int32_t hash,
1772                         PyObject *key,
1773                         PyHamtNode **new_node)
1774 {
1775     uint32_t idx = hamt_mask(hash, shift);
1776     PyHamtNode *node = self->a_array[idx];
1777 
1778     if (node == NULL) {
1779         return W_NOT_FOUND;
1780     }
1781 
1782     PyHamtNode *sub_node = NULL;
1783     hamt_without_t res = hamt_node_without(
1784         (PyHamtNode *)node,
1785         shift + 5, hash, key, &sub_node);
1786 
1787     switch (res) {
1788         case W_NOT_FOUND:
1789         case W_ERROR:
1790             assert(sub_node == NULL);
1791             return res;
1792 
1793         case W_NEWNODE: {
1794             /* We need to replace a node at the `idx` index.
1795                Clone this node and replace.
1796             */
1797             assert(sub_node != NULL);
1798 
1799             PyHamtNode_Array *clone = hamt_node_array_clone(self);
1800             if (clone == NULL) {
1801                 Py_DECREF(sub_node);
1802                 return W_ERROR;
1803             }
1804 
1805             Py_SETREF(clone->a_array[idx], sub_node);  /* borrow */
1806             *new_node = (PyHamtNode*)clone;  /* borrow */
1807             return W_NEWNODE;
1808         }
1809 
1810         case W_EMPTY: {
1811             assert(sub_node == NULL);
1812             /* We need to remove a node at the `idx` index.
1813                Calculate the size of the replacement Array node.
1814             */
1815             Py_ssize_t new_count = self->a_count - 1;
1816 
1817             if (new_count == 0) {
1818                 return W_EMPTY;
1819             }
1820 
1821             if (new_count >= 16) {
1822                 /* We convert Bitmap nodes to Array nodes, when a
1823                    Bitmap node needs to store more than 15 key/value
1824                    pairs.  So we will create a new Array node if we
1825                    the number of key/values after deletion is still
1826                    greater than 15.
1827                 */
1828 
1829                 PyHamtNode_Array *new = hamt_node_array_clone(self);
1830                 if (new == NULL) {
1831                     return W_ERROR;
1832                 }
1833                 new->a_count = new_count;
1834                 Py_CLEAR(new->a_array[idx]);
1835 
1836                 *new_node = (PyHamtNode*)new;  /* borrow */
1837                 return W_NEWNODE;
1838             }
1839 
1840             /* New Array node would have less than 16 key/value
1841                pairs.  We need to create a replacement Bitmap node. */
1842 
1843             Py_ssize_t bitmap_size = new_count * 2;
1844             uint32_t bitmap = 0;
1845 
1846             PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1847                 hamt_node_bitmap_new(bitmap_size);
1848             if (new == NULL) {
1849                 return W_ERROR;
1850             }
1851 
1852             Py_ssize_t new_i = 0;
1853             for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1854                 if (i == idx) {
1855                     /* Skip the node we are deleting. */
1856                     continue;
1857                 }
1858 
1859                 PyHamtNode *node = self->a_array[i];
1860                 if (node == NULL) {
1861                     /* Skip any missing nodes. */
1862                     continue;
1863                 }
1864 
1865                 bitmap |= 1 << i;
1866 
1867                 if (IS_BITMAP_NODE(node)) {
1868                     PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1869 
1870                     if (hamt_node_bitmap_count(child) == 1 &&
1871                             child->b_array[0] != NULL)
1872                     {
1873                         /* node is a Bitmap with one key/value pair, just
1874                            merge it into the new Bitmap node we're building.
1875 
1876                            Note that we don't inline Bitmap nodes that
1877                            have a NULL key -- those nodes point to another
1878                            tree level, and we cannot simply move tree levels
1879                            up or down.
1880                         */
1881                         PyObject *key = child->b_array[0];
1882                         PyObject *val = child->b_array[1];
1883 
1884                         Py_INCREF(key);
1885                         new->b_array[new_i] = key;
1886                         Py_INCREF(val);
1887                         new->b_array[new_i + 1] = val;
1888                     }
1889                     else {
1890                         new->b_array[new_i] = NULL;
1891                         Py_INCREF(node);
1892                         new->b_array[new_i + 1] = (PyObject*)node;
1893                     }
1894                 }
1895                 else {
1896 
1897 #ifdef Py_DEBUG
1898                     if (IS_COLLISION_NODE(node)) {
1899                         Py_ssize_t child_count = hamt_node_collision_count(
1900                             (PyHamtNode_Collision*)node);
1901                         assert(child_count > 1);
1902                     }
1903                     else if (IS_ARRAY_NODE(node)) {
1904                         assert(((PyHamtNode_Array*)node)->a_count >= 16);
1905                     }
1906 #endif
1907 
1908                     /* Just copy the node into our new Bitmap */
1909                     new->b_array[new_i] = NULL;
1910                     Py_INCREF(node);
1911                     new->b_array[new_i + 1] = (PyObject*)node;
1912                 }
1913 
1914                 new_i += 2;
1915             }
1916 
1917             new->b_bitmap = bitmap;
1918             *new_node = (PyHamtNode*)new;  /* borrow */
1919             return W_NEWNODE;
1920         }
1921 
1922         default:
1923             Py_UNREACHABLE();
1924     }
1925 }
1926 
1927 static hamt_find_t
hamt_node_array_find(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1928 hamt_node_array_find(PyHamtNode_Array *self,
1929                      uint32_t shift, int32_t hash,
1930                      PyObject *key, PyObject **val)
1931 {
1932     /* Lookup `key` in the Array node `self`.  Set the value
1933        for the found key to 'val'. */
1934 
1935     uint32_t idx = hamt_mask(hash, shift);
1936     PyHamtNode *node;
1937 
1938     node = self->a_array[idx];
1939     if (node == NULL) {
1940         return F_NOT_FOUND;
1941     }
1942 
1943     /* Dispatch to the generic hamt_node_find */
1944     return hamt_node_find(node, shift + 5, hash, key, val);
1945 }
1946 
1947 static int
hamt_node_array_traverse(PyHamtNode_Array * self,visitproc visit,void * arg)1948 hamt_node_array_traverse(PyHamtNode_Array *self,
1949                          visitproc visit, void *arg)
1950 {
1951     /* Array's tp_traverse */
1952 
1953     Py_ssize_t i;
1954 
1955     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1956         Py_VISIT(self->a_array[i]);
1957     }
1958 
1959     return 0;
1960 }
1961 
1962 static void
hamt_node_array_dealloc(PyHamtNode_Array * self)1963 hamt_node_array_dealloc(PyHamtNode_Array *self)
1964 {
1965     /* Array's tp_dealloc */
1966 
1967     Py_ssize_t i;
1968 
1969     PyObject_GC_UnTrack(self);
1970     Py_TRASHCAN_SAFE_BEGIN(self)
1971 
1972     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1973         Py_XDECREF(self->a_array[i]);
1974     }
1975 
1976     Py_TYPE(self)->tp_free((PyObject *)self);
1977     Py_TRASHCAN_SAFE_END(self)
1978 }
1979 
1980 #ifdef Py_DEBUG
1981 static int
hamt_node_array_dump(PyHamtNode_Array * node,_PyUnicodeWriter * writer,int level)1982 hamt_node_array_dump(PyHamtNode_Array *node,
1983                      _PyUnicodeWriter *writer, int level)
1984 {
1985     /* Debug build: __dump__() method implementation for Array nodes. */
1986 
1987     Py_ssize_t i;
1988 
1989     if (_hamt_dump_ident(writer, level + 1)) {
1990         goto error;
1991     }
1992 
1993     if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1994         goto error;
1995     }
1996 
1997     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1998         if (node->a_array[i] == NULL) {
1999             continue;
2000         }
2001 
2002         if (_hamt_dump_ident(writer, level + 2)) {
2003             goto error;
2004         }
2005 
2006         if (_hamt_dump_format(writer, "%d::\n", i)) {
2007             goto error;
2008         }
2009 
2010         if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2011             goto error;
2012         }
2013 
2014         if (_hamt_dump_format(writer, "\n")) {
2015             goto error;
2016         }
2017     }
2018 
2019     return 0;
2020 error:
2021     return -1;
2022 }
2023 #endif  /* Py_DEBUG */
2024 
2025 
2026 /////////////////////////////////// Node Dispatch
2027 
2028 
2029 static PyHamtNode *
hamt_node_assoc(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)2030 hamt_node_assoc(PyHamtNode *node,
2031                 uint32_t shift, int32_t hash,
2032                 PyObject *key, PyObject *val, int* added_leaf)
2033 {
2034     /* Set key/value to the 'node' starting with the given shift/hash.
2035        Return a new node, or the same node if key/value already
2036        set.
2037 
2038        added_leaf will be set to 1 if key/value wasn't in the
2039        tree before.
2040 
2041        This method automatically dispatches to the suitable
2042        hamt_node_{nodetype}_assoc method.
2043     */
2044 
2045     if (IS_BITMAP_NODE(node)) {
2046         return hamt_node_bitmap_assoc(
2047             (PyHamtNode_Bitmap *)node,
2048             shift, hash, key, val, added_leaf);
2049     }
2050     else if (IS_ARRAY_NODE(node)) {
2051         return hamt_node_array_assoc(
2052             (PyHamtNode_Array *)node,
2053             shift, hash, key, val, added_leaf);
2054     }
2055     else {
2056         assert(IS_COLLISION_NODE(node));
2057         return hamt_node_collision_assoc(
2058             (PyHamtNode_Collision *)node,
2059             shift, hash, key, val, added_leaf);
2060     }
2061 }
2062 
2063 static hamt_without_t
hamt_node_without(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)2064 hamt_node_without(PyHamtNode *node,
2065                   uint32_t shift, int32_t hash,
2066                   PyObject *key,
2067                   PyHamtNode **new_node)
2068 {
2069     if (IS_BITMAP_NODE(node)) {
2070         return hamt_node_bitmap_without(
2071             (PyHamtNode_Bitmap *)node,
2072             shift, hash, key,
2073             new_node);
2074     }
2075     else if (IS_ARRAY_NODE(node)) {
2076         return hamt_node_array_without(
2077             (PyHamtNode_Array *)node,
2078             shift, hash, key,
2079             new_node);
2080     }
2081     else {
2082         assert(IS_COLLISION_NODE(node));
2083         return hamt_node_collision_without(
2084             (PyHamtNode_Collision *)node,
2085             shift, hash, key,
2086             new_node);
2087     }
2088 }
2089 
2090 static hamt_find_t
hamt_node_find(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)2091 hamt_node_find(PyHamtNode *node,
2092                uint32_t shift, int32_t hash,
2093                PyObject *key, PyObject **val)
2094 {
2095     /* Find the key in the node starting with the given shift/hash.
2096 
2097        If a value is found, the result will be set to F_FOUND, and
2098        *val will point to the found value object.
2099 
2100        If a value wasn't found, the result will be set to F_NOT_FOUND.
2101 
2102        If an exception occurs during the call, the result will be F_ERROR.
2103 
2104        This method automatically dispatches to the suitable
2105        hamt_node_{nodetype}_find method.
2106     */
2107 
2108     if (IS_BITMAP_NODE(node)) {
2109         return hamt_node_bitmap_find(
2110             (PyHamtNode_Bitmap *)node,
2111             shift, hash, key, val);
2112 
2113     }
2114     else if (IS_ARRAY_NODE(node)) {
2115         return hamt_node_array_find(
2116             (PyHamtNode_Array *)node,
2117             shift, hash, key, val);
2118     }
2119     else {
2120         assert(IS_COLLISION_NODE(node));
2121         return hamt_node_collision_find(
2122             (PyHamtNode_Collision *)node,
2123             shift, hash, key, val);
2124     }
2125 }
2126 
2127 #ifdef Py_DEBUG
2128 static int
hamt_node_dump(PyHamtNode * node,_PyUnicodeWriter * writer,int level)2129 hamt_node_dump(PyHamtNode *node,
2130                _PyUnicodeWriter *writer, int level)
2131 {
2132     /* Debug build: __dump__() method implementation for a node.
2133 
2134        This method automatically dispatches to the suitable
2135        hamt_node_{nodetype})_dump method.
2136     */
2137 
2138     if (IS_BITMAP_NODE(node)) {
2139         return hamt_node_bitmap_dump(
2140             (PyHamtNode_Bitmap *)node, writer, level);
2141     }
2142     else if (IS_ARRAY_NODE(node)) {
2143         return hamt_node_array_dump(
2144             (PyHamtNode_Array *)node, writer, level);
2145     }
2146     else {
2147         assert(IS_COLLISION_NODE(node));
2148         return hamt_node_collision_dump(
2149             (PyHamtNode_Collision *)node, writer, level);
2150     }
2151 }
2152 #endif  /* Py_DEBUG */
2153 
2154 
2155 /////////////////////////////////// Iterators: Machinery
2156 
2157 
2158 static hamt_iter_t
2159 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2160 
2161 
2162 static void
hamt_iterator_init(PyHamtIteratorState * iter,PyHamtNode * root)2163 hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2164 {
2165     for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2166         iter->i_nodes[i] = NULL;
2167         iter->i_pos[i] = 0;
2168     }
2169 
2170     iter->i_level = 0;
2171 
2172     /* Note: we don't incref/decref nodes in i_nodes. */
2173     iter->i_nodes[0] = root;
2174 }
2175 
2176 static hamt_iter_t
hamt_iterator_bitmap_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2177 hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2178                           PyObject **key, PyObject **val)
2179 {
2180     int8_t level = iter->i_level;
2181 
2182     PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2183     Py_ssize_t pos = iter->i_pos[level];
2184 
2185     if (pos + 1 >= Py_SIZE(node)) {
2186 #ifdef Py_DEBUG
2187         assert(iter->i_level >= 0);
2188         iter->i_nodes[iter->i_level] = NULL;
2189 #endif
2190         iter->i_level--;
2191         return hamt_iterator_next(iter, key, val);
2192     }
2193 
2194     if (node->b_array[pos] == NULL) {
2195         iter->i_pos[level] = pos + 2;
2196 
2197         int8_t next_level = level + 1;
2198         assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2199         iter->i_level = next_level;
2200         iter->i_pos[next_level] = 0;
2201         iter->i_nodes[next_level] = (PyHamtNode *)
2202             node->b_array[pos + 1];
2203 
2204         return hamt_iterator_next(iter, key, val);
2205     }
2206 
2207     *key = node->b_array[pos];
2208     *val = node->b_array[pos + 1];
2209     iter->i_pos[level] = pos + 2;
2210     return I_ITEM;
2211 }
2212 
2213 static hamt_iter_t
hamt_iterator_collision_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2214 hamt_iterator_collision_next(PyHamtIteratorState *iter,
2215                              PyObject **key, PyObject **val)
2216 {
2217     int8_t level = iter->i_level;
2218 
2219     PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2220     Py_ssize_t pos = iter->i_pos[level];
2221 
2222     if (pos + 1 >= Py_SIZE(node)) {
2223 #ifdef Py_DEBUG
2224         assert(iter->i_level >= 0);
2225         iter->i_nodes[iter->i_level] = NULL;
2226 #endif
2227         iter->i_level--;
2228         return hamt_iterator_next(iter, key, val);
2229     }
2230 
2231     *key = node->c_array[pos];
2232     *val = node->c_array[pos + 1];
2233     iter->i_pos[level] = pos + 2;
2234     return I_ITEM;
2235 }
2236 
2237 static hamt_iter_t
hamt_iterator_array_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2238 hamt_iterator_array_next(PyHamtIteratorState *iter,
2239                          PyObject **key, PyObject **val)
2240 {
2241     int8_t level = iter->i_level;
2242 
2243     PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2244     Py_ssize_t pos = iter->i_pos[level];
2245 
2246     if (pos >= HAMT_ARRAY_NODE_SIZE) {
2247 #ifdef Py_DEBUG
2248         assert(iter->i_level >= 0);
2249         iter->i_nodes[iter->i_level] = NULL;
2250 #endif
2251         iter->i_level--;
2252         return hamt_iterator_next(iter, key, val);
2253     }
2254 
2255     for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2256         if (node->a_array[i] != NULL) {
2257             iter->i_pos[level] = i + 1;
2258 
2259             int8_t next_level = level + 1;
2260             assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2261             iter->i_pos[next_level] = 0;
2262             iter->i_nodes[next_level] = node->a_array[i];
2263             iter->i_level = next_level;
2264 
2265             return hamt_iterator_next(iter, key, val);
2266         }
2267     }
2268 
2269 #ifdef Py_DEBUG
2270         assert(iter->i_level >= 0);
2271         iter->i_nodes[iter->i_level] = NULL;
2272 #endif
2273 
2274     iter->i_level--;
2275     return hamt_iterator_next(iter, key, val);
2276 }
2277 
2278 static hamt_iter_t
hamt_iterator_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2279 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2280 {
2281     if (iter->i_level < 0) {
2282         return I_END;
2283     }
2284 
2285     assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2286 
2287     PyHamtNode *current = iter->i_nodes[iter->i_level];
2288 
2289     if (IS_BITMAP_NODE(current)) {
2290         return hamt_iterator_bitmap_next(iter, key, val);
2291     }
2292     else if (IS_ARRAY_NODE(current)) {
2293         return hamt_iterator_array_next(iter, key, val);
2294     }
2295     else {
2296         assert(IS_COLLISION_NODE(current));
2297         return hamt_iterator_collision_next(iter, key, val);
2298     }
2299 }
2300 
2301 
2302 /////////////////////////////////// HAMT high-level functions
2303 
2304 
2305 PyHamtObject *
_PyHamt_Assoc(PyHamtObject * o,PyObject * key,PyObject * val)2306 _PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2307 {
2308     int32_t key_hash;
2309     int added_leaf = 0;
2310     PyHamtNode *new_root;
2311     PyHamtObject *new_o;
2312 
2313     key_hash = hamt_hash(key);
2314     if (key_hash == -1) {
2315         return NULL;
2316     }
2317 
2318     new_root = hamt_node_assoc(
2319         (PyHamtNode *)(o->h_root),
2320         0, key_hash, key, val, &added_leaf);
2321     if (new_root == NULL) {
2322         return NULL;
2323     }
2324 
2325     if (new_root == o->h_root) {
2326         Py_DECREF(new_root);
2327         Py_INCREF(o);
2328         return o;
2329     }
2330 
2331     new_o = hamt_alloc();
2332     if (new_o == NULL) {
2333         Py_DECREF(new_root);
2334         return NULL;
2335     }
2336 
2337     new_o->h_root = new_root;  /* borrow */
2338     new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2339 
2340     return new_o;
2341 }
2342 
2343 PyHamtObject *
_PyHamt_Without(PyHamtObject * o,PyObject * key)2344 _PyHamt_Without(PyHamtObject *o, PyObject *key)
2345 {
2346     int32_t key_hash = hamt_hash(key);
2347     if (key_hash == -1) {
2348         return NULL;
2349     }
2350 
2351     PyHamtNode *new_root = NULL;
2352 
2353     hamt_without_t res = hamt_node_without(
2354         (PyHamtNode *)(o->h_root),
2355         0, key_hash, key,
2356         &new_root);
2357 
2358     switch (res) {
2359         case W_ERROR:
2360             return NULL;
2361         case W_EMPTY:
2362             return _PyHamt_New();
2363         case W_NOT_FOUND:
2364             Py_INCREF(o);
2365             return o;
2366         case W_NEWNODE: {
2367             assert(new_root != NULL);
2368 
2369             PyHamtObject *new_o = hamt_alloc();
2370             if (new_o == NULL) {
2371                 Py_DECREF(new_root);
2372                 return NULL;
2373             }
2374 
2375             new_o->h_root = new_root;  /* borrow */
2376             new_o->h_count = o->h_count - 1;
2377             assert(new_o->h_count >= 0);
2378             return new_o;
2379         }
2380         default:
2381             Py_UNREACHABLE();
2382     }
2383 }
2384 
2385 static hamt_find_t
hamt_find(PyHamtObject * o,PyObject * key,PyObject ** val)2386 hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2387 {
2388     if (o->h_count == 0) {
2389         return F_NOT_FOUND;
2390     }
2391 
2392     int32_t key_hash = hamt_hash(key);
2393     if (key_hash == -1) {
2394         return F_ERROR;
2395     }
2396 
2397     return hamt_node_find(o->h_root, 0, key_hash, key, val);
2398 }
2399 
2400 
2401 int
_PyHamt_Find(PyHamtObject * o,PyObject * key,PyObject ** val)2402 _PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2403 {
2404     hamt_find_t res = hamt_find(o, key, val);
2405     switch (res) {
2406         case F_ERROR:
2407             return -1;
2408         case F_NOT_FOUND:
2409             return 0;
2410         case F_FOUND:
2411             return 1;
2412         default:
2413             Py_UNREACHABLE();
2414     }
2415 }
2416 
2417 
2418 int
_PyHamt_Eq(PyHamtObject * v,PyHamtObject * w)2419 _PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2420 {
2421     if (v == w) {
2422         return 1;
2423     }
2424 
2425     if (v->h_count != w->h_count) {
2426         return 0;
2427     }
2428 
2429     PyHamtIteratorState iter;
2430     hamt_iter_t iter_res;
2431     hamt_find_t find_res;
2432     PyObject *v_key;
2433     PyObject *v_val;
2434     PyObject *w_val;
2435 
2436     hamt_iterator_init(&iter, v->h_root);
2437 
2438     do {
2439         iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2440         if (iter_res == I_ITEM) {
2441             find_res = hamt_find(w, v_key, &w_val);
2442             switch (find_res) {
2443                 case F_ERROR:
2444                     return -1;
2445 
2446                 case F_NOT_FOUND:
2447                     return 0;
2448 
2449                 case F_FOUND: {
2450                     int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2451                     if (cmp < 0) {
2452                         return -1;
2453                     }
2454                     if (cmp == 0) {
2455                         return 0;
2456                     }
2457                 }
2458             }
2459         }
2460     } while (iter_res != I_END);
2461 
2462     return 1;
2463 }
2464 
2465 Py_ssize_t
_PyHamt_Len(PyHamtObject * o)2466 _PyHamt_Len(PyHamtObject *o)
2467 {
2468     return o->h_count;
2469 }
2470 
2471 static PyHamtObject *
hamt_alloc(void)2472 hamt_alloc(void)
2473 {
2474     PyHamtObject *o;
2475     o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2476     if (o == NULL) {
2477         return NULL;
2478     }
2479     o->h_count = 0;
2480     o->h_root = NULL;
2481     o->h_weakreflist = NULL;
2482     PyObject_GC_Track(o);
2483     return o;
2484 }
2485 
2486 PyHamtObject *
_PyHamt_New(void)2487 _PyHamt_New(void)
2488 {
2489     if (_empty_hamt != NULL) {
2490         /* HAMT is an immutable object so we can easily cache an
2491            empty instance. */
2492         Py_INCREF(_empty_hamt);
2493         return _empty_hamt;
2494     }
2495 
2496     PyHamtObject *o = hamt_alloc();
2497     if (o == NULL) {
2498         return NULL;
2499     }
2500 
2501     o->h_root = hamt_node_bitmap_new(0);
2502     if (o->h_root == NULL) {
2503         Py_DECREF(o);
2504         return NULL;
2505     }
2506 
2507     o->h_count = 0;
2508 
2509     if (_empty_hamt == NULL) {
2510         Py_INCREF(o);
2511         _empty_hamt = o;
2512     }
2513 
2514     return o;
2515 }
2516 
2517 #ifdef Py_DEBUG
2518 static PyObject *
hamt_dump(PyHamtObject * self)2519 hamt_dump(PyHamtObject *self)
2520 {
2521     _PyUnicodeWriter writer;
2522 
2523     _PyUnicodeWriter_Init(&writer);
2524 
2525     if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2526         goto error;
2527     }
2528 
2529     if (hamt_node_dump(self->h_root, &writer, 0)) {
2530         goto error;
2531     }
2532 
2533     return _PyUnicodeWriter_Finish(&writer);
2534 
2535 error:
2536     _PyUnicodeWriter_Dealloc(&writer);
2537     return NULL;
2538 }
2539 #endif  /* Py_DEBUG */
2540 
2541 
2542 /////////////////////////////////// Iterators: Shared Iterator Implementation
2543 
2544 
2545 static int
hamt_baseiter_tp_clear(PyHamtIterator * it)2546 hamt_baseiter_tp_clear(PyHamtIterator *it)
2547 {
2548     Py_CLEAR(it->hi_obj);
2549     return 0;
2550 }
2551 
2552 static void
hamt_baseiter_tp_dealloc(PyHamtIterator * it)2553 hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2554 {
2555     PyObject_GC_UnTrack(it);
2556     (void)hamt_baseiter_tp_clear(it);
2557     PyObject_GC_Del(it);
2558 }
2559 
2560 static int
hamt_baseiter_tp_traverse(PyHamtIterator * it,visitproc visit,void * arg)2561 hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2562 {
2563     Py_VISIT(it->hi_obj);
2564     return 0;
2565 }
2566 
2567 static PyObject *
hamt_baseiter_tp_iternext(PyHamtIterator * it)2568 hamt_baseiter_tp_iternext(PyHamtIterator *it)
2569 {
2570     PyObject *key;
2571     PyObject *val;
2572     hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2573 
2574     switch (res) {
2575         case I_END:
2576             PyErr_SetNone(PyExc_StopIteration);
2577             return NULL;
2578 
2579         case I_ITEM: {
2580             return (*(it->hi_yield))(key, val);
2581         }
2582 
2583         default: {
2584             Py_UNREACHABLE();
2585         }
2586     }
2587 }
2588 
2589 static Py_ssize_t
hamt_baseiter_tp_len(PyHamtIterator * it)2590 hamt_baseiter_tp_len(PyHamtIterator *it)
2591 {
2592     return it->hi_obj->h_count;
2593 }
2594 
2595 static PyMappingMethods PyHamtIterator_as_mapping = {
2596     (lenfunc)hamt_baseiter_tp_len,
2597 };
2598 
2599 static PyObject *
hamt_baseiter_new(PyTypeObject * type,binaryfunc yield,PyHamtObject * o)2600 hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2601 {
2602     PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2603     if (it == NULL) {
2604         return NULL;
2605     }
2606 
2607     Py_INCREF(o);
2608     it->hi_obj = o;
2609     it->hi_yield = yield;
2610 
2611     hamt_iterator_init(&it->hi_iter, o->h_root);
2612 
2613     return (PyObject*)it;
2614 }
2615 
2616 #define ITERATOR_TYPE_SHARED_SLOTS                              \
2617     .tp_basicsize = sizeof(PyHamtIterator),                     \
2618     .tp_itemsize = 0,                                           \
2619     .tp_as_mapping = &PyHamtIterator_as_mapping,                \
2620     .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc,         \
2621     .tp_getattro = PyObject_GenericGetAttr,                     \
2622     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,        \
2623     .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse,     \
2624     .tp_clear = (inquiry)hamt_baseiter_tp_clear,                \
2625     .tp_iter = PyObject_SelfIter,                               \
2626     .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2627 
2628 
2629 /////////////////////////////////// _PyHamtItems_Type
2630 
2631 
2632 PyTypeObject _PyHamtItems_Type = {
2633     PyVarObject_HEAD_INIT(NULL, 0)
2634     "items",
2635     ITERATOR_TYPE_SHARED_SLOTS
2636 };
2637 
2638 static PyObject *
hamt_iter_yield_items(PyObject * key,PyObject * val)2639 hamt_iter_yield_items(PyObject *key, PyObject *val)
2640 {
2641     return PyTuple_Pack(2, key, val);
2642 }
2643 
2644 PyObject *
_PyHamt_NewIterItems(PyHamtObject * o)2645 _PyHamt_NewIterItems(PyHamtObject *o)
2646 {
2647     return hamt_baseiter_new(
2648         &_PyHamtItems_Type, hamt_iter_yield_items, o);
2649 }
2650 
2651 
2652 /////////////////////////////////// _PyHamtKeys_Type
2653 
2654 
2655 PyTypeObject _PyHamtKeys_Type = {
2656     PyVarObject_HEAD_INIT(NULL, 0)
2657     "keys",
2658     ITERATOR_TYPE_SHARED_SLOTS
2659 };
2660 
2661 static PyObject *
hamt_iter_yield_keys(PyObject * key,PyObject * val)2662 hamt_iter_yield_keys(PyObject *key, PyObject *val)
2663 {
2664     Py_INCREF(key);
2665     return key;
2666 }
2667 
2668 PyObject *
_PyHamt_NewIterKeys(PyHamtObject * o)2669 _PyHamt_NewIterKeys(PyHamtObject *o)
2670 {
2671     return hamt_baseiter_new(
2672         &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2673 }
2674 
2675 
2676 /////////////////////////////////// _PyHamtValues_Type
2677 
2678 
2679 PyTypeObject _PyHamtValues_Type = {
2680     PyVarObject_HEAD_INIT(NULL, 0)
2681     "values",
2682     ITERATOR_TYPE_SHARED_SLOTS
2683 };
2684 
2685 static PyObject *
hamt_iter_yield_values(PyObject * key,PyObject * val)2686 hamt_iter_yield_values(PyObject *key, PyObject *val)
2687 {
2688     Py_INCREF(val);
2689     return val;
2690 }
2691 
2692 PyObject *
_PyHamt_NewIterValues(PyHamtObject * o)2693 _PyHamt_NewIterValues(PyHamtObject *o)
2694 {
2695     return hamt_baseiter_new(
2696         &_PyHamtValues_Type, hamt_iter_yield_values, o);
2697 }
2698 
2699 
2700 /////////////////////////////////// _PyHamt_Type
2701 
2702 
2703 #ifdef Py_DEBUG
2704 static PyObject *
2705 hamt_dump(PyHamtObject *self);
2706 #endif
2707 
2708 
2709 static PyObject *
hamt_tp_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2710 hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2711 {
2712     return (PyObject*)_PyHamt_New();
2713 }
2714 
2715 static int
hamt_tp_clear(PyHamtObject * self)2716 hamt_tp_clear(PyHamtObject *self)
2717 {
2718     Py_CLEAR(self->h_root);
2719     return 0;
2720 }
2721 
2722 
2723 static int
hamt_tp_traverse(PyHamtObject * self,visitproc visit,void * arg)2724 hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2725 {
2726     Py_VISIT(self->h_root);
2727     return 0;
2728 }
2729 
2730 static void
hamt_tp_dealloc(PyHamtObject * self)2731 hamt_tp_dealloc(PyHamtObject *self)
2732 {
2733     PyObject_GC_UnTrack(self);
2734     if (self->h_weakreflist != NULL) {
2735         PyObject_ClearWeakRefs((PyObject*)self);
2736     }
2737     (void)hamt_tp_clear(self);
2738     Py_TYPE(self)->tp_free(self);
2739 }
2740 
2741 
2742 static PyObject *
hamt_tp_richcompare(PyObject * v,PyObject * w,int op)2743 hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2744 {
2745     if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2746         Py_RETURN_NOTIMPLEMENTED;
2747     }
2748 
2749     int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2750     if (res < 0) {
2751         return NULL;
2752     }
2753 
2754     if (op == Py_NE) {
2755         res = !res;
2756     }
2757 
2758     if (res) {
2759         Py_RETURN_TRUE;
2760     }
2761     else {
2762         Py_RETURN_FALSE;
2763     }
2764 }
2765 
2766 static int
hamt_tp_contains(PyHamtObject * self,PyObject * key)2767 hamt_tp_contains(PyHamtObject *self, PyObject *key)
2768 {
2769     PyObject *val;
2770     return _PyHamt_Find(self, key, &val);
2771 }
2772 
2773 static PyObject *
hamt_tp_subscript(PyHamtObject * self,PyObject * key)2774 hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2775 {
2776     PyObject *val;
2777     hamt_find_t res = hamt_find(self, key, &val);
2778     switch (res) {
2779         case F_ERROR:
2780             return NULL;
2781         case F_FOUND:
2782             Py_INCREF(val);
2783             return val;
2784         case F_NOT_FOUND:
2785             PyErr_SetObject(PyExc_KeyError, key);
2786             return NULL;
2787         default:
2788             Py_UNREACHABLE();
2789     }
2790 }
2791 
2792 static Py_ssize_t
hamt_tp_len(PyHamtObject * self)2793 hamt_tp_len(PyHamtObject *self)
2794 {
2795     return _PyHamt_Len(self);
2796 }
2797 
2798 static PyObject *
hamt_tp_iter(PyHamtObject * self)2799 hamt_tp_iter(PyHamtObject *self)
2800 {
2801     return _PyHamt_NewIterKeys(self);
2802 }
2803 
2804 static PyObject *
hamt_py_set(PyHamtObject * self,PyObject * args)2805 hamt_py_set(PyHamtObject *self, PyObject *args)
2806 {
2807     PyObject *key;
2808     PyObject *val;
2809 
2810     if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2811         return NULL;
2812     }
2813 
2814     return (PyObject *)_PyHamt_Assoc(self, key, val);
2815 }
2816 
2817 static PyObject *
hamt_py_get(PyHamtObject * self,PyObject * args)2818 hamt_py_get(PyHamtObject *self, PyObject *args)
2819 {
2820     PyObject *key;
2821     PyObject *def = NULL;
2822 
2823     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2824         return NULL;
2825     }
2826 
2827     PyObject *val = NULL;
2828     hamt_find_t res = hamt_find(self, key, &val);
2829     switch (res) {
2830         case F_ERROR:
2831             return NULL;
2832         case F_FOUND:
2833             Py_INCREF(val);
2834             return val;
2835         case F_NOT_FOUND:
2836             if (def == NULL) {
2837                 Py_RETURN_NONE;
2838             }
2839             Py_INCREF(def);
2840             return def;
2841         default:
2842             Py_UNREACHABLE();
2843     }
2844 }
2845 
2846 static PyObject *
hamt_py_delete(PyHamtObject * self,PyObject * key)2847 hamt_py_delete(PyHamtObject *self, PyObject *key)
2848 {
2849     return (PyObject *)_PyHamt_Without(self, key);
2850 }
2851 
2852 static PyObject *
hamt_py_items(PyHamtObject * self,PyObject * args)2853 hamt_py_items(PyHamtObject *self, PyObject *args)
2854 {
2855     return _PyHamt_NewIterItems(self);
2856 }
2857 
2858 static PyObject *
hamt_py_values(PyHamtObject * self,PyObject * args)2859 hamt_py_values(PyHamtObject *self, PyObject *args)
2860 {
2861     return _PyHamt_NewIterValues(self);
2862 }
2863 
2864 static PyObject *
hamt_py_keys(PyHamtObject * self,PyObject * args)2865 hamt_py_keys(PyHamtObject *self, PyObject *args)
2866 {
2867     return _PyHamt_NewIterKeys(self);
2868 }
2869 
2870 #ifdef Py_DEBUG
2871 static PyObject *
hamt_py_dump(PyHamtObject * self,PyObject * args)2872 hamt_py_dump(PyHamtObject *self, PyObject *args)
2873 {
2874     return hamt_dump(self);
2875 }
2876 #endif
2877 
2878 
2879 static PyMethodDef PyHamt_methods[] = {
2880     {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2881     {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2882     {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2883     {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2884     {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2885     {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2886 #ifdef Py_DEBUG
2887     {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2888 #endif
2889     {NULL, NULL}
2890 };
2891 
2892 static PySequenceMethods PyHamt_as_sequence = {
2893     0,                                /* sq_length */
2894     0,                                /* sq_concat */
2895     0,                                /* sq_repeat */
2896     0,                                /* sq_item */
2897     0,                                /* sq_slice */
2898     0,                                /* sq_ass_item */
2899     0,                                /* sq_ass_slice */
2900     (objobjproc)hamt_tp_contains,     /* sq_contains */
2901     0,                                /* sq_inplace_concat */
2902     0,                                /* sq_inplace_repeat */
2903 };
2904 
2905 static PyMappingMethods PyHamt_as_mapping = {
2906     (lenfunc)hamt_tp_len,             /* mp_length */
2907     (binaryfunc)hamt_tp_subscript,    /* mp_subscript */
2908 };
2909 
2910 PyTypeObject _PyHamt_Type = {
2911     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2912     "hamt",
2913     sizeof(PyHamtObject),
2914     .tp_methods = PyHamt_methods,
2915     .tp_as_mapping = &PyHamt_as_mapping,
2916     .tp_as_sequence = &PyHamt_as_sequence,
2917     .tp_iter = (getiterfunc)hamt_tp_iter,
2918     .tp_dealloc = (destructor)hamt_tp_dealloc,
2919     .tp_getattro = PyObject_GenericGetAttr,
2920     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2921     .tp_richcompare = hamt_tp_richcompare,
2922     .tp_traverse = (traverseproc)hamt_tp_traverse,
2923     .tp_clear = (inquiry)hamt_tp_clear,
2924     .tp_new = hamt_tp_new,
2925     .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2926     .tp_hash = PyObject_HashNotImplemented,
2927 };
2928 
2929 
2930 /////////////////////////////////// Tree Node Types
2931 
2932 
2933 PyTypeObject _PyHamt_ArrayNode_Type = {
2934     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2935     "hamt_array_node",
2936     sizeof(PyHamtNode_Array),
2937     0,
2938     .tp_dealloc = (destructor)hamt_node_array_dealloc,
2939     .tp_getattro = PyObject_GenericGetAttr,
2940     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2941     .tp_traverse = (traverseproc)hamt_node_array_traverse,
2942     .tp_free = PyObject_GC_Del,
2943     .tp_hash = PyObject_HashNotImplemented,
2944 };
2945 
2946 PyTypeObject _PyHamt_BitmapNode_Type = {
2947     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2948     "hamt_bitmap_node",
2949     sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2950     sizeof(PyObject *),
2951     .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2952     .tp_getattro = PyObject_GenericGetAttr,
2953     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2954     .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2955     .tp_free = PyObject_GC_Del,
2956     .tp_hash = PyObject_HashNotImplemented,
2957 };
2958 
2959 PyTypeObject _PyHamt_CollisionNode_Type = {
2960     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2961     "hamt_collision_node",
2962     sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2963     sizeof(PyObject *),
2964     .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2965     .tp_getattro = PyObject_GenericGetAttr,
2966     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2967     .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2968     .tp_free = PyObject_GC_Del,
2969     .tp_hash = PyObject_HashNotImplemented,
2970 };
2971 
2972 
2973 int
_PyHamt_Init(void)2974 _PyHamt_Init(void)
2975 {
2976     if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2977         (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2978         (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2979         (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2980         (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2981         (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2982         (PyType_Ready(&_PyHamtItems_Type) < 0))
2983     {
2984         return 0;
2985     }
2986 
2987     return 1;
2988 }
2989 
2990 void
_PyHamt_Fini(void)2991 _PyHamt_Fini(void)
2992 {
2993     Py_CLEAR(_empty_hamt);
2994     Py_CLEAR(_empty_bitmap_node);
2995 }
2996