• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "Python.h"
2 
3 #include "pycore_hamt.h"
4 #include "pycore_object.h"        // _PyObject_GC_TRACK()
5 #include <stddef.h>               // offsetof()
6 
7 /*
8 This file provides an implementation of an immutable mapping using the
9 Hash Array Mapped Trie (or HAMT) datastructure.
10 
11 This design allows to have:
12 
13 1. Efficient copy: immutable mappings can be copied by reference,
14    making it an O(1) operation.
15 
16 2. Efficient mutations: due to structural sharing, only a portion of
17    the trie needs to be copied when the collection is mutated.  The
18    cost of set/delete operations is O(log N).
19 
20 3. Efficient lookups: O(log N).
21 
22 (where N is number of key/value items in the immutable mapping.)
23 
24 
25 HAMT
26 ====
27 
28 The core idea of HAMT is that the shape of the trie is encoded into the
29 hashes of keys.
30 
31 Say we want to store a K/V pair in our mapping.  First, we calculate the
32 hash of K, let's say it's 19830128, or in binary:
33 
34     0b1001011101001010101110000 = 19830128
35 
36 Now let's partition this bit representation of the hash into blocks of
37 5 bits each:
38 
39     0b00_00000_10010_11101_00101_01011_10000 = 19830128
40           (6)   (5)   (4)   (3)   (2)   (1)
41 
42 Each block of 5 bits represents a number between 0 and 31.  So if we have
43 a tree that consists of nodes, each of which is an array of 32 pointers,
44 those 5-bit blocks will encode a position on a single tree level.
45 
46 For example, storing the key K with hash 19830128, results in the following
47 tree structure:
48 
49                      (array of 32 pointers)
50                      +---+ -- +----+----+----+ -- +----+
51   root node          | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b10000 = 16 (1)
52   (level 1)          +---+ -- +----+----+----+ -- +----+
53                                       |
54                      +---+ -- +----+----+----+ -- +----+
55   a 2nd level node   | 0 | .. | 10 | 11 | 12 | .. | 31 |   0b01011 = 11 (2)
56                      +---+ -- +----+----+----+ -- +----+
57                                       |
58                      +---+ -- +----+----+----+ -- +----+
59   a 3rd level node   | 0 | .. | 04 | 05 | 06 | .. | 31 |   0b00101 = 5  (3)
60                      +---+ -- +----+----+----+ -- +----+
61                                       |
62                      +---+ -- +----+----+----+----+
63   a 4th level node   | 0 | .. | 04 | 29 | 30 | 31 |        0b11101 = 29 (4)
64                      +---+ -- +----+----+----+----+
65                                       |
66                      +---+ -- +----+----+----+ -- +----+
67   a 5th level node   | 0 | .. | 17 | 18 | 19 | .. | 31 |   0b10010 = 18 (5)
68                      +---+ -- +----+----+----+ -- +----+
69                                       |
70                        +--------------+
71                        |
72                      +---+ -- +----+----+----+ -- +----+
73   a 6th level node   | 0 | .. | 15 | 16 | 17 | .. | 31 |   0b00000 = 0  (6)
74                      +---+ -- +----+----+----+ -- +----+
75                        |
76                        V -- our value (or collision)
77 
78 To rehash: for a K/V pair, the hash of K encodes where in the tree V will
79 be stored.
80 
81 To optimize memory footprint and handle hash collisions, our implementation
82 uses three different types of nodes:
83 
84  * A Bitmap node;
85  * An Array node;
86  * A Collision node.
87 
88 Because we implement an immutable dictionary, our nodes are also
89 immutable.  Therefore, when we need to modify a node, we copy it, and
90 do that modification to the copy.
91 
92 
93 Array Nodes
94 -----------
95 
96 These nodes are very simple.  Essentially they are arrays of 32 pointers
97 we used to illustrate the high-level idea in the previous section.
98 
99 We use Array nodes only when we need to store more than 16 pointers
100 in a single node.
101 
102 Array nodes do not store key objects or value objects.  They are used
103 only as an indirection level - their pointers point to other nodes in
104 the tree.
105 
106 
107 Bitmap Node
108 -----------
109 
110 Allocating a new 32-pointers array for every node of our tree would be
111 very expensive.  Unless we store millions of keys, most of tree nodes would
112 be very sparse.
113 
114 When we have less than 16 elements in a node, we don't want to use the
115 Array node, that would mean that we waste a lot of memory.  Instead,
116 we can use bitmap compression and can have just as many pointers
117 as we need!
118 
119 Bitmap nodes consist of two fields:
120 
121 1. An array of pointers.  If a Bitmap node holds N elements, the
122    array will be of N pointers.
123 
124 2. A 32bit integer -- a bitmap field.  If an N-th bit is set in the
125    bitmap, it means that the node has an N-th element.
126 
127 For example, say we need to store a 3 elements sparse array:
128 
129    +---+  --  +---+  --  +----+  --  +----+
130    | 0 |  ..  | 4 |  ..  | 11 |  ..  | 17 |
131    +---+  --  +---+  --  +----+  --  +----+
132                 |          |           |
133                 o1         o2          o3
134 
135 We allocate a three-pointer Bitmap node.  Its bitmap field will be
136 then set to:
137 
138    0b_00100_00010_00000_10000 == (1 << 17) | (1 << 11) | (1 << 4)
139 
140 To check if our Bitmap node has an I-th element we can do:
141 
142    bitmap & (1 << I)
143 
144 
145 And here's a formula to calculate a position in our pointer array
146 which would correspond to an I-th element:
147 
148    popcount(bitmap & ((1 << I) - 1))
149 
150 
151 Let's break it down:
152 
153  * `popcount` is a function that returns a number of bits set to 1;
154 
155  * `((1 << I) - 1)` is a mask to filter the bitmask to contain bits
156    set to the *right* of our bit.
157 
158 
159 So for our 17, 11, and 4 indexes:
160 
161  * bitmap & ((1 << 17) - 1) == 0b100000010000 => 2 bits are set => index is 2.
162 
163  * bitmap & ((1 << 11) - 1) == 0b10000 => 1 bit is set => index is 1.
164 
165  * bitmap & ((1 << 4) - 1) == 0b0 => 0 bits are set => index is 0.
166 
167 
168 To conclude: Bitmap nodes are just like Array nodes -- they can store
169 a number of pointers, but use bitmap compression to eliminate unused
170 pointers.
171 
172 
173 Bitmap nodes have two pointers for each item:
174 
175   +----+----+----+----+  --  +----+----+
176   | k1 | v1 | k2 | v2 |  ..  | kN | vN |
177   +----+----+----+----+  --  +----+----+
178 
179 When kI == NULL, vI points to another tree level.
180 
181 When kI != NULL, the actual key object is stored in kI, and its
182 value is stored in vI.
183 
184 
185 Collision Nodes
186 ---------------
187 
188 Collision nodes are simple arrays of pointers -- two pointers per
189 key/value.  When there's a hash collision, say for k1/v1 and k2/v2
190 we have `hash(k1)==hash(k2)`.  Then our collision node will be:
191 
192   +----+----+----+----+
193   | k1 | v1 | k2 | v2 |
194   +----+----+----+----+
195 
196 
197 Tree Structure
198 --------------
199 
200 All nodes are PyObjects.
201 
202 The `PyHamtObject` object has a pointer to the root node (h_root),
203 and has a length field (h_count).
204 
205 High-level functions accept a PyHamtObject object and dispatch to
206 lower-level functions depending on what kind of node h_root points to.
207 
208 
209 Operations
210 ==========
211 
212 There are three fundamental operations on an immutable dictionary:
213 
214 1. "o.assoc(k, v)" will return a new immutable dictionary, that will be
215    a copy of "o", but with the "k/v" item set.
216 
217    Functions in this file:
218 
219         hamt_node_assoc, hamt_node_bitmap_assoc,
220         hamt_node_array_assoc, hamt_node_collision_assoc
221 
222    `hamt_node_assoc` function accepts a node object, and calls
223    other functions depending on its actual type.
224 
225 2. "o.find(k)" will lookup key "k" in "o".
226 
227    Functions:
228 
229         hamt_node_find, hamt_node_bitmap_find,
230         hamt_node_array_find, hamt_node_collision_find
231 
232 3. "o.without(k)" will return a new immutable dictionary, that will be
233    a copy of "o", buth without the "k" key.
234 
235    Functions:
236 
237         hamt_node_without, hamt_node_bitmap_without,
238         hamt_node_array_without, hamt_node_collision_without
239 
240 
241 Further Reading
242 ===============
243 
244 1. http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice.html
245 
246 2. http://blog.higher-order.net/2010/08/16/assoc-and-clojures-persistenthashmap-part-ii.html
247 
248 3. Clojure's PersistentHashMap implementation:
249    https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentHashMap.java
250 
251 
252 Debug
253 =====
254 
255 The HAMT datatype is accessible for testing purposes under the
256 `_testcapi` module:
257 
258     >>> from _testcapi import hamt
259     >>> h = hamt()
260     >>> h2 = h.set('a', 2)
261     >>> h3 = h2.set('b', 3)
262     >>> list(h3)
263     ['a', 'b']
264 
265 When CPython is built in debug mode, a '__dump__()' method is available
266 to introspect the tree:
267 
268     >>> print(h3.__dump__())
269     HAMT(len=2):
270         BitmapNode(size=4 count=2 bitmap=0b110 id=0x10eb9d9e8):
271             'a': 2
272             'b': 3
273 */
274 
275 
276 #define IS_ARRAY_NODE(node)     Py_IS_TYPE(node, &_PyHamt_ArrayNode_Type)
277 #define IS_BITMAP_NODE(node)    Py_IS_TYPE(node, &_PyHamt_BitmapNode_Type)
278 #define IS_COLLISION_NODE(node) Py_IS_TYPE(node, &_PyHamt_CollisionNode_Type)
279 
280 
281 /* Return type for 'find' (lookup a key) functions.
282 
283    * F_ERROR - an error occurred;
284    * F_NOT_FOUND - the key was not found;
285    * F_FOUND - the key was found.
286 */
287 typedef enum {F_ERROR, F_NOT_FOUND, F_FOUND} hamt_find_t;
288 
289 
290 /* Return type for 'without' (delete a key) functions.
291 
292    * W_ERROR - an error occurred;
293    * W_NOT_FOUND - the key was not found: there's nothing to delete;
294    * W_EMPTY - the key was found: the node/tree would be empty
295      if the key is deleted;
296    * W_NEWNODE - the key was found: a new node/tree is returned
297      without that key.
298 */
299 typedef enum {W_ERROR, W_NOT_FOUND, W_EMPTY, W_NEWNODE} hamt_without_t;
300 
301 
302 /* Low-level iterator protocol type.
303 
304    * I_ITEM - a new item has been yielded;
305    * I_END - the whole tree was visited (similar to StopIteration).
306 */
307 typedef enum {I_ITEM, I_END} hamt_iter_t;
308 
309 
310 #define HAMT_ARRAY_NODE_SIZE 32
311 
312 
313 typedef struct {
314     PyObject_HEAD
315     PyHamtNode *a_array[HAMT_ARRAY_NODE_SIZE];
316     Py_ssize_t a_count;
317 } PyHamtNode_Array;
318 
319 
320 typedef struct {
321     PyObject_VAR_HEAD
322     uint32_t b_bitmap;
323     PyObject *b_array[1];
324 } PyHamtNode_Bitmap;
325 
326 
327 typedef struct {
328     PyObject_VAR_HEAD
329     int32_t c_hash;
330     PyObject *c_array[1];
331 } PyHamtNode_Collision;
332 
333 
334 static PyHamtNode_Bitmap *_empty_bitmap_node;
335 static PyHamtObject *_empty_hamt;
336 
337 
338 static PyHamtObject *
339 hamt_alloc(void);
340 
341 static PyHamtNode *
342 hamt_node_assoc(PyHamtNode *node,
343                 uint32_t shift, int32_t hash,
344                 PyObject *key, PyObject *val, int* added_leaf);
345 
346 static hamt_without_t
347 hamt_node_without(PyHamtNode *node,
348                   uint32_t shift, int32_t hash,
349                   PyObject *key,
350                   PyHamtNode **new_node);
351 
352 static hamt_find_t
353 hamt_node_find(PyHamtNode *node,
354                uint32_t shift, int32_t hash,
355                PyObject *key, PyObject **val);
356 
357 #ifdef Py_DEBUG
358 static int
359 hamt_node_dump(PyHamtNode *node,
360                _PyUnicodeWriter *writer, int level);
361 #endif
362 
363 static PyHamtNode *
364 hamt_node_array_new(Py_ssize_t);
365 
366 static PyHamtNode *
367 hamt_node_collision_new(int32_t hash, Py_ssize_t size);
368 
369 static inline Py_ssize_t
370 hamt_node_collision_count(PyHamtNode_Collision *node);
371 
372 
373 #ifdef Py_DEBUG
374 static void
_hamt_node_array_validate(void * obj_raw)375 _hamt_node_array_validate(void *obj_raw)
376 {
377     PyObject *obj = _PyObject_CAST(obj_raw);
378     assert(IS_ARRAY_NODE(obj));
379     PyHamtNode_Array *node = (PyHamtNode_Array*)obj;
380     Py_ssize_t i = 0, count = 0;
381     for (; i < HAMT_ARRAY_NODE_SIZE; i++) {
382         if (node->a_array[i] != NULL) {
383             count++;
384         }
385     }
386     assert(count == node->a_count);
387 }
388 
389 #define VALIDATE_ARRAY_NODE(NODE) \
390     do { _hamt_node_array_validate(NODE); } while (0);
391 #else
392 #define VALIDATE_ARRAY_NODE(NODE)
393 #endif
394 
395 
396 /* Returns -1 on error */
397 static inline int32_t
hamt_hash(PyObject * o)398 hamt_hash(PyObject *o)
399 {
400     Py_hash_t hash = PyObject_Hash(o);
401 
402 #if SIZEOF_PY_HASH_T <= 4
403     return hash;
404 #else
405     if (hash == -1) {
406         /* exception */
407         return -1;
408     }
409 
410     /* While it's suboptimal to reduce Python's 64 bit hash to
411        32 bits via XOR, it seems that the resulting hash function
412        is good enough (this is also how Long type is hashed in Java.)
413        Storing 10, 100, 1000 Python strings results in a relatively
414        shallow and uniform tree structure.
415 
416        Please don't change this hashing algorithm, as there are many
417        tests that test some exact tree shape to cover all code paths.
418     */
419     int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32);
420     return xored == -1 ? -2 : xored;
421 #endif
422 }
423 
424 static inline uint32_t
hamt_mask(int32_t hash,uint32_t shift)425 hamt_mask(int32_t hash, uint32_t shift)
426 {
427     return (((uint32_t)hash >> shift) & 0x01f);
428 }
429 
430 static inline uint32_t
hamt_bitpos(int32_t hash,uint32_t shift)431 hamt_bitpos(int32_t hash, uint32_t shift)
432 {
433     return (uint32_t)1 << hamt_mask(hash, shift);
434 }
435 
436 static inline uint32_t
hamt_bitcount(uint32_t i)437 hamt_bitcount(uint32_t i)
438 {
439     /* We could use native popcount instruction but that would
440        require to either add configure flags to enable SSE4.2
441        support or to detect it dynamically.  Otherwise, we have
442        a risk of CPython not working properly on older hardware.
443 
444        In practice, there's no observable difference in
445        performance between using a popcount instruction or the
446        following fallback code.
447 
448        The algorithm is copied from:
449        https://graphics.stanford.edu/~seander/bithacks.html
450     */
451     i = i - ((i >> 1) & 0x55555555);
452     i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
453     return (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
454 }
455 
456 static inline uint32_t
hamt_bitindex(uint32_t bitmap,uint32_t bit)457 hamt_bitindex(uint32_t bitmap, uint32_t bit)
458 {
459     return hamt_bitcount(bitmap & (bit - 1));
460 }
461 
462 
463 /////////////////////////////////// Dump Helpers
464 #ifdef Py_DEBUG
465 
466 static int
_hamt_dump_ident(_PyUnicodeWriter * writer,int level)467 _hamt_dump_ident(_PyUnicodeWriter *writer, int level)
468 {
469     /* Write `'    ' * level` to the `writer` */
470     PyObject *str = NULL;
471     PyObject *num = NULL;
472     PyObject *res = NULL;
473     int ret = -1;
474 
475     str = PyUnicode_FromString("    ");
476     if (str == NULL) {
477         goto error;
478     }
479 
480     num = PyLong_FromLong((long)level);
481     if (num == NULL) {
482         goto error;
483     }
484 
485     res = PyNumber_Multiply(str, num);
486     if (res == NULL) {
487         goto error;
488     }
489 
490     ret = _PyUnicodeWriter_WriteStr(writer, res);
491 
492 error:
493     Py_XDECREF(res);
494     Py_XDECREF(str);
495     Py_XDECREF(num);
496     return ret;
497 }
498 
499 static int
_hamt_dump_format(_PyUnicodeWriter * writer,const char * format,...)500 _hamt_dump_format(_PyUnicodeWriter *writer, const char *format, ...)
501 {
502     /* A convenient helper combining _PyUnicodeWriter_WriteStr and
503        PyUnicode_FromFormatV.
504     */
505     PyObject* msg;
506     int ret;
507 
508     va_list vargs;
509 #ifdef HAVE_STDARG_PROTOTYPES
510     va_start(vargs, format);
511 #else
512     va_start(vargs);
513 #endif
514     msg = PyUnicode_FromFormatV(format, vargs);
515     va_end(vargs);
516 
517     if (msg == NULL) {
518         return -1;
519     }
520 
521     ret = _PyUnicodeWriter_WriteStr(writer, msg);
522     Py_DECREF(msg);
523     return ret;
524 }
525 
526 #endif  /* Py_DEBUG */
527 /////////////////////////////////// Bitmap Node
528 
529 
530 static PyHamtNode *
hamt_node_bitmap_new(Py_ssize_t size)531 hamt_node_bitmap_new(Py_ssize_t size)
532 {
533     /* Create a new bitmap node of size 'size' */
534 
535     PyHamtNode_Bitmap *node;
536     Py_ssize_t i;
537 
538     assert(size >= 0);
539     assert(size % 2 == 0);
540 
541     if (size == 0 && _empty_bitmap_node != NULL) {
542         Py_INCREF(_empty_bitmap_node);
543         return (PyHamtNode *)_empty_bitmap_node;
544     }
545 
546     /* No freelist; allocate a new bitmap node */
547     node = PyObject_GC_NewVar(
548         PyHamtNode_Bitmap, &_PyHamt_BitmapNode_Type, size);
549     if (node == NULL) {
550         return NULL;
551     }
552 
553     Py_SET_SIZE(node, size);
554 
555     for (i = 0; i < size; i++) {
556         node->b_array[i] = NULL;
557     }
558 
559     node->b_bitmap = 0;
560 
561     _PyObject_GC_TRACK(node);
562 
563     if (size == 0 && _empty_bitmap_node == NULL) {
564         /* Since bitmap nodes are immutable, we can cache the instance
565            for size=0 and reuse it whenever we need an empty bitmap node.
566         */
567         _empty_bitmap_node = node;
568         Py_INCREF(_empty_bitmap_node);
569     }
570 
571     return (PyHamtNode *)node;
572 }
573 
574 static inline Py_ssize_t
hamt_node_bitmap_count(PyHamtNode_Bitmap * node)575 hamt_node_bitmap_count(PyHamtNode_Bitmap *node)
576 {
577     return Py_SIZE(node) / 2;
578 }
579 
580 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone(PyHamtNode_Bitmap * node)581 hamt_node_bitmap_clone(PyHamtNode_Bitmap *node)
582 {
583     /* Clone a bitmap node; return a new one with the same child notes. */
584 
585     PyHamtNode_Bitmap *clone;
586     Py_ssize_t i;
587 
588     clone = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(Py_SIZE(node));
589     if (clone == NULL) {
590         return NULL;
591     }
592 
593     for (i = 0; i < Py_SIZE(node); i++) {
594         Py_XINCREF(node->b_array[i]);
595         clone->b_array[i] = node->b_array[i];
596     }
597 
598     clone->b_bitmap = node->b_bitmap;
599     return clone;
600 }
601 
602 static PyHamtNode_Bitmap *
hamt_node_bitmap_clone_without(PyHamtNode_Bitmap * o,uint32_t bit)603 hamt_node_bitmap_clone_without(PyHamtNode_Bitmap *o, uint32_t bit)
604 {
605     assert(bit & o->b_bitmap);
606     assert(hamt_node_bitmap_count(o) > 1);
607 
608     PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(
609         Py_SIZE(o) - 2);
610     if (new == NULL) {
611         return NULL;
612     }
613 
614     uint32_t idx = hamt_bitindex(o->b_bitmap, bit);
615     uint32_t key_idx = 2 * idx;
616     uint32_t val_idx = key_idx + 1;
617     uint32_t i;
618 
619     for (i = 0; i < key_idx; i++) {
620         Py_XINCREF(o->b_array[i]);
621         new->b_array[i] = o->b_array[i];
622     }
623 
624     assert(Py_SIZE(o) >= 0 && Py_SIZE(o) <= 32);
625     for (i = val_idx + 1; i < (uint32_t)Py_SIZE(o); i++) {
626         Py_XINCREF(o->b_array[i]);
627         new->b_array[i - 2] = o->b_array[i];
628     }
629 
630     new->b_bitmap = o->b_bitmap & ~bit;
631     return new;
632 }
633 
634 static PyHamtNode *
hamt_node_new_bitmap_or_collision(uint32_t shift,PyObject * key1,PyObject * val1,int32_t key2_hash,PyObject * key2,PyObject * val2)635 hamt_node_new_bitmap_or_collision(uint32_t shift,
636                                   PyObject *key1, PyObject *val1,
637                                   int32_t key2_hash,
638                                   PyObject *key2, PyObject *val2)
639 {
640     /* Helper method.  Creates a new node for key1/val and key2/val2
641        pairs.
642 
643        If key1 hash is equal to the hash of key2, a Collision node
644        will be created.  If they are not equal, a Bitmap node is
645        created.
646     */
647 
648     int32_t key1_hash = hamt_hash(key1);
649     if (key1_hash == -1) {
650         return NULL;
651     }
652 
653     if (key1_hash == key2_hash) {
654         PyHamtNode_Collision *n;
655         n = (PyHamtNode_Collision *)hamt_node_collision_new(key1_hash, 4);
656         if (n == NULL) {
657             return NULL;
658         }
659 
660         Py_INCREF(key1);
661         n->c_array[0] = key1;
662         Py_INCREF(val1);
663         n->c_array[1] = val1;
664 
665         Py_INCREF(key2);
666         n->c_array[2] = key2;
667         Py_INCREF(val2);
668         n->c_array[3] = val2;
669 
670         return (PyHamtNode *)n;
671     }
672     else {
673         int added_leaf = 0;
674         PyHamtNode *n = hamt_node_bitmap_new(0);
675         if (n == NULL) {
676             return NULL;
677         }
678 
679         PyHamtNode *n2 = hamt_node_assoc(
680             n, shift, key1_hash, key1, val1, &added_leaf);
681         Py_DECREF(n);
682         if (n2 == NULL) {
683             return NULL;
684         }
685 
686         n = hamt_node_assoc(n2, shift, key2_hash, key2, val2, &added_leaf);
687         Py_DECREF(n2);
688         if (n == NULL) {
689             return NULL;
690         }
691 
692         return n;
693     }
694 }
695 
696 static PyHamtNode *
hamt_node_bitmap_assoc(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)697 hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
698                        uint32_t shift, int32_t hash,
699                        PyObject *key, PyObject *val, int* added_leaf)
700 {
701     /* assoc operation for bitmap nodes.
702 
703        Return: a new node, or self if key/val already is in the
704        collection.
705 
706        'added_leaf' is later used in '_PyHamt_Assoc' to determine if
707        `hamt.set(key, val)` increased the size of the collection.
708     */
709 
710     uint32_t bit = hamt_bitpos(hash, shift);
711     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
712 
713     /* Bitmap node layout:
714 
715     +------+------+------+------+  ---  +------+------+
716     | key1 | val1 | key2 | val2 |  ...  | keyN | valN |
717     +------+------+------+------+  ---  +------+------+
718     where `N < Py_SIZE(node)`.
719 
720     The `node->b_bitmap` field is a bitmap.  For a given
721     `(shift, hash)` pair we can determine:
722 
723      - If this node has the corresponding key/val slots.
724      - The index of key/val slots.
725     */
726 
727     if (self->b_bitmap & bit) {
728         /* The key is set in this node */
729 
730         uint32_t key_idx = 2 * idx;
731         uint32_t val_idx = key_idx + 1;
732 
733         assert(val_idx < (size_t)Py_SIZE(self));
734 
735         PyObject *key_or_null = self->b_array[key_idx];
736         PyObject *val_or_node = self->b_array[val_idx];
737 
738         if (key_or_null == NULL) {
739             /* key is NULL.  This means that we have a few keys
740                that have the same (hash, shift) pair. */
741 
742             assert(val_or_node != NULL);
743 
744             PyHamtNode *sub_node = hamt_node_assoc(
745                 (PyHamtNode *)val_or_node,
746                 shift + 5, hash, key, val, added_leaf);
747             if (sub_node == NULL) {
748                 return NULL;
749             }
750 
751             if (val_or_node == (PyObject *)sub_node) {
752                 Py_DECREF(sub_node);
753                 Py_INCREF(self);
754                 return (PyHamtNode *)self;
755             }
756 
757             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
758             if (ret == NULL) {
759                 return NULL;
760             }
761             Py_SETREF(ret->b_array[val_idx], (PyObject*)sub_node);
762             return (PyHamtNode *)ret;
763         }
764 
765         assert(key != NULL);
766         /* key is not NULL.  This means that we have only one other
767            key in this collection that matches our hash for this shift. */
768 
769         int comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
770         if (comp_err < 0) {  /* exception in __eq__ */
771             return NULL;
772         }
773         if (comp_err == 1) {  /* key == key_or_null */
774             if (val == val_or_node) {
775                 /* we already have the same key/val pair; return self. */
776                 Py_INCREF(self);
777                 return (PyHamtNode *)self;
778             }
779 
780             /* We're setting a new value for the key we had before.
781                Make a new bitmap node with a replaced value, and return it. */
782             PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
783             if (ret == NULL) {
784                 return NULL;
785             }
786             Py_INCREF(val);
787             Py_SETREF(ret->b_array[val_idx], val);
788             return (PyHamtNode *)ret;
789         }
790 
791         /* It's a new key, and it has the same index as *one* another key.
792            We have a collision.  We need to create a new node which will
793            combine the existing key and the key we're adding.
794 
795            `hamt_node_new_bitmap_or_collision` will either create a new
796            Collision node if the keys have identical hashes, or
797            a new Bitmap node.
798         */
799         PyHamtNode *sub_node = hamt_node_new_bitmap_or_collision(
800             shift + 5,
801             key_or_null, val_or_node,  /* existing key/val */
802             hash,
803             key, val  /* new key/val */
804         );
805         if (sub_node == NULL) {
806             return NULL;
807         }
808 
809         PyHamtNode_Bitmap *ret = hamt_node_bitmap_clone(self);
810         if (ret == NULL) {
811             Py_DECREF(sub_node);
812             return NULL;
813         }
814         Py_SETREF(ret->b_array[key_idx], NULL);
815         Py_SETREF(ret->b_array[val_idx], (PyObject *)sub_node);
816 
817         *added_leaf = 1;
818         return (PyHamtNode *)ret;
819     }
820     else {
821         /* There was no key before with the same (shift,hash). */
822 
823         uint32_t n = hamt_bitcount(self->b_bitmap);
824 
825         if (n >= 16) {
826             /* When we have a situation where we want to store more
827                than 16 nodes at one level of the tree, we no longer
828                want to use the Bitmap node with bitmap encoding.
829 
830                Instead we start using an Array node, which has
831                simpler (faster) implementation at the expense of
832                having preallocated 32 pointers for its keys/values
833                pairs.
834 
835                Small hamt objects (<30 keys) usually don't have any
836                Array nodes at all.  Between ~30 and ~400 keys hamt
837                objects usually have one Array node, and usually it's
838                a root node.
839             */
840 
841             uint32_t jdx = hamt_mask(hash, shift);
842             /* 'jdx' is the index of where the new key should be added
843                in the new Array node we're about to create. */
844 
845             PyHamtNode *empty = NULL;
846             PyHamtNode_Array *new_node = NULL;
847             PyHamtNode *res = NULL;
848 
849             /* Create a new Array node. */
850             new_node = (PyHamtNode_Array *)hamt_node_array_new(n + 1);
851             if (new_node == NULL) {
852                 goto fin;
853             }
854 
855             /* Create an empty bitmap node for the next
856                hamt_node_assoc call. */
857             empty = hamt_node_bitmap_new(0);
858             if (empty == NULL) {
859                 goto fin;
860             }
861 
862             /* Make a new bitmap node for the key/val we're adding.
863                Set that bitmap node to new-array-node[jdx]. */
864             new_node->a_array[jdx] = hamt_node_assoc(
865                 empty, shift + 5, hash, key, val, added_leaf);
866             if (new_node->a_array[jdx] == NULL) {
867                 goto fin;
868             }
869 
870             /* Copy existing key/value pairs from the current Bitmap
871                node to the new Array node we've just created. */
872             Py_ssize_t i, j;
873             for (i = 0, j = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
874                 if (((self->b_bitmap >> i) & 1) != 0) {
875                     /* Ensure we don't accidentally override `jdx` element
876                        we set few lines above.
877                     */
878                     assert(new_node->a_array[i] == NULL);
879 
880                     if (self->b_array[j] == NULL) {
881                         new_node->a_array[i] =
882                             (PyHamtNode *)self->b_array[j + 1];
883                         Py_INCREF(new_node->a_array[i]);
884                     }
885                     else {
886                         int32_t rehash = hamt_hash(self->b_array[j]);
887                         if (rehash == -1) {
888                             goto fin;
889                         }
890 
891                         new_node->a_array[i] = hamt_node_assoc(
892                             empty, shift + 5,
893                             rehash,
894                             self->b_array[j],
895                             self->b_array[j + 1],
896                             added_leaf);
897 
898                         if (new_node->a_array[i] == NULL) {
899                             goto fin;
900                         }
901                     }
902                     j += 2;
903                 }
904             }
905 
906             VALIDATE_ARRAY_NODE(new_node)
907 
908             /* That's it! */
909             res = (PyHamtNode *)new_node;
910 
911         fin:
912             Py_XDECREF(empty);
913             if (res == NULL) {
914                 Py_XDECREF(new_node);
915             }
916             return res;
917         }
918         else {
919             /* We have less than 16 keys at this level; let's just
920                create a new bitmap node out of this node with the
921                new key/val pair added. */
922 
923             uint32_t key_idx = 2 * idx;
924             uint32_t val_idx = key_idx + 1;
925             uint32_t i;
926 
927             *added_leaf = 1;
928 
929             /* Allocate new Bitmap node which can have one more key/val
930                pair in addition to what we have already. */
931             PyHamtNode_Bitmap *new_node =
932                 (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2 * (n + 1));
933             if (new_node == NULL) {
934                 return NULL;
935             }
936 
937             /* Copy all keys/values that will be before the new key/value
938                we are adding. */
939             for (i = 0; i < key_idx; i++) {
940                 Py_XINCREF(self->b_array[i]);
941                 new_node->b_array[i] = self->b_array[i];
942             }
943 
944             /* Set the new key/value to the new Bitmap node. */
945             Py_INCREF(key);
946             new_node->b_array[key_idx] = key;
947             Py_INCREF(val);
948             new_node->b_array[val_idx] = val;
949 
950             /* Copy all keys/values that will be after the new key/value
951                we are adding. */
952             assert(Py_SIZE(self) >= 0 && Py_SIZE(self) <= 32);
953             for (i = key_idx; i < (uint32_t)Py_SIZE(self); i++) {
954                 Py_XINCREF(self->b_array[i]);
955                 new_node->b_array[i + 2] = self->b_array[i];
956             }
957 
958             new_node->b_bitmap = self->b_bitmap | bit;
959             return (PyHamtNode *)new_node;
960         }
961     }
962 }
963 
964 static hamt_without_t
hamt_node_bitmap_without(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)965 hamt_node_bitmap_without(PyHamtNode_Bitmap *self,
966                          uint32_t shift, int32_t hash,
967                          PyObject *key,
968                          PyHamtNode **new_node)
969 {
970     uint32_t bit = hamt_bitpos(hash, shift);
971     if ((self->b_bitmap & bit) == 0) {
972         return W_NOT_FOUND;
973     }
974 
975     uint32_t idx = hamt_bitindex(self->b_bitmap, bit);
976 
977     uint32_t key_idx = 2 * idx;
978     uint32_t val_idx = key_idx + 1;
979 
980     PyObject *key_or_null = self->b_array[key_idx];
981     PyObject *val_or_node = self->b_array[val_idx];
982 
983     if (key_or_null == NULL) {
984         /* key == NULL means that 'value' is another tree node. */
985 
986         PyHamtNode *sub_node = NULL;
987 
988         hamt_without_t res = hamt_node_without(
989             (PyHamtNode *)val_or_node,
990             shift + 5, hash, key, &sub_node);
991 
992         switch (res) {
993             case W_EMPTY:
994                 /* It's impossible for us to receive a W_EMPTY here:
995 
996                     - Array nodes are converted to Bitmap nodes when
997                       we delete 16th item from them;
998 
999                     - Collision nodes are converted to Bitmap when
1000                       there is one item in them;
1001 
1002                     - Bitmap node's without() inlines single-item
1003                       sub-nodes.
1004 
1005                    So in no situation we can have a single-item
1006                    Bitmap child of another Bitmap node.
1007                 */
1008                 Py_UNREACHABLE();
1009 
1010             case W_NEWNODE: {
1011                 assert(sub_node != NULL);
1012 
1013                 if (IS_BITMAP_NODE(sub_node)) {
1014                     PyHamtNode_Bitmap *sub_tree = (PyHamtNode_Bitmap *)sub_node;
1015                     if (hamt_node_bitmap_count(sub_tree) == 1 &&
1016                             sub_tree->b_array[0] != NULL)
1017                     {
1018                         /* A bitmap node with one key/value pair.  Just
1019                            merge it into this node.
1020 
1021                            Note that we don't inline Bitmap nodes that
1022                            have a NULL key -- those nodes point to another
1023                            tree level, and we cannot simply move tree levels
1024                            up or down.
1025                         */
1026 
1027                         PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1028                         if (clone == NULL) {
1029                             Py_DECREF(sub_node);
1030                             return W_ERROR;
1031                         }
1032 
1033                         PyObject *key = sub_tree->b_array[0];
1034                         PyObject *val = sub_tree->b_array[1];
1035 
1036                         Py_INCREF(key);
1037                         Py_XSETREF(clone->b_array[key_idx], key);
1038                         Py_INCREF(val);
1039                         Py_SETREF(clone->b_array[val_idx], val);
1040 
1041                         Py_DECREF(sub_tree);
1042 
1043                         *new_node = (PyHamtNode *)clone;
1044                         return W_NEWNODE;
1045                     }
1046                 }
1047 
1048 #ifdef Py_DEBUG
1049                 /* Ensure that Collision.without implementation
1050                    converts to Bitmap nodes itself.
1051                 */
1052                 if (IS_COLLISION_NODE(sub_node)) {
1053                     assert(hamt_node_collision_count(
1054                             (PyHamtNode_Collision*)sub_node) > 1);
1055                 }
1056 #endif
1057 
1058                 PyHamtNode_Bitmap *clone = hamt_node_bitmap_clone(self);
1059                 if (clone == NULL) {
1060                     return W_ERROR;
1061                 }
1062 
1063                 Py_SETREF(clone->b_array[val_idx],
1064                           (PyObject *)sub_node);  /* borrow */
1065 
1066                 *new_node = (PyHamtNode *)clone;
1067                 return W_NEWNODE;
1068             }
1069 
1070             case W_ERROR:
1071             case W_NOT_FOUND:
1072                 assert(sub_node == NULL);
1073                 return res;
1074 
1075             default:
1076                 Py_UNREACHABLE();
1077         }
1078     }
1079     else {
1080         /* We have a regular key/value pair */
1081 
1082         int cmp = PyObject_RichCompareBool(key_or_null, key, Py_EQ);
1083         if (cmp < 0) {
1084             return W_ERROR;
1085         }
1086         if (cmp == 0) {
1087             return W_NOT_FOUND;
1088         }
1089 
1090         if (hamt_node_bitmap_count(self) == 1) {
1091             return W_EMPTY;
1092         }
1093 
1094         *new_node = (PyHamtNode *)
1095             hamt_node_bitmap_clone_without(self, bit);
1096         if (*new_node == NULL) {
1097             return W_ERROR;
1098         }
1099 
1100         return W_NEWNODE;
1101     }
1102 }
1103 
1104 static hamt_find_t
hamt_node_bitmap_find(PyHamtNode_Bitmap * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1105 hamt_node_bitmap_find(PyHamtNode_Bitmap *self,
1106                       uint32_t shift, int32_t hash,
1107                       PyObject *key, PyObject **val)
1108 {
1109     /* Lookup a key in a Bitmap node. */
1110 
1111     uint32_t bit = hamt_bitpos(hash, shift);
1112     uint32_t idx;
1113     uint32_t key_idx;
1114     uint32_t val_idx;
1115     PyObject *key_or_null;
1116     PyObject *val_or_node;
1117     int comp_err;
1118 
1119     if ((self->b_bitmap & bit) == 0) {
1120         return F_NOT_FOUND;
1121     }
1122 
1123     idx = hamt_bitindex(self->b_bitmap, bit);
1124     key_idx = idx * 2;
1125     val_idx = key_idx + 1;
1126 
1127     assert(val_idx < (size_t)Py_SIZE(self));
1128 
1129     key_or_null = self->b_array[key_idx];
1130     val_or_node = self->b_array[val_idx];
1131 
1132     if (key_or_null == NULL) {
1133         /* There are a few keys that have the same hash at the current shift
1134            that match our key.  Dispatch the lookup further down the tree. */
1135         assert(val_or_node != NULL);
1136         return hamt_node_find((PyHamtNode *)val_or_node,
1137                               shift + 5, hash, key, val);
1138     }
1139 
1140     /* We have only one key -- a potential match.  Let's compare if the
1141        key we are looking at is equal to the key we are looking for. */
1142     assert(key != NULL);
1143     comp_err = PyObject_RichCompareBool(key, key_or_null, Py_EQ);
1144     if (comp_err < 0) {  /* exception in __eq__ */
1145         return F_ERROR;
1146     }
1147     if (comp_err == 1) {  /* key == key_or_null */
1148         *val = val_or_node;
1149         return F_FOUND;
1150     }
1151 
1152     return F_NOT_FOUND;
1153 }
1154 
1155 static int
hamt_node_bitmap_traverse(PyHamtNode_Bitmap * self,visitproc visit,void * arg)1156 hamt_node_bitmap_traverse(PyHamtNode_Bitmap *self, visitproc visit, void *arg)
1157 {
1158     /* Bitmap's tp_traverse */
1159 
1160     Py_ssize_t i;
1161 
1162     for (i = Py_SIZE(self); --i >= 0; ) {
1163         Py_VISIT(self->b_array[i]);
1164     }
1165 
1166     return 0;
1167 }
1168 
1169 static void
hamt_node_bitmap_dealloc(PyHamtNode_Bitmap * self)1170 hamt_node_bitmap_dealloc(PyHamtNode_Bitmap *self)
1171 {
1172     /* Bitmap's tp_dealloc */
1173 
1174     Py_ssize_t len = Py_SIZE(self);
1175     Py_ssize_t i;
1176 
1177     PyObject_GC_UnTrack(self);
1178     Py_TRASHCAN_BEGIN(self, hamt_node_bitmap_dealloc)
1179 
1180     if (len > 0) {
1181         i = len;
1182         while (--i >= 0) {
1183             Py_XDECREF(self->b_array[i]);
1184         }
1185     }
1186 
1187     Py_TYPE(self)->tp_free((PyObject *)self);
1188     Py_TRASHCAN_END
1189 }
1190 
1191 #ifdef Py_DEBUG
1192 static int
hamt_node_bitmap_dump(PyHamtNode_Bitmap * node,_PyUnicodeWriter * writer,int level)1193 hamt_node_bitmap_dump(PyHamtNode_Bitmap *node,
1194                       _PyUnicodeWriter *writer, int level)
1195 {
1196     /* Debug build: __dump__() method implementation for Bitmap nodes. */
1197 
1198     Py_ssize_t i;
1199     PyObject *tmp1;
1200     PyObject *tmp2;
1201 
1202     if (_hamt_dump_ident(writer, level + 1)) {
1203         goto error;
1204     }
1205 
1206     if (_hamt_dump_format(writer, "BitmapNode(size=%zd count=%zd ",
1207                           Py_SIZE(node), Py_SIZE(node) / 2))
1208     {
1209         goto error;
1210     }
1211 
1212     tmp1 = PyLong_FromUnsignedLong(node->b_bitmap);
1213     if (tmp1 == NULL) {
1214         goto error;
1215     }
1216     tmp2 = _PyLong_Format(tmp1, 2);
1217     Py_DECREF(tmp1);
1218     if (tmp2 == NULL) {
1219         goto error;
1220     }
1221     if (_hamt_dump_format(writer, "bitmap=%S id=%p):\n", tmp2, node)) {
1222         Py_DECREF(tmp2);
1223         goto error;
1224     }
1225     Py_DECREF(tmp2);
1226 
1227     for (i = 0; i < Py_SIZE(node); i += 2) {
1228         PyObject *key_or_null = node->b_array[i];
1229         PyObject *val_or_node = node->b_array[i + 1];
1230 
1231         if (_hamt_dump_ident(writer, level + 2)) {
1232             goto error;
1233         }
1234 
1235         if (key_or_null == NULL) {
1236             if (_hamt_dump_format(writer, "NULL:\n")) {
1237                 goto error;
1238             }
1239 
1240             if (hamt_node_dump((PyHamtNode *)val_or_node,
1241                                writer, level + 2))
1242             {
1243                 goto error;
1244             }
1245         }
1246         else {
1247             if (_hamt_dump_format(writer, "%R: %R", key_or_null,
1248                                   val_or_node))
1249             {
1250                 goto error;
1251             }
1252         }
1253 
1254         if (_hamt_dump_format(writer, "\n")) {
1255             goto error;
1256         }
1257     }
1258 
1259     return 0;
1260 error:
1261     return -1;
1262 }
1263 #endif  /* Py_DEBUG */
1264 
1265 
1266 /////////////////////////////////// Collision Node
1267 
1268 
1269 static PyHamtNode *
hamt_node_collision_new(int32_t hash,Py_ssize_t size)1270 hamt_node_collision_new(int32_t hash, Py_ssize_t size)
1271 {
1272     /* Create a new Collision node. */
1273 
1274     PyHamtNode_Collision *node;
1275     Py_ssize_t i;
1276 
1277     assert(size >= 4);
1278     assert(size % 2 == 0);
1279 
1280     node = PyObject_GC_NewVar(
1281         PyHamtNode_Collision, &_PyHamt_CollisionNode_Type, size);
1282     if (node == NULL) {
1283         return NULL;
1284     }
1285 
1286     for (i = 0; i < size; i++) {
1287         node->c_array[i] = NULL;
1288     }
1289 
1290     Py_SET_SIZE(node, size);
1291     node->c_hash = hash;
1292 
1293     _PyObject_GC_TRACK(node);
1294 
1295     return (PyHamtNode *)node;
1296 }
1297 
1298 static hamt_find_t
hamt_node_collision_find_index(PyHamtNode_Collision * self,PyObject * key,Py_ssize_t * idx)1299 hamt_node_collision_find_index(PyHamtNode_Collision *self, PyObject *key,
1300                                Py_ssize_t *idx)
1301 {
1302     /* Lookup `key` in the Collision node `self`.  Set the index of the
1303        found key to 'idx'. */
1304 
1305     Py_ssize_t i;
1306     PyObject *el;
1307 
1308     for (i = 0; i < Py_SIZE(self); i += 2) {
1309         el = self->c_array[i];
1310 
1311         assert(el != NULL);
1312         int cmp = PyObject_RichCompareBool(key, el, Py_EQ);
1313         if (cmp < 0) {
1314             return F_ERROR;
1315         }
1316         if (cmp == 1) {
1317             *idx = i;
1318             return F_FOUND;
1319         }
1320     }
1321 
1322     return F_NOT_FOUND;
1323 }
1324 
1325 static PyHamtNode *
hamt_node_collision_assoc(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1326 hamt_node_collision_assoc(PyHamtNode_Collision *self,
1327                           uint32_t shift, int32_t hash,
1328                           PyObject *key, PyObject *val, int* added_leaf)
1329 {
1330     /* Set a new key to this level (currently a Collision node)
1331        of the tree. */
1332 
1333     if (hash == self->c_hash) {
1334         /* The hash of the 'key' we are adding matches the hash of
1335            other keys in this Collision node. */
1336 
1337         Py_ssize_t key_idx = -1;
1338         hamt_find_t found;
1339         PyHamtNode_Collision *new_node;
1340         Py_ssize_t i;
1341 
1342         /* Let's try to lookup the new 'key', maybe we already have it. */
1343         found = hamt_node_collision_find_index(self, key, &key_idx);
1344         switch (found) {
1345             case F_ERROR:
1346                 /* Exception. */
1347                 return NULL;
1348 
1349             case F_NOT_FOUND:
1350                 /* This is a totally new key.  Clone the current node,
1351                    add a new key/value to the cloned node. */
1352 
1353                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1354                     self->c_hash, Py_SIZE(self) + 2);
1355                 if (new_node == NULL) {
1356                     return NULL;
1357                 }
1358 
1359                 for (i = 0; i < Py_SIZE(self); i++) {
1360                     Py_INCREF(self->c_array[i]);
1361                     new_node->c_array[i] = self->c_array[i];
1362                 }
1363 
1364                 Py_INCREF(key);
1365                 new_node->c_array[i] = key;
1366                 Py_INCREF(val);
1367                 new_node->c_array[i + 1] = val;
1368 
1369                 *added_leaf = 1;
1370                 return (PyHamtNode *)new_node;
1371 
1372             case F_FOUND:
1373                 /* There's a key which is equal to the key we are adding. */
1374 
1375                 assert(key_idx >= 0);
1376                 assert(key_idx < Py_SIZE(self));
1377                 Py_ssize_t val_idx = key_idx + 1;
1378 
1379                 if (self->c_array[val_idx] == val) {
1380                     /* We're setting a key/value pair that's already set. */
1381                     Py_INCREF(self);
1382                     return (PyHamtNode *)self;
1383                 }
1384 
1385                 /* We need to replace old value for the key
1386                    with a new value.  Create a new Collision node.*/
1387                 new_node = (PyHamtNode_Collision *)hamt_node_collision_new(
1388                     self->c_hash, Py_SIZE(self));
1389                 if (new_node == NULL) {
1390                     return NULL;
1391                 }
1392 
1393                 /* Copy all elements of the old node to the new one. */
1394                 for (i = 0; i < Py_SIZE(self); i++) {
1395                     Py_INCREF(self->c_array[i]);
1396                     new_node->c_array[i] = self->c_array[i];
1397                 }
1398 
1399                 /* Replace the old value with the new value for the our key. */
1400                 Py_DECREF(new_node->c_array[val_idx]);
1401                 Py_INCREF(val);
1402                 new_node->c_array[val_idx] = val;
1403 
1404                 return (PyHamtNode *)new_node;
1405 
1406             default:
1407                 Py_UNREACHABLE();
1408         }
1409     }
1410     else {
1411         /* The hash of the new key is different from the hash that
1412            all keys of this Collision node have.
1413 
1414            Create a Bitmap node inplace with two children:
1415            key/value pair that we're adding, and the Collision node
1416            we're replacing on this tree level.
1417         */
1418 
1419         PyHamtNode_Bitmap *new_node;
1420         PyHamtNode *assoc_res;
1421 
1422         new_node = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(2);
1423         if (new_node == NULL) {
1424             return NULL;
1425         }
1426         new_node->b_bitmap = hamt_bitpos(self->c_hash, shift);
1427         Py_INCREF(self);
1428         new_node->b_array[1] = (PyObject*) self;
1429 
1430         assoc_res = hamt_node_bitmap_assoc(
1431             new_node, shift, hash, key, val, added_leaf);
1432         Py_DECREF(new_node);
1433         return assoc_res;
1434     }
1435 }
1436 
1437 static inline Py_ssize_t
hamt_node_collision_count(PyHamtNode_Collision * node)1438 hamt_node_collision_count(PyHamtNode_Collision *node)
1439 {
1440     return Py_SIZE(node) / 2;
1441 }
1442 
1443 static hamt_without_t
hamt_node_collision_without(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1444 hamt_node_collision_without(PyHamtNode_Collision *self,
1445                             uint32_t shift, int32_t hash,
1446                             PyObject *key,
1447                             PyHamtNode **new_node)
1448 {
1449     if (hash != self->c_hash) {
1450         return W_NOT_FOUND;
1451     }
1452 
1453     Py_ssize_t key_idx = -1;
1454     hamt_find_t found = hamt_node_collision_find_index(self, key, &key_idx);
1455 
1456     switch (found) {
1457         case F_ERROR:
1458             return W_ERROR;
1459 
1460         case F_NOT_FOUND:
1461             return W_NOT_FOUND;
1462 
1463         case F_FOUND:
1464             assert(key_idx >= 0);
1465             assert(key_idx < Py_SIZE(self));
1466 
1467             Py_ssize_t new_count = hamt_node_collision_count(self) - 1;
1468 
1469             if (new_count == 0) {
1470                 /* The node has only one key/value pair and it's for the
1471                    key we're trying to delete.  So a new node will be empty
1472                    after the removal.
1473                 */
1474                 return W_EMPTY;
1475             }
1476 
1477             if (new_count == 1) {
1478                 /* The node has two keys, and after deletion the
1479                    new Collision node would have one.  Collision nodes
1480                    with one key shouldn't exist, so convert it to a
1481                    Bitmap node.
1482                 */
1483                 PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)
1484                     hamt_node_bitmap_new(2);
1485                 if (node == NULL) {
1486                     return W_ERROR;
1487                 }
1488 
1489                 if (key_idx == 0) {
1490                     Py_INCREF(self->c_array[2]);
1491                     node->b_array[0] = self->c_array[2];
1492                     Py_INCREF(self->c_array[3]);
1493                     node->b_array[1] = self->c_array[3];
1494                 }
1495                 else {
1496                     assert(key_idx == 2);
1497                     Py_INCREF(self->c_array[0]);
1498                     node->b_array[0] = self->c_array[0];
1499                     Py_INCREF(self->c_array[1]);
1500                     node->b_array[1] = self->c_array[1];
1501                 }
1502 
1503                 node->b_bitmap = hamt_bitpos(hash, shift);
1504 
1505                 *new_node = (PyHamtNode *)node;
1506                 return W_NEWNODE;
1507             }
1508 
1509             /* Allocate a new Collision node with capacity for one
1510                less key/value pair */
1511             PyHamtNode_Collision *new = (PyHamtNode_Collision *)
1512                 hamt_node_collision_new(
1513                     self->c_hash, Py_SIZE(self) - 2);
1514             if (new == NULL) {
1515                 return W_ERROR;
1516             }
1517 
1518             /* Copy all other keys from `self` to `new` */
1519             Py_ssize_t i;
1520             for (i = 0; i < key_idx; i++) {
1521                 Py_INCREF(self->c_array[i]);
1522                 new->c_array[i] = self->c_array[i];
1523             }
1524             for (i = key_idx + 2; i < Py_SIZE(self); i++) {
1525                 Py_INCREF(self->c_array[i]);
1526                 new->c_array[i - 2] = self->c_array[i];
1527             }
1528 
1529             *new_node = (PyHamtNode*)new;
1530             return W_NEWNODE;
1531 
1532         default:
1533             Py_UNREACHABLE();
1534     }
1535 }
1536 
1537 static hamt_find_t
hamt_node_collision_find(PyHamtNode_Collision * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1538 hamt_node_collision_find(PyHamtNode_Collision *self,
1539                          uint32_t shift, int32_t hash,
1540                          PyObject *key, PyObject **val)
1541 {
1542     /* Lookup `key` in the Collision node `self`.  Set the value
1543        for the found key to 'val'. */
1544 
1545     Py_ssize_t idx = -1;
1546     hamt_find_t res;
1547 
1548     res = hamt_node_collision_find_index(self, key, &idx);
1549     if (res == F_ERROR || res == F_NOT_FOUND) {
1550         return res;
1551     }
1552 
1553     assert(idx >= 0);
1554     assert(idx + 1 < Py_SIZE(self));
1555 
1556     *val = self->c_array[idx + 1];
1557     assert(*val != NULL);
1558 
1559     return F_FOUND;
1560 }
1561 
1562 
1563 static int
hamt_node_collision_traverse(PyHamtNode_Collision * self,visitproc visit,void * arg)1564 hamt_node_collision_traverse(PyHamtNode_Collision *self,
1565                              visitproc visit, void *arg)
1566 {
1567     /* Collision's tp_traverse */
1568 
1569     Py_ssize_t i;
1570 
1571     for (i = Py_SIZE(self); --i >= 0; ) {
1572         Py_VISIT(self->c_array[i]);
1573     }
1574 
1575     return 0;
1576 }
1577 
1578 static void
hamt_node_collision_dealloc(PyHamtNode_Collision * self)1579 hamt_node_collision_dealloc(PyHamtNode_Collision *self)
1580 {
1581     /* Collision's tp_dealloc */
1582 
1583     Py_ssize_t len = Py_SIZE(self);
1584 
1585     PyObject_GC_UnTrack(self);
1586     Py_TRASHCAN_BEGIN(self, hamt_node_collision_dealloc)
1587 
1588     if (len > 0) {
1589 
1590         while (--len >= 0) {
1591             Py_XDECREF(self->c_array[len]);
1592         }
1593     }
1594 
1595     Py_TYPE(self)->tp_free((PyObject *)self);
1596     Py_TRASHCAN_END
1597 }
1598 
1599 #ifdef Py_DEBUG
1600 static int
hamt_node_collision_dump(PyHamtNode_Collision * node,_PyUnicodeWriter * writer,int level)1601 hamt_node_collision_dump(PyHamtNode_Collision *node,
1602                          _PyUnicodeWriter *writer, int level)
1603 {
1604     /* Debug build: __dump__() method implementation for Collision nodes. */
1605 
1606     Py_ssize_t i;
1607 
1608     if (_hamt_dump_ident(writer, level + 1)) {
1609         goto error;
1610     }
1611 
1612     if (_hamt_dump_format(writer, "CollisionNode(size=%zd id=%p):\n",
1613                           Py_SIZE(node), node))
1614     {
1615         goto error;
1616     }
1617 
1618     for (i = 0; i < Py_SIZE(node); i += 2) {
1619         PyObject *key = node->c_array[i];
1620         PyObject *val = node->c_array[i + 1];
1621 
1622         if (_hamt_dump_ident(writer, level + 2)) {
1623             goto error;
1624         }
1625 
1626         if (_hamt_dump_format(writer, "%R: %R\n", key, val)) {
1627             goto error;
1628         }
1629     }
1630 
1631     return 0;
1632 error:
1633     return -1;
1634 }
1635 #endif  /* Py_DEBUG */
1636 
1637 
1638 /////////////////////////////////// Array Node
1639 
1640 
1641 static PyHamtNode *
hamt_node_array_new(Py_ssize_t count)1642 hamt_node_array_new(Py_ssize_t count)
1643 {
1644     Py_ssize_t i;
1645 
1646     PyHamtNode_Array *node = PyObject_GC_New(
1647         PyHamtNode_Array, &_PyHamt_ArrayNode_Type);
1648     if (node == NULL) {
1649         return NULL;
1650     }
1651 
1652     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1653         node->a_array[i] = NULL;
1654     }
1655 
1656     node->a_count = count;
1657 
1658     _PyObject_GC_TRACK(node);
1659     return (PyHamtNode *)node;
1660 }
1661 
1662 static PyHamtNode_Array *
hamt_node_array_clone(PyHamtNode_Array * node)1663 hamt_node_array_clone(PyHamtNode_Array *node)
1664 {
1665     PyHamtNode_Array *clone;
1666     Py_ssize_t i;
1667 
1668     VALIDATE_ARRAY_NODE(node)
1669 
1670     /* Create a new Array node. */
1671     clone = (PyHamtNode_Array *)hamt_node_array_new(node->a_count);
1672     if (clone == NULL) {
1673         return NULL;
1674     }
1675 
1676     /* Copy all elements from the current Array node to the new one. */
1677     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1678         Py_XINCREF(node->a_array[i]);
1679         clone->a_array[i] = node->a_array[i];
1680     }
1681 
1682     VALIDATE_ARRAY_NODE(clone)
1683     return clone;
1684 }
1685 
1686 static PyHamtNode *
hamt_node_array_assoc(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)1687 hamt_node_array_assoc(PyHamtNode_Array *self,
1688                       uint32_t shift, int32_t hash,
1689                       PyObject *key, PyObject *val, int* added_leaf)
1690 {
1691     /* Set a new key to this level (currently a Collision node)
1692        of the tree.
1693 
1694        Array nodes don't store values, they can only point to
1695        other nodes.  They are simple arrays of 32 BaseNode pointers/
1696      */
1697 
1698     uint32_t idx = hamt_mask(hash, shift);
1699     PyHamtNode *node = self->a_array[idx];
1700     PyHamtNode *child_node;
1701     PyHamtNode_Array *new_node;
1702     Py_ssize_t i;
1703 
1704     if (node == NULL) {
1705         /* There's no child node for the given hash.  Create a new
1706            Bitmap node for this key. */
1707 
1708         PyHamtNode_Bitmap *empty = NULL;
1709 
1710         /* Get an empty Bitmap node to work with. */
1711         empty = (PyHamtNode_Bitmap *)hamt_node_bitmap_new(0);
1712         if (empty == NULL) {
1713             return NULL;
1714         }
1715 
1716         /* Set key/val to the newly created empty Bitmap, thus
1717            creating a new Bitmap node with our key/value pair. */
1718         child_node = hamt_node_bitmap_assoc(
1719             empty,
1720             shift + 5, hash, key, val, added_leaf);
1721         Py_DECREF(empty);
1722         if (child_node == NULL) {
1723             return NULL;
1724         }
1725 
1726         /* Create a new Array node. */
1727         new_node = (PyHamtNode_Array *)hamt_node_array_new(self->a_count + 1);
1728         if (new_node == NULL) {
1729             Py_DECREF(child_node);
1730             return NULL;
1731         }
1732 
1733         /* Copy all elements from the current Array node to the
1734            new one. */
1735         for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1736             Py_XINCREF(self->a_array[i]);
1737             new_node->a_array[i] = self->a_array[i];
1738         }
1739 
1740         assert(new_node->a_array[idx] == NULL);
1741         new_node->a_array[idx] = child_node;  /* borrow */
1742         VALIDATE_ARRAY_NODE(new_node)
1743     }
1744     else {
1745         /* There's a child node for the given hash.
1746            Set the key to it./ */
1747         child_node = hamt_node_assoc(
1748             node, shift + 5, hash, key, val, added_leaf);
1749         if (child_node == NULL) {
1750             return NULL;
1751         }
1752         else if (child_node == (PyHamtNode *)self) {
1753             Py_DECREF(child_node);
1754             return (PyHamtNode *)self;
1755         }
1756 
1757         new_node = hamt_node_array_clone(self);
1758         if (new_node == NULL) {
1759             Py_DECREF(child_node);
1760             return NULL;
1761         }
1762 
1763         Py_SETREF(new_node->a_array[idx], child_node);  /* borrow */
1764         VALIDATE_ARRAY_NODE(new_node)
1765     }
1766 
1767     return (PyHamtNode *)new_node;
1768 }
1769 
1770 static hamt_without_t
hamt_node_array_without(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)1771 hamt_node_array_without(PyHamtNode_Array *self,
1772                         uint32_t shift, int32_t hash,
1773                         PyObject *key,
1774                         PyHamtNode **new_node)
1775 {
1776     uint32_t idx = hamt_mask(hash, shift);
1777     PyHamtNode *node = self->a_array[idx];
1778 
1779     if (node == NULL) {
1780         return W_NOT_FOUND;
1781     }
1782 
1783     PyHamtNode *sub_node = NULL;
1784     hamt_without_t res = hamt_node_without(
1785         (PyHamtNode *)node,
1786         shift + 5, hash, key, &sub_node);
1787 
1788     switch (res) {
1789         case W_NOT_FOUND:
1790         case W_ERROR:
1791             assert(sub_node == NULL);
1792             return res;
1793 
1794         case W_NEWNODE: {
1795             /* We need to replace a node at the `idx` index.
1796                Clone this node and replace.
1797             */
1798             assert(sub_node != NULL);
1799 
1800             PyHamtNode_Array *clone = hamt_node_array_clone(self);
1801             if (clone == NULL) {
1802                 Py_DECREF(sub_node);
1803                 return W_ERROR;
1804             }
1805 
1806             Py_SETREF(clone->a_array[idx], sub_node);  /* borrow */
1807             *new_node = (PyHamtNode*)clone;  /* borrow */
1808             return W_NEWNODE;
1809         }
1810 
1811         case W_EMPTY: {
1812             assert(sub_node == NULL);
1813             /* We need to remove a node at the `idx` index.
1814                Calculate the size of the replacement Array node.
1815             */
1816             Py_ssize_t new_count = self->a_count - 1;
1817 
1818             if (new_count == 0) {
1819                 return W_EMPTY;
1820             }
1821 
1822             if (new_count >= 16) {
1823                 /* We convert Bitmap nodes to Array nodes, when a
1824                    Bitmap node needs to store more than 15 key/value
1825                    pairs.  So we will create a new Array node if we
1826                    the number of key/values after deletion is still
1827                    greater than 15.
1828                 */
1829 
1830                 PyHamtNode_Array *new = hamt_node_array_clone(self);
1831                 if (new == NULL) {
1832                     return W_ERROR;
1833                 }
1834                 new->a_count = new_count;
1835                 Py_CLEAR(new->a_array[idx]);
1836 
1837                 *new_node = (PyHamtNode*)new;  /* borrow */
1838                 return W_NEWNODE;
1839             }
1840 
1841             /* New Array node would have less than 16 key/value
1842                pairs.  We need to create a replacement Bitmap node. */
1843 
1844             Py_ssize_t bitmap_size = new_count * 2;
1845             uint32_t bitmap = 0;
1846 
1847             PyHamtNode_Bitmap *new = (PyHamtNode_Bitmap *)
1848                 hamt_node_bitmap_new(bitmap_size);
1849             if (new == NULL) {
1850                 return W_ERROR;
1851             }
1852 
1853             Py_ssize_t new_i = 0;
1854             for (uint32_t i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1855                 if (i == idx) {
1856                     /* Skip the node we are deleting. */
1857                     continue;
1858                 }
1859 
1860                 PyHamtNode *node = self->a_array[i];
1861                 if (node == NULL) {
1862                     /* Skip any missing nodes. */
1863                     continue;
1864                 }
1865 
1866                 bitmap |= 1U << i;
1867 
1868                 if (IS_BITMAP_NODE(node)) {
1869                     PyHamtNode_Bitmap *child = (PyHamtNode_Bitmap *)node;
1870 
1871                     if (hamt_node_bitmap_count(child) == 1 &&
1872                             child->b_array[0] != NULL)
1873                     {
1874                         /* node is a Bitmap with one key/value pair, just
1875                            merge it into the new Bitmap node we're building.
1876 
1877                            Note that we don't inline Bitmap nodes that
1878                            have a NULL key -- those nodes point to another
1879                            tree level, and we cannot simply move tree levels
1880                            up or down.
1881                         */
1882                         PyObject *key = child->b_array[0];
1883                         PyObject *val = child->b_array[1];
1884 
1885                         Py_INCREF(key);
1886                         new->b_array[new_i] = key;
1887                         Py_INCREF(val);
1888                         new->b_array[new_i + 1] = val;
1889                     }
1890                     else {
1891                         new->b_array[new_i] = NULL;
1892                         Py_INCREF(node);
1893                         new->b_array[new_i + 1] = (PyObject*)node;
1894                     }
1895                 }
1896                 else {
1897 
1898 #ifdef Py_DEBUG
1899                     if (IS_COLLISION_NODE(node)) {
1900                         Py_ssize_t child_count = hamt_node_collision_count(
1901                             (PyHamtNode_Collision*)node);
1902                         assert(child_count > 1);
1903                     }
1904                     else if (IS_ARRAY_NODE(node)) {
1905                         assert(((PyHamtNode_Array*)node)->a_count >= 16);
1906                     }
1907 #endif
1908 
1909                     /* Just copy the node into our new Bitmap */
1910                     new->b_array[new_i] = NULL;
1911                     Py_INCREF(node);
1912                     new->b_array[new_i + 1] = (PyObject*)node;
1913                 }
1914 
1915                 new_i += 2;
1916             }
1917 
1918             new->b_bitmap = bitmap;
1919             *new_node = (PyHamtNode*)new;  /* borrow */
1920             return W_NEWNODE;
1921         }
1922 
1923         default:
1924             Py_UNREACHABLE();
1925     }
1926 }
1927 
1928 static hamt_find_t
hamt_node_array_find(PyHamtNode_Array * self,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)1929 hamt_node_array_find(PyHamtNode_Array *self,
1930                      uint32_t shift, int32_t hash,
1931                      PyObject *key, PyObject **val)
1932 {
1933     /* Lookup `key` in the Array node `self`.  Set the value
1934        for the found key to 'val'. */
1935 
1936     uint32_t idx = hamt_mask(hash, shift);
1937     PyHamtNode *node;
1938 
1939     node = self->a_array[idx];
1940     if (node == NULL) {
1941         return F_NOT_FOUND;
1942     }
1943 
1944     /* Dispatch to the generic hamt_node_find */
1945     return hamt_node_find(node, shift + 5, hash, key, val);
1946 }
1947 
1948 static int
hamt_node_array_traverse(PyHamtNode_Array * self,visitproc visit,void * arg)1949 hamt_node_array_traverse(PyHamtNode_Array *self,
1950                          visitproc visit, void *arg)
1951 {
1952     /* Array's tp_traverse */
1953 
1954     Py_ssize_t i;
1955 
1956     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1957         Py_VISIT(self->a_array[i]);
1958     }
1959 
1960     return 0;
1961 }
1962 
1963 static void
hamt_node_array_dealloc(PyHamtNode_Array * self)1964 hamt_node_array_dealloc(PyHamtNode_Array *self)
1965 {
1966     /* Array's tp_dealloc */
1967 
1968     Py_ssize_t i;
1969 
1970     PyObject_GC_UnTrack(self);
1971     Py_TRASHCAN_BEGIN(self, hamt_node_array_dealloc)
1972 
1973     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1974         Py_XDECREF(self->a_array[i]);
1975     }
1976 
1977     Py_TYPE(self)->tp_free((PyObject *)self);
1978     Py_TRASHCAN_END
1979 }
1980 
1981 #ifdef Py_DEBUG
1982 static int
hamt_node_array_dump(PyHamtNode_Array * node,_PyUnicodeWriter * writer,int level)1983 hamt_node_array_dump(PyHamtNode_Array *node,
1984                      _PyUnicodeWriter *writer, int level)
1985 {
1986     /* Debug build: __dump__() method implementation for Array nodes. */
1987 
1988     Py_ssize_t i;
1989 
1990     if (_hamt_dump_ident(writer, level + 1)) {
1991         goto error;
1992     }
1993 
1994     if (_hamt_dump_format(writer, "ArrayNode(id=%p):\n", node)) {
1995         goto error;
1996     }
1997 
1998     for (i = 0; i < HAMT_ARRAY_NODE_SIZE; i++) {
1999         if (node->a_array[i] == NULL) {
2000             continue;
2001         }
2002 
2003         if (_hamt_dump_ident(writer, level + 2)) {
2004             goto error;
2005         }
2006 
2007         if (_hamt_dump_format(writer, "%zd::\n", i)) {
2008             goto error;
2009         }
2010 
2011         if (hamt_node_dump(node->a_array[i], writer, level + 1)) {
2012             goto error;
2013         }
2014 
2015         if (_hamt_dump_format(writer, "\n")) {
2016             goto error;
2017         }
2018     }
2019 
2020     return 0;
2021 error:
2022     return -1;
2023 }
2024 #endif  /* Py_DEBUG */
2025 
2026 
2027 /////////////////////////////////// Node Dispatch
2028 
2029 
2030 static PyHamtNode *
hamt_node_assoc(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject * val,int * added_leaf)2031 hamt_node_assoc(PyHamtNode *node,
2032                 uint32_t shift, int32_t hash,
2033                 PyObject *key, PyObject *val, int* added_leaf)
2034 {
2035     /* Set key/value to the 'node' starting with the given shift/hash.
2036        Return a new node, or the same node if key/value already
2037        set.
2038 
2039        added_leaf will be set to 1 if key/value wasn't in the
2040        tree before.
2041 
2042        This method automatically dispatches to the suitable
2043        hamt_node_{nodetype}_assoc method.
2044     */
2045 
2046     if (IS_BITMAP_NODE(node)) {
2047         return hamt_node_bitmap_assoc(
2048             (PyHamtNode_Bitmap *)node,
2049             shift, hash, key, val, added_leaf);
2050     }
2051     else if (IS_ARRAY_NODE(node)) {
2052         return hamt_node_array_assoc(
2053             (PyHamtNode_Array *)node,
2054             shift, hash, key, val, added_leaf);
2055     }
2056     else {
2057         assert(IS_COLLISION_NODE(node));
2058         return hamt_node_collision_assoc(
2059             (PyHamtNode_Collision *)node,
2060             shift, hash, key, val, added_leaf);
2061     }
2062 }
2063 
2064 static hamt_without_t
hamt_node_without(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyHamtNode ** new_node)2065 hamt_node_without(PyHamtNode *node,
2066                   uint32_t shift, int32_t hash,
2067                   PyObject *key,
2068                   PyHamtNode **new_node)
2069 {
2070     if (IS_BITMAP_NODE(node)) {
2071         return hamt_node_bitmap_without(
2072             (PyHamtNode_Bitmap *)node,
2073             shift, hash, key,
2074             new_node);
2075     }
2076     else if (IS_ARRAY_NODE(node)) {
2077         return hamt_node_array_without(
2078             (PyHamtNode_Array *)node,
2079             shift, hash, key,
2080             new_node);
2081     }
2082     else {
2083         assert(IS_COLLISION_NODE(node));
2084         return hamt_node_collision_without(
2085             (PyHamtNode_Collision *)node,
2086             shift, hash, key,
2087             new_node);
2088     }
2089 }
2090 
2091 static hamt_find_t
hamt_node_find(PyHamtNode * node,uint32_t shift,int32_t hash,PyObject * key,PyObject ** val)2092 hamt_node_find(PyHamtNode *node,
2093                uint32_t shift, int32_t hash,
2094                PyObject *key, PyObject **val)
2095 {
2096     /* Find the key in the node starting with the given shift/hash.
2097 
2098        If a value is found, the result will be set to F_FOUND, and
2099        *val will point to the found value object.
2100 
2101        If a value wasn't found, the result will be set to F_NOT_FOUND.
2102 
2103        If an exception occurs during the call, the result will be F_ERROR.
2104 
2105        This method automatically dispatches to the suitable
2106        hamt_node_{nodetype}_find method.
2107     */
2108 
2109     if (IS_BITMAP_NODE(node)) {
2110         return hamt_node_bitmap_find(
2111             (PyHamtNode_Bitmap *)node,
2112             shift, hash, key, val);
2113 
2114     }
2115     else if (IS_ARRAY_NODE(node)) {
2116         return hamt_node_array_find(
2117             (PyHamtNode_Array *)node,
2118             shift, hash, key, val);
2119     }
2120     else {
2121         assert(IS_COLLISION_NODE(node));
2122         return hamt_node_collision_find(
2123             (PyHamtNode_Collision *)node,
2124             shift, hash, key, val);
2125     }
2126 }
2127 
2128 #ifdef Py_DEBUG
2129 static int
hamt_node_dump(PyHamtNode * node,_PyUnicodeWriter * writer,int level)2130 hamt_node_dump(PyHamtNode *node,
2131                _PyUnicodeWriter *writer, int level)
2132 {
2133     /* Debug build: __dump__() method implementation for a node.
2134 
2135        This method automatically dispatches to the suitable
2136        hamt_node_{nodetype})_dump method.
2137     */
2138 
2139     if (IS_BITMAP_NODE(node)) {
2140         return hamt_node_bitmap_dump(
2141             (PyHamtNode_Bitmap *)node, writer, level);
2142     }
2143     else if (IS_ARRAY_NODE(node)) {
2144         return hamt_node_array_dump(
2145             (PyHamtNode_Array *)node, writer, level);
2146     }
2147     else {
2148         assert(IS_COLLISION_NODE(node));
2149         return hamt_node_collision_dump(
2150             (PyHamtNode_Collision *)node, writer, level);
2151     }
2152 }
2153 #endif  /* Py_DEBUG */
2154 
2155 
2156 /////////////////////////////////// Iterators: Machinery
2157 
2158 
2159 static hamt_iter_t
2160 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val);
2161 
2162 
2163 static void
hamt_iterator_init(PyHamtIteratorState * iter,PyHamtNode * root)2164 hamt_iterator_init(PyHamtIteratorState *iter, PyHamtNode *root)
2165 {
2166     for (uint32_t i = 0; i < _Py_HAMT_MAX_TREE_DEPTH; i++) {
2167         iter->i_nodes[i] = NULL;
2168         iter->i_pos[i] = 0;
2169     }
2170 
2171     iter->i_level = 0;
2172 
2173     /* Note: we don't incref/decref nodes in i_nodes. */
2174     iter->i_nodes[0] = root;
2175 }
2176 
2177 static hamt_iter_t
hamt_iterator_bitmap_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2178 hamt_iterator_bitmap_next(PyHamtIteratorState *iter,
2179                           PyObject **key, PyObject **val)
2180 {
2181     int8_t level = iter->i_level;
2182 
2183     PyHamtNode_Bitmap *node = (PyHamtNode_Bitmap *)(iter->i_nodes[level]);
2184     Py_ssize_t pos = iter->i_pos[level];
2185 
2186     if (pos + 1 >= Py_SIZE(node)) {
2187 #ifdef Py_DEBUG
2188         assert(iter->i_level >= 0);
2189         iter->i_nodes[iter->i_level] = NULL;
2190 #endif
2191         iter->i_level--;
2192         return hamt_iterator_next(iter, key, val);
2193     }
2194 
2195     if (node->b_array[pos] == NULL) {
2196         iter->i_pos[level] = pos + 2;
2197 
2198         int8_t next_level = level + 1;
2199         assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2200         iter->i_level = next_level;
2201         iter->i_pos[next_level] = 0;
2202         iter->i_nodes[next_level] = (PyHamtNode *)
2203             node->b_array[pos + 1];
2204 
2205         return hamt_iterator_next(iter, key, val);
2206     }
2207 
2208     *key = node->b_array[pos];
2209     *val = node->b_array[pos + 1];
2210     iter->i_pos[level] = pos + 2;
2211     return I_ITEM;
2212 }
2213 
2214 static hamt_iter_t
hamt_iterator_collision_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2215 hamt_iterator_collision_next(PyHamtIteratorState *iter,
2216                              PyObject **key, PyObject **val)
2217 {
2218     int8_t level = iter->i_level;
2219 
2220     PyHamtNode_Collision *node = (PyHamtNode_Collision *)(iter->i_nodes[level]);
2221     Py_ssize_t pos = iter->i_pos[level];
2222 
2223     if (pos + 1 >= Py_SIZE(node)) {
2224 #ifdef Py_DEBUG
2225         assert(iter->i_level >= 0);
2226         iter->i_nodes[iter->i_level] = NULL;
2227 #endif
2228         iter->i_level--;
2229         return hamt_iterator_next(iter, key, val);
2230     }
2231 
2232     *key = node->c_array[pos];
2233     *val = node->c_array[pos + 1];
2234     iter->i_pos[level] = pos + 2;
2235     return I_ITEM;
2236 }
2237 
2238 static hamt_iter_t
hamt_iterator_array_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2239 hamt_iterator_array_next(PyHamtIteratorState *iter,
2240                          PyObject **key, PyObject **val)
2241 {
2242     int8_t level = iter->i_level;
2243 
2244     PyHamtNode_Array *node = (PyHamtNode_Array *)(iter->i_nodes[level]);
2245     Py_ssize_t pos = iter->i_pos[level];
2246 
2247     if (pos >= HAMT_ARRAY_NODE_SIZE) {
2248 #ifdef Py_DEBUG
2249         assert(iter->i_level >= 0);
2250         iter->i_nodes[iter->i_level] = NULL;
2251 #endif
2252         iter->i_level--;
2253         return hamt_iterator_next(iter, key, val);
2254     }
2255 
2256     for (Py_ssize_t i = pos; i < HAMT_ARRAY_NODE_SIZE; i++) {
2257         if (node->a_array[i] != NULL) {
2258             iter->i_pos[level] = i + 1;
2259 
2260             int8_t next_level = level + 1;
2261             assert(next_level < _Py_HAMT_MAX_TREE_DEPTH);
2262             iter->i_pos[next_level] = 0;
2263             iter->i_nodes[next_level] = node->a_array[i];
2264             iter->i_level = next_level;
2265 
2266             return hamt_iterator_next(iter, key, val);
2267         }
2268     }
2269 
2270 #ifdef Py_DEBUG
2271         assert(iter->i_level >= 0);
2272         iter->i_nodes[iter->i_level] = NULL;
2273 #endif
2274 
2275     iter->i_level--;
2276     return hamt_iterator_next(iter, key, val);
2277 }
2278 
2279 static hamt_iter_t
hamt_iterator_next(PyHamtIteratorState * iter,PyObject ** key,PyObject ** val)2280 hamt_iterator_next(PyHamtIteratorState *iter, PyObject **key, PyObject **val)
2281 {
2282     if (iter->i_level < 0) {
2283         return I_END;
2284     }
2285 
2286     assert(iter->i_level < _Py_HAMT_MAX_TREE_DEPTH);
2287 
2288     PyHamtNode *current = iter->i_nodes[iter->i_level];
2289 
2290     if (IS_BITMAP_NODE(current)) {
2291         return hamt_iterator_bitmap_next(iter, key, val);
2292     }
2293     else if (IS_ARRAY_NODE(current)) {
2294         return hamt_iterator_array_next(iter, key, val);
2295     }
2296     else {
2297         assert(IS_COLLISION_NODE(current));
2298         return hamt_iterator_collision_next(iter, key, val);
2299     }
2300 }
2301 
2302 
2303 /////////////////////////////////// HAMT high-level functions
2304 
2305 
2306 PyHamtObject *
_PyHamt_Assoc(PyHamtObject * o,PyObject * key,PyObject * val)2307 _PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val)
2308 {
2309     int32_t key_hash;
2310     int added_leaf = 0;
2311     PyHamtNode *new_root;
2312     PyHamtObject *new_o;
2313 
2314     key_hash = hamt_hash(key);
2315     if (key_hash == -1) {
2316         return NULL;
2317     }
2318 
2319     new_root = hamt_node_assoc(
2320         (PyHamtNode *)(o->h_root),
2321         0, key_hash, key, val, &added_leaf);
2322     if (new_root == NULL) {
2323         return NULL;
2324     }
2325 
2326     if (new_root == o->h_root) {
2327         Py_DECREF(new_root);
2328         Py_INCREF(o);
2329         return o;
2330     }
2331 
2332     new_o = hamt_alloc();
2333     if (new_o == NULL) {
2334         Py_DECREF(new_root);
2335         return NULL;
2336     }
2337 
2338     new_o->h_root = new_root;  /* borrow */
2339     new_o->h_count = added_leaf ? o->h_count + 1 : o->h_count;
2340 
2341     return new_o;
2342 }
2343 
2344 PyHamtObject *
_PyHamt_Without(PyHamtObject * o,PyObject * key)2345 _PyHamt_Without(PyHamtObject *o, PyObject *key)
2346 {
2347     int32_t key_hash = hamt_hash(key);
2348     if (key_hash == -1) {
2349         return NULL;
2350     }
2351 
2352     PyHamtNode *new_root = NULL;
2353 
2354     hamt_without_t res = hamt_node_without(
2355         (PyHamtNode *)(o->h_root),
2356         0, key_hash, key,
2357         &new_root);
2358 
2359     switch (res) {
2360         case W_ERROR:
2361             return NULL;
2362         case W_EMPTY:
2363             return _PyHamt_New();
2364         case W_NOT_FOUND:
2365             Py_INCREF(o);
2366             return o;
2367         case W_NEWNODE: {
2368             assert(new_root != NULL);
2369 
2370             PyHamtObject *new_o = hamt_alloc();
2371             if (new_o == NULL) {
2372                 Py_DECREF(new_root);
2373                 return NULL;
2374             }
2375 
2376             new_o->h_root = new_root;  /* borrow */
2377             new_o->h_count = o->h_count - 1;
2378             assert(new_o->h_count >= 0);
2379             return new_o;
2380         }
2381         default:
2382             Py_UNREACHABLE();
2383     }
2384 }
2385 
2386 static hamt_find_t
hamt_find(PyHamtObject * o,PyObject * key,PyObject ** val)2387 hamt_find(PyHamtObject *o, PyObject *key, PyObject **val)
2388 {
2389     if (o->h_count == 0) {
2390         return F_NOT_FOUND;
2391     }
2392 
2393     int32_t key_hash = hamt_hash(key);
2394     if (key_hash == -1) {
2395         return F_ERROR;
2396     }
2397 
2398     return hamt_node_find(o->h_root, 0, key_hash, key, val);
2399 }
2400 
2401 
2402 int
_PyHamt_Find(PyHamtObject * o,PyObject * key,PyObject ** val)2403 _PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val)
2404 {
2405     hamt_find_t res = hamt_find(o, key, val);
2406     switch (res) {
2407         case F_ERROR:
2408             return -1;
2409         case F_NOT_FOUND:
2410             return 0;
2411         case F_FOUND:
2412             return 1;
2413         default:
2414             Py_UNREACHABLE();
2415     }
2416 }
2417 
2418 
2419 int
_PyHamt_Eq(PyHamtObject * v,PyHamtObject * w)2420 _PyHamt_Eq(PyHamtObject *v, PyHamtObject *w)
2421 {
2422     if (v == w) {
2423         return 1;
2424     }
2425 
2426     if (v->h_count != w->h_count) {
2427         return 0;
2428     }
2429 
2430     PyHamtIteratorState iter;
2431     hamt_iter_t iter_res;
2432     hamt_find_t find_res;
2433     PyObject *v_key;
2434     PyObject *v_val;
2435     PyObject *w_val;
2436 
2437     hamt_iterator_init(&iter, v->h_root);
2438 
2439     do {
2440         iter_res = hamt_iterator_next(&iter, &v_key, &v_val);
2441         if (iter_res == I_ITEM) {
2442             find_res = hamt_find(w, v_key, &w_val);
2443             switch (find_res) {
2444                 case F_ERROR:
2445                     return -1;
2446 
2447                 case F_NOT_FOUND:
2448                     return 0;
2449 
2450                 case F_FOUND: {
2451                     int cmp = PyObject_RichCompareBool(v_val, w_val, Py_EQ);
2452                     if (cmp < 0) {
2453                         return -1;
2454                     }
2455                     if (cmp == 0) {
2456                         return 0;
2457                     }
2458                 }
2459             }
2460         }
2461     } while (iter_res != I_END);
2462 
2463     return 1;
2464 }
2465 
2466 Py_ssize_t
_PyHamt_Len(PyHamtObject * o)2467 _PyHamt_Len(PyHamtObject *o)
2468 {
2469     return o->h_count;
2470 }
2471 
2472 static PyHamtObject *
hamt_alloc(void)2473 hamt_alloc(void)
2474 {
2475     PyHamtObject *o;
2476     o = PyObject_GC_New(PyHamtObject, &_PyHamt_Type);
2477     if (o == NULL) {
2478         return NULL;
2479     }
2480     o->h_count = 0;
2481     o->h_root = NULL;
2482     o->h_weakreflist = NULL;
2483     PyObject_GC_Track(o);
2484     return o;
2485 }
2486 
2487 PyHamtObject *
_PyHamt_New(void)2488 _PyHamt_New(void)
2489 {
2490     if (_empty_hamt != NULL) {
2491         /* HAMT is an immutable object so we can easily cache an
2492            empty instance. */
2493         Py_INCREF(_empty_hamt);
2494         return _empty_hamt;
2495     }
2496 
2497     PyHamtObject *o = hamt_alloc();
2498     if (o == NULL) {
2499         return NULL;
2500     }
2501 
2502     o->h_root = hamt_node_bitmap_new(0);
2503     if (o->h_root == NULL) {
2504         Py_DECREF(o);
2505         return NULL;
2506     }
2507 
2508     o->h_count = 0;
2509 
2510     if (_empty_hamt == NULL) {
2511         Py_INCREF(o);
2512         _empty_hamt = o;
2513     }
2514 
2515     return o;
2516 }
2517 
2518 #ifdef Py_DEBUG
2519 static PyObject *
hamt_dump(PyHamtObject * self)2520 hamt_dump(PyHamtObject *self)
2521 {
2522     _PyUnicodeWriter writer;
2523 
2524     _PyUnicodeWriter_Init(&writer);
2525 
2526     if (_hamt_dump_format(&writer, "HAMT(len=%zd):\n", self->h_count)) {
2527         goto error;
2528     }
2529 
2530     if (hamt_node_dump(self->h_root, &writer, 0)) {
2531         goto error;
2532     }
2533 
2534     return _PyUnicodeWriter_Finish(&writer);
2535 
2536 error:
2537     _PyUnicodeWriter_Dealloc(&writer);
2538     return NULL;
2539 }
2540 #endif  /* Py_DEBUG */
2541 
2542 
2543 /////////////////////////////////// Iterators: Shared Iterator Implementation
2544 
2545 
2546 static int
hamt_baseiter_tp_clear(PyHamtIterator * it)2547 hamt_baseiter_tp_clear(PyHamtIterator *it)
2548 {
2549     Py_CLEAR(it->hi_obj);
2550     return 0;
2551 }
2552 
2553 static void
hamt_baseiter_tp_dealloc(PyHamtIterator * it)2554 hamt_baseiter_tp_dealloc(PyHamtIterator *it)
2555 {
2556     PyObject_GC_UnTrack(it);
2557     (void)hamt_baseiter_tp_clear(it);
2558     PyObject_GC_Del(it);
2559 }
2560 
2561 static int
hamt_baseiter_tp_traverse(PyHamtIterator * it,visitproc visit,void * arg)2562 hamt_baseiter_tp_traverse(PyHamtIterator *it, visitproc visit, void *arg)
2563 {
2564     Py_VISIT(it->hi_obj);
2565     return 0;
2566 }
2567 
2568 static PyObject *
hamt_baseiter_tp_iternext(PyHamtIterator * it)2569 hamt_baseiter_tp_iternext(PyHamtIterator *it)
2570 {
2571     PyObject *key;
2572     PyObject *val;
2573     hamt_iter_t res = hamt_iterator_next(&it->hi_iter, &key, &val);
2574 
2575     switch (res) {
2576         case I_END:
2577             PyErr_SetNone(PyExc_StopIteration);
2578             return NULL;
2579 
2580         case I_ITEM: {
2581             return (*(it->hi_yield))(key, val);
2582         }
2583 
2584         default: {
2585             Py_UNREACHABLE();
2586         }
2587     }
2588 }
2589 
2590 static Py_ssize_t
hamt_baseiter_tp_len(PyHamtIterator * it)2591 hamt_baseiter_tp_len(PyHamtIterator *it)
2592 {
2593     return it->hi_obj->h_count;
2594 }
2595 
2596 static PyMappingMethods PyHamtIterator_as_mapping = {
2597     (lenfunc)hamt_baseiter_tp_len,
2598 };
2599 
2600 static PyObject *
hamt_baseiter_new(PyTypeObject * type,binaryfunc yield,PyHamtObject * o)2601 hamt_baseiter_new(PyTypeObject *type, binaryfunc yield, PyHamtObject *o)
2602 {
2603     PyHamtIterator *it = PyObject_GC_New(PyHamtIterator, type);
2604     if (it == NULL) {
2605         return NULL;
2606     }
2607 
2608     Py_INCREF(o);
2609     it->hi_obj = o;
2610     it->hi_yield = yield;
2611 
2612     hamt_iterator_init(&it->hi_iter, o->h_root);
2613 
2614     return (PyObject*)it;
2615 }
2616 
2617 #define ITERATOR_TYPE_SHARED_SLOTS                              \
2618     .tp_basicsize = sizeof(PyHamtIterator),                     \
2619     .tp_itemsize = 0,                                           \
2620     .tp_as_mapping = &PyHamtIterator_as_mapping,                \
2621     .tp_dealloc = (destructor)hamt_baseiter_tp_dealloc,         \
2622     .tp_getattro = PyObject_GenericGetAttr,                     \
2623     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,        \
2624     .tp_traverse = (traverseproc)hamt_baseiter_tp_traverse,     \
2625     .tp_clear = (inquiry)hamt_baseiter_tp_clear,                \
2626     .tp_iter = PyObject_SelfIter,                               \
2627     .tp_iternext = (iternextfunc)hamt_baseiter_tp_iternext,
2628 
2629 
2630 /////////////////////////////////// _PyHamtItems_Type
2631 
2632 
2633 PyTypeObject _PyHamtItems_Type = {
2634     PyVarObject_HEAD_INIT(NULL, 0)
2635     "items",
2636     ITERATOR_TYPE_SHARED_SLOTS
2637 };
2638 
2639 static PyObject *
hamt_iter_yield_items(PyObject * key,PyObject * val)2640 hamt_iter_yield_items(PyObject *key, PyObject *val)
2641 {
2642     return PyTuple_Pack(2, key, val);
2643 }
2644 
2645 PyObject *
_PyHamt_NewIterItems(PyHamtObject * o)2646 _PyHamt_NewIterItems(PyHamtObject *o)
2647 {
2648     return hamt_baseiter_new(
2649         &_PyHamtItems_Type, hamt_iter_yield_items, o);
2650 }
2651 
2652 
2653 /////////////////////////////////// _PyHamtKeys_Type
2654 
2655 
2656 PyTypeObject _PyHamtKeys_Type = {
2657     PyVarObject_HEAD_INIT(NULL, 0)
2658     "keys",
2659     ITERATOR_TYPE_SHARED_SLOTS
2660 };
2661 
2662 static PyObject *
hamt_iter_yield_keys(PyObject * key,PyObject * val)2663 hamt_iter_yield_keys(PyObject *key, PyObject *val)
2664 {
2665     Py_INCREF(key);
2666     return key;
2667 }
2668 
2669 PyObject *
_PyHamt_NewIterKeys(PyHamtObject * o)2670 _PyHamt_NewIterKeys(PyHamtObject *o)
2671 {
2672     return hamt_baseiter_new(
2673         &_PyHamtKeys_Type, hamt_iter_yield_keys, o);
2674 }
2675 
2676 
2677 /////////////////////////////////// _PyHamtValues_Type
2678 
2679 
2680 PyTypeObject _PyHamtValues_Type = {
2681     PyVarObject_HEAD_INIT(NULL, 0)
2682     "values",
2683     ITERATOR_TYPE_SHARED_SLOTS
2684 };
2685 
2686 static PyObject *
hamt_iter_yield_values(PyObject * key,PyObject * val)2687 hamt_iter_yield_values(PyObject *key, PyObject *val)
2688 {
2689     Py_INCREF(val);
2690     return val;
2691 }
2692 
2693 PyObject *
_PyHamt_NewIterValues(PyHamtObject * o)2694 _PyHamt_NewIterValues(PyHamtObject *o)
2695 {
2696     return hamt_baseiter_new(
2697         &_PyHamtValues_Type, hamt_iter_yield_values, o);
2698 }
2699 
2700 
2701 /////////////////////////////////// _PyHamt_Type
2702 
2703 
2704 #ifdef Py_DEBUG
2705 static PyObject *
2706 hamt_dump(PyHamtObject *self);
2707 #endif
2708 
2709 
2710 static PyObject *
hamt_tp_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2711 hamt_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2712 {
2713     return (PyObject*)_PyHamt_New();
2714 }
2715 
2716 static int
hamt_tp_clear(PyHamtObject * self)2717 hamt_tp_clear(PyHamtObject *self)
2718 {
2719     Py_CLEAR(self->h_root);
2720     return 0;
2721 }
2722 
2723 
2724 static int
hamt_tp_traverse(PyHamtObject * self,visitproc visit,void * arg)2725 hamt_tp_traverse(PyHamtObject *self, visitproc visit, void *arg)
2726 {
2727     Py_VISIT(self->h_root);
2728     return 0;
2729 }
2730 
2731 static void
hamt_tp_dealloc(PyHamtObject * self)2732 hamt_tp_dealloc(PyHamtObject *self)
2733 {
2734     PyObject_GC_UnTrack(self);
2735     if (self->h_weakreflist != NULL) {
2736         PyObject_ClearWeakRefs((PyObject*)self);
2737     }
2738     (void)hamt_tp_clear(self);
2739     Py_TYPE(self)->tp_free(self);
2740 }
2741 
2742 
2743 static PyObject *
hamt_tp_richcompare(PyObject * v,PyObject * w,int op)2744 hamt_tp_richcompare(PyObject *v, PyObject *w, int op)
2745 {
2746     if (!PyHamt_Check(v) || !PyHamt_Check(w) || (op != Py_EQ && op != Py_NE)) {
2747         Py_RETURN_NOTIMPLEMENTED;
2748     }
2749 
2750     int res = _PyHamt_Eq((PyHamtObject *)v, (PyHamtObject *)w);
2751     if (res < 0) {
2752         return NULL;
2753     }
2754 
2755     if (op == Py_NE) {
2756         res = !res;
2757     }
2758 
2759     if (res) {
2760         Py_RETURN_TRUE;
2761     }
2762     else {
2763         Py_RETURN_FALSE;
2764     }
2765 }
2766 
2767 static int
hamt_tp_contains(PyHamtObject * self,PyObject * key)2768 hamt_tp_contains(PyHamtObject *self, PyObject *key)
2769 {
2770     PyObject *val;
2771     return _PyHamt_Find(self, key, &val);
2772 }
2773 
2774 static PyObject *
hamt_tp_subscript(PyHamtObject * self,PyObject * key)2775 hamt_tp_subscript(PyHamtObject *self, PyObject *key)
2776 {
2777     PyObject *val;
2778     hamt_find_t res = hamt_find(self, key, &val);
2779     switch (res) {
2780         case F_ERROR:
2781             return NULL;
2782         case F_FOUND:
2783             Py_INCREF(val);
2784             return val;
2785         case F_NOT_FOUND:
2786             PyErr_SetObject(PyExc_KeyError, key);
2787             return NULL;
2788         default:
2789             Py_UNREACHABLE();
2790     }
2791 }
2792 
2793 static Py_ssize_t
hamt_tp_len(PyHamtObject * self)2794 hamt_tp_len(PyHamtObject *self)
2795 {
2796     return _PyHamt_Len(self);
2797 }
2798 
2799 static PyObject *
hamt_tp_iter(PyHamtObject * self)2800 hamt_tp_iter(PyHamtObject *self)
2801 {
2802     return _PyHamt_NewIterKeys(self);
2803 }
2804 
2805 static PyObject *
hamt_py_set(PyHamtObject * self,PyObject * args)2806 hamt_py_set(PyHamtObject *self, PyObject *args)
2807 {
2808     PyObject *key;
2809     PyObject *val;
2810 
2811     if (!PyArg_UnpackTuple(args, "set", 2, 2, &key, &val)) {
2812         return NULL;
2813     }
2814 
2815     return (PyObject *)_PyHamt_Assoc(self, key, val);
2816 }
2817 
2818 static PyObject *
hamt_py_get(PyHamtObject * self,PyObject * args)2819 hamt_py_get(PyHamtObject *self, PyObject *args)
2820 {
2821     PyObject *key;
2822     PyObject *def = NULL;
2823 
2824     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &def)) {
2825         return NULL;
2826     }
2827 
2828     PyObject *val = NULL;
2829     hamt_find_t res = hamt_find(self, key, &val);
2830     switch (res) {
2831         case F_ERROR:
2832             return NULL;
2833         case F_FOUND:
2834             Py_INCREF(val);
2835             return val;
2836         case F_NOT_FOUND:
2837             if (def == NULL) {
2838                 Py_RETURN_NONE;
2839             }
2840             Py_INCREF(def);
2841             return def;
2842         default:
2843             Py_UNREACHABLE();
2844     }
2845 }
2846 
2847 static PyObject *
hamt_py_delete(PyHamtObject * self,PyObject * key)2848 hamt_py_delete(PyHamtObject *self, PyObject *key)
2849 {
2850     return (PyObject *)_PyHamt_Without(self, key);
2851 }
2852 
2853 static PyObject *
hamt_py_items(PyHamtObject * self,PyObject * args)2854 hamt_py_items(PyHamtObject *self, PyObject *args)
2855 {
2856     return _PyHamt_NewIterItems(self);
2857 }
2858 
2859 static PyObject *
hamt_py_values(PyHamtObject * self,PyObject * args)2860 hamt_py_values(PyHamtObject *self, PyObject *args)
2861 {
2862     return _PyHamt_NewIterValues(self);
2863 }
2864 
2865 static PyObject *
hamt_py_keys(PyHamtObject * self,PyObject * args)2866 hamt_py_keys(PyHamtObject *self, PyObject *args)
2867 {
2868     return _PyHamt_NewIterKeys(self);
2869 }
2870 
2871 #ifdef Py_DEBUG
2872 static PyObject *
hamt_py_dump(PyHamtObject * self,PyObject * args)2873 hamt_py_dump(PyHamtObject *self, PyObject *args)
2874 {
2875     return hamt_dump(self);
2876 }
2877 #endif
2878 
2879 
2880 static PyMethodDef PyHamt_methods[] = {
2881     {"set", (PyCFunction)hamt_py_set, METH_VARARGS, NULL},
2882     {"get", (PyCFunction)hamt_py_get, METH_VARARGS, NULL},
2883     {"delete", (PyCFunction)hamt_py_delete, METH_O, NULL},
2884     {"items", (PyCFunction)hamt_py_items, METH_NOARGS, NULL},
2885     {"keys", (PyCFunction)hamt_py_keys, METH_NOARGS, NULL},
2886     {"values", (PyCFunction)hamt_py_values, METH_NOARGS, NULL},
2887 #ifdef Py_DEBUG
2888     {"__dump__", (PyCFunction)hamt_py_dump, METH_NOARGS, NULL},
2889 #endif
2890     {NULL, NULL}
2891 };
2892 
2893 static PySequenceMethods PyHamt_as_sequence = {
2894     0,                                /* sq_length */
2895     0,                                /* sq_concat */
2896     0,                                /* sq_repeat */
2897     0,                                /* sq_item */
2898     0,                                /* sq_slice */
2899     0,                                /* sq_ass_item */
2900     0,                                /* sq_ass_slice */
2901     (objobjproc)hamt_tp_contains,     /* sq_contains */
2902     0,                                /* sq_inplace_concat */
2903     0,                                /* sq_inplace_repeat */
2904 };
2905 
2906 static PyMappingMethods PyHamt_as_mapping = {
2907     (lenfunc)hamt_tp_len,             /* mp_length */
2908     (binaryfunc)hamt_tp_subscript,    /* mp_subscript */
2909 };
2910 
2911 PyTypeObject _PyHamt_Type = {
2912     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2913     "hamt",
2914     sizeof(PyHamtObject),
2915     .tp_methods = PyHamt_methods,
2916     .tp_as_mapping = &PyHamt_as_mapping,
2917     .tp_as_sequence = &PyHamt_as_sequence,
2918     .tp_iter = (getiterfunc)hamt_tp_iter,
2919     .tp_dealloc = (destructor)hamt_tp_dealloc,
2920     .tp_getattro = PyObject_GenericGetAttr,
2921     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2922     .tp_richcompare = hamt_tp_richcompare,
2923     .tp_traverse = (traverseproc)hamt_tp_traverse,
2924     .tp_clear = (inquiry)hamt_tp_clear,
2925     .tp_new = hamt_tp_new,
2926     .tp_weaklistoffset = offsetof(PyHamtObject, h_weakreflist),
2927     .tp_hash = PyObject_HashNotImplemented,
2928 };
2929 
2930 
2931 /////////////////////////////////// Tree Node Types
2932 
2933 
2934 PyTypeObject _PyHamt_ArrayNode_Type = {
2935     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2936     "hamt_array_node",
2937     sizeof(PyHamtNode_Array),
2938     0,
2939     .tp_dealloc = (destructor)hamt_node_array_dealloc,
2940     .tp_getattro = PyObject_GenericGetAttr,
2941     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2942     .tp_traverse = (traverseproc)hamt_node_array_traverse,
2943     .tp_free = PyObject_GC_Del,
2944     .tp_hash = PyObject_HashNotImplemented,
2945 };
2946 
2947 PyTypeObject _PyHamt_BitmapNode_Type = {
2948     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2949     "hamt_bitmap_node",
2950     sizeof(PyHamtNode_Bitmap) - sizeof(PyObject *),
2951     sizeof(PyObject *),
2952     .tp_dealloc = (destructor)hamt_node_bitmap_dealloc,
2953     .tp_getattro = PyObject_GenericGetAttr,
2954     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2955     .tp_traverse = (traverseproc)hamt_node_bitmap_traverse,
2956     .tp_free = PyObject_GC_Del,
2957     .tp_hash = PyObject_HashNotImplemented,
2958 };
2959 
2960 PyTypeObject _PyHamt_CollisionNode_Type = {
2961     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2962     "hamt_collision_node",
2963     sizeof(PyHamtNode_Collision) - sizeof(PyObject *),
2964     sizeof(PyObject *),
2965     .tp_dealloc = (destructor)hamt_node_collision_dealloc,
2966     .tp_getattro = PyObject_GenericGetAttr,
2967     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
2968     .tp_traverse = (traverseproc)hamt_node_collision_traverse,
2969     .tp_free = PyObject_GC_Del,
2970     .tp_hash = PyObject_HashNotImplemented,
2971 };
2972 
2973 
2974 int
_PyHamt_Init(void)2975 _PyHamt_Init(void)
2976 {
2977     if ((PyType_Ready(&_PyHamt_Type) < 0) ||
2978         (PyType_Ready(&_PyHamt_ArrayNode_Type) < 0) ||
2979         (PyType_Ready(&_PyHamt_BitmapNode_Type) < 0) ||
2980         (PyType_Ready(&_PyHamt_CollisionNode_Type) < 0) ||
2981         (PyType_Ready(&_PyHamtKeys_Type) < 0) ||
2982         (PyType_Ready(&_PyHamtValues_Type) < 0) ||
2983         (PyType_Ready(&_PyHamtItems_Type) < 0))
2984     {
2985         return 0;
2986     }
2987 
2988     return 1;
2989 }
2990 
2991 void
_PyHamt_Fini(void)2992 _PyHamt_Fini(void)
2993 {
2994     Py_CLEAR(_empty_hamt);
2995     Py_CLEAR(_empty_bitmap_node);
2996 }
2997