• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* set object implementation
3 
4    Written and maintained by Raymond D. Hettinger <python@rcn.com>
5    Derived from Lib/sets.py and Objects/dictobject.c.
6 
7    The basic lookup function used by all operations.
8    This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9 
10    The initial probe index is computed as hash mod the table size.
11    Subsequent probe indices are computed as explained in Objects/dictobject.c.
12 
13    To improve cache locality, each probe inspects a series of consecutive
14    nearby entries before moving on to probes elsewhere in memory.  This leaves
15    us with a hybrid of linear probing and randomized probing.  The linear probing
16    reduces the cost of hash collisions because consecutive memory accesses
17    tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
18    we then use more of the upper bits from the hash value and apply a simple
19    linear congruential random number genearator.  This helps break-up long
20    chains of collisions.
21 
22    All arithmetic on hash should ignore overflow.
23 
24    Unlike the dictionary implementation, the lookkey function can return
25    NULL if the rich comparison returns an error.
26 
27    Use cases for sets differ considerably from dictionaries where looked-up
28    keys are more likely to be present.  In contrast, sets are primarily
29    about membership testing where the presence of an element is not known in
30    advance.  Accordingly, the set implementation needs to optimize for both
31    the found and not-found case.
32 */
33 
34 #include "Python.h"
35 #include "pycore_object.h"        // _PyObject_GC_UNTRACK()
36 #include <stddef.h>               // offsetof()
37 
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40 
41 #define dummy (&_dummy_struct)
42 
43 
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46 
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51 
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54 
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58     setentry *table;
59     setentry *entry;
60     size_t perturb = hash;
61     size_t mask = so->mask;
62     size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63     int probes;
64     int cmp;
65 
66     while (1) {
67         entry = &so->table[i];
68         probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69         do {
70             if (entry->hash == 0 && entry->key == NULL)
71                 return entry;
72             if (entry->hash == hash) {
73                 PyObject *startkey = entry->key;
74                 assert(startkey != dummy);
75                 if (startkey == key)
76                     return entry;
77                 if (PyUnicode_CheckExact(startkey)
78                     && PyUnicode_CheckExact(key)
79                     && _PyUnicode_EQ(startkey, key))
80                     return entry;
81                 table = so->table;
82                 Py_INCREF(startkey);
83                 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84                 Py_DECREF(startkey);
85                 if (cmp < 0)
86                     return NULL;
87                 if (table != so->table || entry->key != startkey)
88                     return set_lookkey(so, key, hash);
89                 if (cmp > 0)
90                     return entry;
91                 mask = so->mask;
92             }
93             entry++;
94         } while (probes--);
95         perturb >>= PERTURB_SHIFT;
96         i = (i * 5 + 1 + perturb) & mask;
97     }
98 }
99 
100 static int set_table_resize(PySetObject *, Py_ssize_t);
101 
102 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104 {
105     setentry *table;
106     setentry *entry;
107     size_t perturb;
108     size_t mask;
109     size_t i;                       /* Unsigned for defined overflow behavior */
110     int probes;
111     int cmp;
112 
113     /* Pre-increment is necessary to prevent arbitrary code in the rich
114        comparison from deallocating the key just before the insertion. */
115     Py_INCREF(key);
116 
117   restart:
118 
119     mask = so->mask;
120     i = (size_t)hash & mask;
121     perturb = hash;
122 
123     while (1) {
124         entry = &so->table[i];
125         probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
126         do {
127             if (entry->hash == 0 && entry->key == NULL)
128                 goto found_unused;
129             if (entry->hash == hash) {
130                 PyObject *startkey = entry->key;
131                 assert(startkey != dummy);
132                 if (startkey == key)
133                     goto found_active;
134                 if (PyUnicode_CheckExact(startkey)
135                     && PyUnicode_CheckExact(key)
136                     && _PyUnicode_EQ(startkey, key))
137                     goto found_active;
138                 table = so->table;
139                 Py_INCREF(startkey);
140                 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
141                 Py_DECREF(startkey);
142                 if (cmp > 0)
143                     goto found_active;
144                 if (cmp < 0)
145                     goto comparison_error;
146                 if (table != so->table || entry->key != startkey)
147                     goto restart;
148                 mask = so->mask;
149             }
150             entry++;
151         } while (probes--);
152         perturb >>= PERTURB_SHIFT;
153         i = (i * 5 + 1 + perturb) & mask;
154     }
155 
156   found_unused:
157     so->fill++;
158     so->used++;
159     entry->key = key;
160     entry->hash = hash;
161     if ((size_t)so->fill*5 < mask*3)
162         return 0;
163     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
164 
165   found_active:
166     Py_DECREF(key);
167     return 0;
168 
169   comparison_error:
170     Py_DECREF(key);
171     return -1;
172 }
173 
174 /*
175 Internal routine used by set_table_resize() to insert an item which is
176 known to be absent from the set.  Besides the performance benefit,
177 there is also safety benefit since using set_add_entry() risks making
178 a callback in the middle of a set_table_resize(), see issue 1456209.
179 The caller is responsible for updating the key's reference count and
180 the setobject's fill and used fields.
181 */
182 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)183 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
184 {
185     setentry *entry;
186     size_t perturb = hash;
187     size_t i = (size_t)hash & mask;
188     size_t j;
189 
190     while (1) {
191         entry = &table[i];
192         if (entry->key == NULL)
193             goto found_null;
194         if (i + LINEAR_PROBES <= mask) {
195             for (j = 0; j < LINEAR_PROBES; j++) {
196                 entry++;
197                 if (entry->key == NULL)
198                     goto found_null;
199             }
200         }
201         perturb >>= PERTURB_SHIFT;
202         i = (i * 5 + 1 + perturb) & mask;
203     }
204   found_null:
205     entry->key = key;
206     entry->hash = hash;
207 }
208 
209 /* ======== End logic for probing the hash table ========================== */
210 /* ======================================================================== */
211 
212 /*
213 Restructure the table by allocating a new table and reinserting all
214 keys again.  When entries have been deleted, the new table may
215 actually be smaller than the old one.
216 */
217 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)218 set_table_resize(PySetObject *so, Py_ssize_t minused)
219 {
220     setentry *oldtable, *newtable, *entry;
221     Py_ssize_t oldmask = so->mask;
222     size_t newmask;
223     int is_oldtable_malloced;
224     setentry small_copy[PySet_MINSIZE];
225 
226     assert(minused >= 0);
227 
228     /* Find the smallest table size > minused. */
229     /* XXX speed-up with intrinsics */
230     size_t newsize = PySet_MINSIZE;
231     while (newsize <= (size_t)minused) {
232         newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
233     }
234 
235     /* Get space for a new table. */
236     oldtable = so->table;
237     assert(oldtable != NULL);
238     is_oldtable_malloced = oldtable != so->smalltable;
239 
240     if (newsize == PySet_MINSIZE) {
241         /* A large table is shrinking, or we can't get any smaller. */
242         newtable = so->smalltable;
243         if (newtable == oldtable) {
244             if (so->fill == so->used) {
245                 /* No dummies, so no point doing anything. */
246                 return 0;
247             }
248             /* We're not going to resize it, but rebuild the
249                table anyway to purge old dummy entries.
250                Subtle:  This is *necessary* if fill==size,
251                as set_lookkey needs at least one virgin slot to
252                terminate failing searches.  If fill < size, it's
253                merely desirable, as dummies slow searches. */
254             assert(so->fill > so->used);
255             memcpy(small_copy, oldtable, sizeof(small_copy));
256             oldtable = small_copy;
257         }
258     }
259     else {
260         newtable = PyMem_NEW(setentry, newsize);
261         if (newtable == NULL) {
262             PyErr_NoMemory();
263             return -1;
264         }
265     }
266 
267     /* Make the set empty, using the new table. */
268     assert(newtable != oldtable);
269     memset(newtable, 0, sizeof(setentry) * newsize);
270     so->mask = newsize - 1;
271     so->table = newtable;
272 
273     /* Copy the data over; this is refcount-neutral for active entries;
274        dummy entries aren't copied over, of course */
275     newmask = (size_t)so->mask;
276     if (so->fill == so->used) {
277         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
278             if (entry->key != NULL) {
279                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
280             }
281         }
282     } else {
283         so->fill = so->used;
284         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
285             if (entry->key != NULL && entry->key != dummy) {
286                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
287             }
288         }
289     }
290 
291     if (is_oldtable_malloced)
292         PyMem_DEL(oldtable);
293     return 0;
294 }
295 
296 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)297 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
298 {
299     setentry *entry;
300 
301     entry = set_lookkey(so, key, hash);
302     if (entry != NULL)
303         return entry->key != NULL;
304     return -1;
305 }
306 
307 #define DISCARD_NOTFOUND 0
308 #define DISCARD_FOUND 1
309 
310 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312 {
313     setentry *entry;
314     PyObject *old_key;
315 
316     entry = set_lookkey(so, key, hash);
317     if (entry == NULL)
318         return -1;
319     if (entry->key == NULL)
320         return DISCARD_NOTFOUND;
321     old_key = entry->key;
322     entry->key = dummy;
323     entry->hash = -1;
324     so->used--;
325     Py_DECREF(old_key);
326     return DISCARD_FOUND;
327 }
328 
329 static int
set_add_key(PySetObject * so,PyObject * key)330 set_add_key(PySetObject *so, PyObject *key)
331 {
332     Py_hash_t hash;
333 
334     if (!PyUnicode_CheckExact(key) ||
335         (hash = ((PyASCIIObject *) key)->hash) == -1) {
336         hash = PyObject_Hash(key);
337         if (hash == -1)
338             return -1;
339     }
340     return set_add_entry(so, key, hash);
341 }
342 
343 static int
set_contains_key(PySetObject * so,PyObject * key)344 set_contains_key(PySetObject *so, PyObject *key)
345 {
346     Py_hash_t hash;
347 
348     if (!PyUnicode_CheckExact(key) ||
349         (hash = ((PyASCIIObject *) key)->hash) == -1) {
350         hash = PyObject_Hash(key);
351         if (hash == -1)
352             return -1;
353     }
354     return set_contains_entry(so, key, hash);
355 }
356 
357 static int
set_discard_key(PySetObject * so,PyObject * key)358 set_discard_key(PySetObject *so, PyObject *key)
359 {
360     Py_hash_t hash;
361 
362     if (!PyUnicode_CheckExact(key) ||
363         (hash = ((PyASCIIObject *) key)->hash) == -1) {
364         hash = PyObject_Hash(key);
365         if (hash == -1)
366             return -1;
367     }
368     return set_discard_entry(so, key, hash);
369 }
370 
371 static void
set_empty_to_minsize(PySetObject * so)372 set_empty_to_minsize(PySetObject *so)
373 {
374     memset(so->smalltable, 0, sizeof(so->smalltable));
375     so->fill = 0;
376     so->used = 0;
377     so->mask = PySet_MINSIZE - 1;
378     so->table = so->smalltable;
379     so->hash = -1;
380 }
381 
382 static int
set_clear_internal(PySetObject * so)383 set_clear_internal(PySetObject *so)
384 {
385     setentry *entry;
386     setentry *table = so->table;
387     Py_ssize_t fill = so->fill;
388     Py_ssize_t used = so->used;
389     int table_is_malloced = table != so->smalltable;
390     setentry small_copy[PySet_MINSIZE];
391 
392     assert (PyAnySet_Check(so));
393     assert(table != NULL);
394 
395     /* This is delicate.  During the process of clearing the set,
396      * decrefs can cause the set to mutate.  To avoid fatal confusion
397      * (voice of experience), we have to make the set empty before
398      * clearing the slots, and never refer to anything via so->ref while
399      * clearing.
400      */
401     if (table_is_malloced)
402         set_empty_to_minsize(so);
403 
404     else if (fill > 0) {
405         /* It's a small table with something that needs to be cleared.
406          * Afraid the only safe way is to copy the set entries into
407          * another small table first.
408          */
409         memcpy(small_copy, table, sizeof(small_copy));
410         table = small_copy;
411         set_empty_to_minsize(so);
412     }
413     /* else it's a small table that's already empty */
414 
415     /* Now we can finally clear things.  If C had refcounts, we could
416      * assert that the refcount on table is 1 now, i.e. that this function
417      * has unique access to it, so decref side-effects can't alter it.
418      */
419     for (entry = table; used > 0; entry++) {
420         if (entry->key && entry->key != dummy) {
421             used--;
422             Py_DECREF(entry->key);
423         }
424     }
425 
426     if (table_is_malloced)
427         PyMem_DEL(table);
428     return 0;
429 }
430 
431 /*
432  * Iterate over a set table.  Use like so:
433  *
434  *     Py_ssize_t pos;
435  *     setentry *entry;
436  *     pos = 0;   # important!  pos should not otherwise be changed by you
437  *     while (set_next(yourset, &pos, &entry)) {
438  *              Refer to borrowed reference in entry->key.
439  *     }
440  *
441  * CAUTION:  In general, it isn't safe to use set_next in a loop that
442  * mutates the table.
443  */
444 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)445 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
446 {
447     Py_ssize_t i;
448     Py_ssize_t mask;
449     setentry *entry;
450 
451     assert (PyAnySet_Check(so));
452     i = *pos_ptr;
453     assert(i >= 0);
454     mask = so->mask;
455     entry = &so->table[i];
456     while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
457         i++;
458         entry++;
459     }
460     *pos_ptr = i+1;
461     if (i > mask)
462         return 0;
463     assert(entry != NULL);
464     *entry_ptr = entry;
465     return 1;
466 }
467 
468 static void
set_dealloc(PySetObject * so)469 set_dealloc(PySetObject *so)
470 {
471     setentry *entry;
472     Py_ssize_t used = so->used;
473 
474     /* bpo-31095: UnTrack is needed before calling any callbacks */
475     PyObject_GC_UnTrack(so);
476     Py_TRASHCAN_BEGIN(so, set_dealloc)
477     if (so->weakreflist != NULL)
478         PyObject_ClearWeakRefs((PyObject *) so);
479 
480     for (entry = so->table; used > 0; entry++) {
481         if (entry->key && entry->key != dummy) {
482                 used--;
483                 Py_DECREF(entry->key);
484         }
485     }
486     if (so->table != so->smalltable)
487         PyMem_DEL(so->table);
488     Py_TYPE(so)->tp_free(so);
489     Py_TRASHCAN_END
490 }
491 
492 static PyObject *
set_repr(PySetObject * so)493 set_repr(PySetObject *so)
494 {
495     PyObject *result=NULL, *keys, *listrepr, *tmp;
496     int status = Py_ReprEnter((PyObject*)so);
497 
498     if (status != 0) {
499         if (status < 0)
500             return NULL;
501         return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
502     }
503 
504     /* shortcut for the empty set */
505     if (!so->used) {
506         Py_ReprLeave((PyObject*)so);
507         return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
508     }
509 
510     keys = PySequence_List((PyObject *)so);
511     if (keys == NULL)
512         goto done;
513 
514     /* repr(keys)[1:-1] */
515     listrepr = PyObject_Repr(keys);
516     Py_DECREF(keys);
517     if (listrepr == NULL)
518         goto done;
519     tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
520     Py_DECREF(listrepr);
521     if (tmp == NULL)
522         goto done;
523     listrepr = tmp;
524 
525     if (!Py_IS_TYPE(so, &PySet_Type))
526         result = PyUnicode_FromFormat("%s({%U})",
527                                       Py_TYPE(so)->tp_name,
528                                       listrepr);
529     else
530         result = PyUnicode_FromFormat("{%U}", listrepr);
531     Py_DECREF(listrepr);
532 done:
533     Py_ReprLeave((PyObject*)so);
534     return result;
535 }
536 
537 static Py_ssize_t
set_len(PyObject * so)538 set_len(PyObject *so)
539 {
540     return ((PySetObject *)so)->used;
541 }
542 
543 static int
set_merge(PySetObject * so,PyObject * otherset)544 set_merge(PySetObject *so, PyObject *otherset)
545 {
546     PySetObject *other;
547     PyObject *key;
548     Py_ssize_t i;
549     setentry *so_entry;
550     setentry *other_entry;
551 
552     assert (PyAnySet_Check(so));
553     assert (PyAnySet_Check(otherset));
554 
555     other = (PySetObject*)otherset;
556     if (other == so || other->used == 0)
557         /* a.update(a) or a.update(set()); nothing to do */
558         return 0;
559     /* Do one big resize at the start, rather than
560      * incrementally resizing as we insert new keys.  Expect
561      * that there will be no (or few) overlapping keys.
562      */
563     if ((so->fill + other->used)*5 >= so->mask*3) {
564         if (set_table_resize(so, (so->used + other->used)*2) != 0)
565             return -1;
566     }
567     so_entry = so->table;
568     other_entry = other->table;
569 
570     /* If our table is empty, and both tables have the same size, and
571        there are no dummies to eliminate, then just copy the pointers. */
572     if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
573         for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
574             key = other_entry->key;
575             if (key != NULL) {
576                 assert(so_entry->key == NULL);
577                 Py_INCREF(key);
578                 so_entry->key = key;
579                 so_entry->hash = other_entry->hash;
580             }
581         }
582         so->fill = other->fill;
583         so->used = other->used;
584         return 0;
585     }
586 
587     /* If our table is empty, we can use set_insert_clean() */
588     if (so->fill == 0) {
589         setentry *newtable = so->table;
590         size_t newmask = (size_t)so->mask;
591         so->fill = other->used;
592         so->used = other->used;
593         for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
594             key = other_entry->key;
595             if (key != NULL && key != dummy) {
596                 Py_INCREF(key);
597                 set_insert_clean(newtable, newmask, key, other_entry->hash);
598             }
599         }
600         return 0;
601     }
602 
603     /* We can't assure there are no duplicates, so do normal insertions */
604     for (i = 0; i <= other->mask; i++) {
605         other_entry = &other->table[i];
606         key = other_entry->key;
607         if (key != NULL && key != dummy) {
608             if (set_add_entry(so, key, other_entry->hash))
609                 return -1;
610         }
611     }
612     return 0;
613 }
614 
615 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))616 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
617 {
618     /* Make sure the search finger is in bounds */
619     setentry *entry = so->table + (so->finger & so->mask);
620     setentry *limit = so->table + so->mask;
621     PyObject *key;
622 
623     if (so->used == 0) {
624         PyErr_SetString(PyExc_KeyError, "pop from an empty set");
625         return NULL;
626     }
627     while (entry->key == NULL || entry->key==dummy) {
628         entry++;
629         if (entry > limit)
630             entry = so->table;
631     }
632     key = entry->key;
633     entry->key = dummy;
634     entry->hash = -1;
635     so->used--;
636     so->finger = entry - so->table + 1;   /* next place to start */
637     return key;
638 }
639 
640 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
641 Raises KeyError if the set is empty.");
642 
643 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)644 set_traverse(PySetObject *so, visitproc visit, void *arg)
645 {
646     Py_ssize_t pos = 0;
647     setentry *entry;
648 
649     while (set_next(so, &pos, &entry))
650         Py_VISIT(entry->key);
651     return 0;
652 }
653 
654 /* Work to increase the bit dispersion for closely spaced hash values.
655    This is important because some use cases have many combinations of a
656    small number of elements with nearby hashes so that many distinct
657    combinations collapse to only a handful of distinct hash values. */
658 
659 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)660 _shuffle_bits(Py_uhash_t h)
661 {
662     return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
663 }
664 
665 /* Most of the constants in this hash algorithm are randomly chosen
666    large primes with "interesting bit patterns" and that passed tests
667    for good collision statistics on a variety of problematic datasets
668    including powersets and graph structures (such as David Eppstein's
669    graph recipes in Lib/test/test_set.py) */
670 
671 static Py_hash_t
frozenset_hash(PyObject * self)672 frozenset_hash(PyObject *self)
673 {
674     PySetObject *so = (PySetObject *)self;
675     Py_uhash_t hash = 0;
676     setentry *entry;
677 
678     if (so->hash != -1)
679         return so->hash;
680 
681     /* Xor-in shuffled bits from every entry's hash field because xor is
682        commutative and a frozenset hash should be independent of order.
683 
684        For speed, include null entries and dummy entries and then
685        subtract out their effect afterwards so that the final hash
686        depends only on active entries.  This allows the code to be
687        vectorized by the compiler and it saves the unpredictable
688        branches that would arise when trying to exclude null and dummy
689        entries on every iteration. */
690 
691     for (entry = so->table; entry <= &so->table[so->mask]; entry++)
692         hash ^= _shuffle_bits(entry->hash);
693 
694     /* Remove the effect of an odd number of NULL entries */
695     if ((so->mask + 1 - so->fill) & 1)
696         hash ^= _shuffle_bits(0);
697 
698     /* Remove the effect of an odd number of dummy entries */
699     if ((so->fill - so->used) & 1)
700         hash ^= _shuffle_bits(-1);
701 
702     /* Factor in the number of active entries */
703     hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
704 
705     /* Disperse patterns arising in nested frozensets */
706     hash ^= (hash >> 11) ^ (hash >> 25);
707     hash = hash * 69069U + 907133923UL;
708 
709     /* -1 is reserved as an error code */
710     if (hash == (Py_uhash_t)-1)
711         hash = 590923713UL;
712 
713     so->hash = hash;
714     return hash;
715 }
716 
717 /***** Set iterator type ***********************************************/
718 
719 typedef struct {
720     PyObject_HEAD
721     PySetObject *si_set; /* Set to NULL when iterator is exhausted */
722     Py_ssize_t si_used;
723     Py_ssize_t si_pos;
724     Py_ssize_t len;
725 } setiterobject;
726 
727 static void
setiter_dealloc(setiterobject * si)728 setiter_dealloc(setiterobject *si)
729 {
730     /* bpo-31095: UnTrack is needed before calling any callbacks */
731     _PyObject_GC_UNTRACK(si);
732     Py_XDECREF(si->si_set);
733     PyObject_GC_Del(si);
734 }
735 
736 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)737 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
738 {
739     Py_VISIT(si->si_set);
740     return 0;
741 }
742 
743 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))744 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
745 {
746     Py_ssize_t len = 0;
747     if (si->si_set != NULL && si->si_used == si->si_set->used)
748         len = si->len;
749     return PyLong_FromSsize_t(len);
750 }
751 
752 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
753 
754 static PyObject *setiter_iternext(setiterobject *si);
755 
756 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))757 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
758 {
759     _Py_IDENTIFIER(iter);
760     /* copy the iterator state */
761     setiterobject tmp = *si;
762     Py_XINCREF(tmp.si_set);
763 
764     /* iterate the temporary into a list */
765     PyObject *list = PySequence_List((PyObject*)&tmp);
766     Py_XDECREF(tmp.si_set);
767     if (list == NULL) {
768         return NULL;
769     }
770     return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
771 }
772 
773 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
774 
775 static PyMethodDef setiter_methods[] = {
776     {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
777     {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
778     {NULL,              NULL}           /* sentinel */
779 };
780 
setiter_iternext(setiterobject * si)781 static PyObject *setiter_iternext(setiterobject *si)
782 {
783     PyObject *key;
784     Py_ssize_t i, mask;
785     setentry *entry;
786     PySetObject *so = si->si_set;
787 
788     if (so == NULL)
789         return NULL;
790     assert (PyAnySet_Check(so));
791 
792     if (si->si_used != so->used) {
793         PyErr_SetString(PyExc_RuntimeError,
794                         "Set changed size during iteration");
795         si->si_used = -1; /* Make this state sticky */
796         return NULL;
797     }
798 
799     i = si->si_pos;
800     assert(i>=0);
801     entry = so->table;
802     mask = so->mask;
803     while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
804         i++;
805     si->si_pos = i+1;
806     if (i > mask)
807         goto fail;
808     si->len--;
809     key = entry[i].key;
810     Py_INCREF(key);
811     return key;
812 
813 fail:
814     si->si_set = NULL;
815     Py_DECREF(so);
816     return NULL;
817 }
818 
819 PyTypeObject PySetIter_Type = {
820     PyVarObject_HEAD_INIT(&PyType_Type, 0)
821     "set_iterator",                             /* tp_name */
822     sizeof(setiterobject),                      /* tp_basicsize */
823     0,                                          /* tp_itemsize */
824     /* methods */
825     (destructor)setiter_dealloc,                /* tp_dealloc */
826     0,                                          /* tp_vectorcall_offset */
827     0,                                          /* tp_getattr */
828     0,                                          /* tp_setattr */
829     0,                                          /* tp_as_async */
830     0,                                          /* tp_repr */
831     0,                                          /* tp_as_number */
832     0,                                          /* tp_as_sequence */
833     0,                                          /* tp_as_mapping */
834     0,                                          /* tp_hash */
835     0,                                          /* tp_call */
836     0,                                          /* tp_str */
837     PyObject_GenericGetAttr,                    /* tp_getattro */
838     0,                                          /* tp_setattro */
839     0,                                          /* tp_as_buffer */
840     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
841     0,                                          /* tp_doc */
842     (traverseproc)setiter_traverse,             /* tp_traverse */
843     0,                                          /* tp_clear */
844     0,                                          /* tp_richcompare */
845     0,                                          /* tp_weaklistoffset */
846     PyObject_SelfIter,                          /* tp_iter */
847     (iternextfunc)setiter_iternext,             /* tp_iternext */
848     setiter_methods,                            /* tp_methods */
849     0,
850 };
851 
852 static PyObject *
set_iter(PySetObject * so)853 set_iter(PySetObject *so)
854 {
855     setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
856     if (si == NULL)
857         return NULL;
858     Py_INCREF(so);
859     si->si_set = so;
860     si->si_used = so->used;
861     si->si_pos = 0;
862     si->len = so->used;
863     _PyObject_GC_TRACK(si);
864     return (PyObject *)si;
865 }
866 
867 static int
set_update_internal(PySetObject * so,PyObject * other)868 set_update_internal(PySetObject *so, PyObject *other)
869 {
870     PyObject *key, *it;
871 
872     if (PyAnySet_Check(other))
873         return set_merge(so, other);
874 
875     if (PyDict_CheckExact(other)) {
876         PyObject *value;
877         Py_ssize_t pos = 0;
878         Py_hash_t hash;
879         Py_ssize_t dictsize = PyDict_GET_SIZE(other);
880 
881         /* Do one big resize at the start, rather than
882         * incrementally resizing as we insert new keys.  Expect
883         * that there will be no (or few) overlapping keys.
884         */
885         if (dictsize < 0)
886             return -1;
887         if ((so->fill + dictsize)*5 >= so->mask*3) {
888             if (set_table_resize(so, (so->used + dictsize)*2) != 0)
889                 return -1;
890         }
891         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
892             if (set_add_entry(so, key, hash))
893                 return -1;
894         }
895         return 0;
896     }
897 
898     it = PyObject_GetIter(other);
899     if (it == NULL)
900         return -1;
901 
902     while ((key = PyIter_Next(it)) != NULL) {
903         if (set_add_key(so, key)) {
904             Py_DECREF(it);
905             Py_DECREF(key);
906             return -1;
907         }
908         Py_DECREF(key);
909     }
910     Py_DECREF(it);
911     if (PyErr_Occurred())
912         return -1;
913     return 0;
914 }
915 
916 static PyObject *
set_update(PySetObject * so,PyObject * args)917 set_update(PySetObject *so, PyObject *args)
918 {
919     Py_ssize_t i;
920 
921     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
922         PyObject *other = PyTuple_GET_ITEM(args, i);
923         if (set_update_internal(so, other))
924             return NULL;
925     }
926     Py_RETURN_NONE;
927 }
928 
929 PyDoc_STRVAR(update_doc,
930 "Update a set with the union of itself and others.");
931 
932 /* XXX Todo:
933    If aligned memory allocations become available, make the
934    set object 64 byte aligned so that most of the fields
935    can be retrieved or updated in a single cache line.
936 */
937 
938 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)939 make_new_set(PyTypeObject *type, PyObject *iterable)
940 {
941     assert(PyType_Check(type));
942     PySetObject *so;
943 
944     so = (PySetObject *)type->tp_alloc(type, 0);
945     if (so == NULL)
946         return NULL;
947 
948     so->fill = 0;
949     so->used = 0;
950     so->mask = PySet_MINSIZE - 1;
951     so->table = so->smalltable;
952     so->hash = -1;
953     so->finger = 0;
954     so->weakreflist = NULL;
955 
956     if (iterable != NULL) {
957         if (set_update_internal(so, iterable)) {
958             Py_DECREF(so);
959             return NULL;
960         }
961     }
962 
963     return (PyObject *)so;
964 }
965 
966 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)967 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
968 {
969     if (type != &PySet_Type && type != &PyFrozenSet_Type) {
970         if (PyType_IsSubtype(type, &PySet_Type))
971             type = &PySet_Type;
972         else
973             type = &PyFrozenSet_Type;
974     }
975     return make_new_set(type, iterable);
976 }
977 
978 /* The empty frozenset is a singleton */
979 static PyObject *emptyfrozenset = NULL;
980 
981 static PyObject *
make_new_frozenset(PyTypeObject * type,PyObject * iterable)982 make_new_frozenset(PyTypeObject *type, PyObject *iterable)
983 {
984     if (type != &PyFrozenSet_Type) {
985         return make_new_set(type, iterable);
986     }
987 
988     if (iterable != NULL) {
989         if (PyFrozenSet_CheckExact(iterable)) {
990             /* frozenset(f) is idempotent */
991             Py_INCREF(iterable);
992             return iterable;
993         }
994         PyObject *res = make_new_set((PyTypeObject *)type, iterable);
995         if (res == NULL || PySet_GET_SIZE(res) != 0) {
996             return res;
997         }
998         /* If the created frozenset is empty, return the empty frozenset singleton instead */
999         Py_DECREF(res);
1000     }
1001 
1002     // The empty frozenset is a singleton
1003     if (emptyfrozenset == NULL) {
1004         emptyfrozenset = make_new_set((PyTypeObject *)type, NULL);
1005     }
1006     Py_XINCREF(emptyfrozenset);
1007     return emptyfrozenset;
1008 }
1009 
1010 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1011 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1012 {
1013     PyObject *iterable = NULL;
1014 
1015     if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds)) {
1016         return NULL;
1017     }
1018 
1019     if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1020         return NULL;
1021     }
1022 
1023     return make_new_frozenset(type, iterable);
1024 }
1025 
1026 static PyObject *
frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1027 frozenset_vectorcall(PyObject *type, PyObject * const*args,
1028                      size_t nargsf, PyObject *kwnames)
1029 {
1030     if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1031         return NULL;
1032     }
1033 
1034     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1035     if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1036         return NULL;
1037     }
1038 
1039     PyObject *iterable = (nargs ? args[0] : NULL);
1040     return make_new_frozenset((PyTypeObject *)type, iterable);
1041 }
1042 
1043 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1044 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1045 {
1046     return make_new_set(type, NULL);
1047 }
1048 
1049 /* set_swap_bodies() switches the contents of any two sets by moving their
1050    internal data pointers and, if needed, copying the internal smalltables.
1051    Semantically equivalent to:
1052 
1053      t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1054 
1055    The function always succeeds and it leaves both objects in a stable state.
1056    Useful for operations that update in-place (by allowing an intermediate
1057    result to be swapped into one of the original inputs).
1058 */
1059 
1060 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1061 set_swap_bodies(PySetObject *a, PySetObject *b)
1062 {
1063     Py_ssize_t t;
1064     setentry *u;
1065     setentry tab[PySet_MINSIZE];
1066     Py_hash_t h;
1067 
1068     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1069     t = a->used;     a->used   = b->used;        b->used  = t;
1070     t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1071 
1072     u = a->table;
1073     if (a->table == a->smalltable)
1074         u = b->smalltable;
1075     a->table  = b->table;
1076     if (b->table == b->smalltable)
1077         a->table = a->smalltable;
1078     b->table = u;
1079 
1080     if (a->table == a->smalltable || b->table == b->smalltable) {
1081         memcpy(tab, a->smalltable, sizeof(tab));
1082         memcpy(a->smalltable, b->smalltable, sizeof(tab));
1083         memcpy(b->smalltable, tab, sizeof(tab));
1084     }
1085 
1086     if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1087         PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1088         h = a->hash;     a->hash = b->hash;  b->hash = h;
1089     } else {
1090         a->hash = -1;
1091         b->hash = -1;
1092     }
1093 }
1094 
1095 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1096 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1097 {
1098     return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1099 }
1100 
1101 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1102 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1103 {
1104     if (PyFrozenSet_CheckExact(so)) {
1105         Py_INCREF(so);
1106         return (PyObject *)so;
1107     }
1108     return set_copy(so, NULL);
1109 }
1110 
1111 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1112 
1113 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1114 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1115 {
1116     set_clear_internal(so);
1117     Py_RETURN_NONE;
1118 }
1119 
1120 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1121 
1122 static PyObject *
set_union(PySetObject * so,PyObject * args)1123 set_union(PySetObject *so, PyObject *args)
1124 {
1125     PySetObject *result;
1126     PyObject *other;
1127     Py_ssize_t i;
1128 
1129     result = (PySetObject *)set_copy(so, NULL);
1130     if (result == NULL)
1131         return NULL;
1132 
1133     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1134         other = PyTuple_GET_ITEM(args, i);
1135         if ((PyObject *)so == other)
1136             continue;
1137         if (set_update_internal(result, other)) {
1138             Py_DECREF(result);
1139             return NULL;
1140         }
1141     }
1142     return (PyObject *)result;
1143 }
1144 
1145 PyDoc_STRVAR(union_doc,
1146  "Return the union of sets as a new set.\n\
1147 \n\
1148 (i.e. all elements that are in either set.)");
1149 
1150 static PyObject *
set_or(PySetObject * so,PyObject * other)1151 set_or(PySetObject *so, PyObject *other)
1152 {
1153     PySetObject *result;
1154 
1155     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1156         Py_RETURN_NOTIMPLEMENTED;
1157 
1158     result = (PySetObject *)set_copy(so, NULL);
1159     if (result == NULL)
1160         return NULL;
1161     if ((PyObject *)so == other)
1162         return (PyObject *)result;
1163     if (set_update_internal(result, other)) {
1164         Py_DECREF(result);
1165         return NULL;
1166     }
1167     return (PyObject *)result;
1168 }
1169 
1170 static PyObject *
set_ior(PySetObject * so,PyObject * other)1171 set_ior(PySetObject *so, PyObject *other)
1172 {
1173     if (!PyAnySet_Check(other))
1174         Py_RETURN_NOTIMPLEMENTED;
1175 
1176     if (set_update_internal(so, other))
1177         return NULL;
1178     Py_INCREF(so);
1179     return (PyObject *)so;
1180 }
1181 
1182 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1183 set_intersection(PySetObject *so, PyObject *other)
1184 {
1185     PySetObject *result;
1186     PyObject *key, *it, *tmp;
1187     Py_hash_t hash;
1188     int rv;
1189 
1190     if ((PyObject *)so == other)
1191         return set_copy(so, NULL);
1192 
1193     result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1194     if (result == NULL)
1195         return NULL;
1196 
1197     if (PyAnySet_Check(other)) {
1198         Py_ssize_t pos = 0;
1199         setentry *entry;
1200 
1201         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1202             tmp = (PyObject *)so;
1203             so = (PySetObject *)other;
1204             other = tmp;
1205         }
1206 
1207         while (set_next((PySetObject *)other, &pos, &entry)) {
1208             key = entry->key;
1209             hash = entry->hash;
1210             rv = set_contains_entry(so, key, hash);
1211             if (rv < 0) {
1212                 Py_DECREF(result);
1213                 return NULL;
1214             }
1215             if (rv) {
1216                 if (set_add_entry(result, key, hash)) {
1217                     Py_DECREF(result);
1218                     return NULL;
1219                 }
1220             }
1221         }
1222         return (PyObject *)result;
1223     }
1224 
1225     it = PyObject_GetIter(other);
1226     if (it == NULL) {
1227         Py_DECREF(result);
1228         return NULL;
1229     }
1230 
1231     while ((key = PyIter_Next(it)) != NULL) {
1232         hash = PyObject_Hash(key);
1233         if (hash == -1)
1234             goto error;
1235         rv = set_contains_entry(so, key, hash);
1236         if (rv < 0)
1237             goto error;
1238         if (rv) {
1239             if (set_add_entry(result, key, hash))
1240                 goto error;
1241         }
1242         Py_DECREF(key);
1243     }
1244     Py_DECREF(it);
1245     if (PyErr_Occurred()) {
1246         Py_DECREF(result);
1247         return NULL;
1248     }
1249     return (PyObject *)result;
1250   error:
1251     Py_DECREF(it);
1252     Py_DECREF(result);
1253     Py_DECREF(key);
1254     return NULL;
1255 }
1256 
1257 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1258 set_intersection_multi(PySetObject *so, PyObject *args)
1259 {
1260     Py_ssize_t i;
1261     PyObject *result = (PyObject *)so;
1262 
1263     if (PyTuple_GET_SIZE(args) == 0)
1264         return set_copy(so, NULL);
1265 
1266     Py_INCREF(so);
1267     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1268         PyObject *other = PyTuple_GET_ITEM(args, i);
1269         PyObject *newresult = set_intersection((PySetObject *)result, other);
1270         if (newresult == NULL) {
1271             Py_DECREF(result);
1272             return NULL;
1273         }
1274         Py_DECREF(result);
1275         result = newresult;
1276     }
1277     return result;
1278 }
1279 
1280 PyDoc_STRVAR(intersection_doc,
1281 "Return the intersection of two sets as a new set.\n\
1282 \n\
1283 (i.e. all elements that are in both sets.)");
1284 
1285 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1286 set_intersection_update(PySetObject *so, PyObject *other)
1287 {
1288     PyObject *tmp;
1289 
1290     tmp = set_intersection(so, other);
1291     if (tmp == NULL)
1292         return NULL;
1293     set_swap_bodies(so, (PySetObject *)tmp);
1294     Py_DECREF(tmp);
1295     Py_RETURN_NONE;
1296 }
1297 
1298 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1299 set_intersection_update_multi(PySetObject *so, PyObject *args)
1300 {
1301     PyObject *tmp;
1302 
1303     tmp = set_intersection_multi(so, args);
1304     if (tmp == NULL)
1305         return NULL;
1306     set_swap_bodies(so, (PySetObject *)tmp);
1307     Py_DECREF(tmp);
1308     Py_RETURN_NONE;
1309 }
1310 
1311 PyDoc_STRVAR(intersection_update_doc,
1312 "Update a set with the intersection of itself and another.");
1313 
1314 static PyObject *
set_and(PySetObject * so,PyObject * other)1315 set_and(PySetObject *so, PyObject *other)
1316 {
1317     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1318         Py_RETURN_NOTIMPLEMENTED;
1319     return set_intersection(so, other);
1320 }
1321 
1322 static PyObject *
set_iand(PySetObject * so,PyObject * other)1323 set_iand(PySetObject *so, PyObject *other)
1324 {
1325     PyObject *result;
1326 
1327     if (!PyAnySet_Check(other))
1328         Py_RETURN_NOTIMPLEMENTED;
1329     result = set_intersection_update(so, other);
1330     if (result == NULL)
1331         return NULL;
1332     Py_DECREF(result);
1333     Py_INCREF(so);
1334     return (PyObject *)so;
1335 }
1336 
1337 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1338 set_isdisjoint(PySetObject *so, PyObject *other)
1339 {
1340     PyObject *key, *it, *tmp;
1341     int rv;
1342 
1343     if ((PyObject *)so == other) {
1344         if (PySet_GET_SIZE(so) == 0)
1345             Py_RETURN_TRUE;
1346         else
1347             Py_RETURN_FALSE;
1348     }
1349 
1350     if (PyAnySet_CheckExact(other)) {
1351         Py_ssize_t pos = 0;
1352         setentry *entry;
1353 
1354         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1355             tmp = (PyObject *)so;
1356             so = (PySetObject *)other;
1357             other = tmp;
1358         }
1359         while (set_next((PySetObject *)other, &pos, &entry)) {
1360             rv = set_contains_entry(so, entry->key, entry->hash);
1361             if (rv < 0)
1362                 return NULL;
1363             if (rv)
1364                 Py_RETURN_FALSE;
1365         }
1366         Py_RETURN_TRUE;
1367     }
1368 
1369     it = PyObject_GetIter(other);
1370     if (it == NULL)
1371         return NULL;
1372 
1373     while ((key = PyIter_Next(it)) != NULL) {
1374         Py_hash_t hash = PyObject_Hash(key);
1375 
1376         if (hash == -1) {
1377             Py_DECREF(key);
1378             Py_DECREF(it);
1379             return NULL;
1380         }
1381         rv = set_contains_entry(so, key, hash);
1382         Py_DECREF(key);
1383         if (rv < 0) {
1384             Py_DECREF(it);
1385             return NULL;
1386         }
1387         if (rv) {
1388             Py_DECREF(it);
1389             Py_RETURN_FALSE;
1390         }
1391     }
1392     Py_DECREF(it);
1393     if (PyErr_Occurred())
1394         return NULL;
1395     Py_RETURN_TRUE;
1396 }
1397 
1398 PyDoc_STRVAR(isdisjoint_doc,
1399 "Return True if two sets have a null intersection.");
1400 
1401 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1402 set_difference_update_internal(PySetObject *so, PyObject *other)
1403 {
1404     if ((PyObject *)so == other)
1405         return set_clear_internal(so);
1406 
1407     if (PyAnySet_Check(other)) {
1408         setentry *entry;
1409         Py_ssize_t pos = 0;
1410 
1411         /* Optimization:  When the other set is more than 8 times
1412            larger than the base set, replace the other set with
1413            interesection of the two sets.
1414         */
1415         if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1416             other = set_intersection(so, other);
1417             if (other == NULL)
1418                 return -1;
1419         } else {
1420             Py_INCREF(other);
1421         }
1422 
1423         while (set_next((PySetObject *)other, &pos, &entry))
1424             if (set_discard_entry(so, entry->key, entry->hash) < 0) {
1425                 Py_DECREF(other);
1426                 return -1;
1427             }
1428 
1429         Py_DECREF(other);
1430     } else {
1431         PyObject *key, *it;
1432         it = PyObject_GetIter(other);
1433         if (it == NULL)
1434             return -1;
1435 
1436         while ((key = PyIter_Next(it)) != NULL) {
1437             if (set_discard_key(so, key) < 0) {
1438                 Py_DECREF(it);
1439                 Py_DECREF(key);
1440                 return -1;
1441             }
1442             Py_DECREF(key);
1443         }
1444         Py_DECREF(it);
1445         if (PyErr_Occurred())
1446             return -1;
1447     }
1448     /* If more than 1/4th are dummies, then resize them away. */
1449     if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1450         return 0;
1451     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1452 }
1453 
1454 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1455 set_difference_update(PySetObject *so, PyObject *args)
1456 {
1457     Py_ssize_t i;
1458 
1459     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1460         PyObject *other = PyTuple_GET_ITEM(args, i);
1461         if (set_difference_update_internal(so, other))
1462             return NULL;
1463     }
1464     Py_RETURN_NONE;
1465 }
1466 
1467 PyDoc_STRVAR(difference_update_doc,
1468 "Remove all elements of another set from this set.");
1469 
1470 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1471 set_copy_and_difference(PySetObject *so, PyObject *other)
1472 {
1473     PyObject *result;
1474 
1475     result = set_copy(so, NULL);
1476     if (result == NULL)
1477         return NULL;
1478     if (set_difference_update_internal((PySetObject *) result, other) == 0)
1479         return result;
1480     Py_DECREF(result);
1481     return NULL;
1482 }
1483 
1484 static PyObject *
set_difference(PySetObject * so,PyObject * other)1485 set_difference(PySetObject *so, PyObject *other)
1486 {
1487     PyObject *result;
1488     PyObject *key;
1489     Py_hash_t hash;
1490     setentry *entry;
1491     Py_ssize_t pos = 0, other_size;
1492     int rv;
1493 
1494     if (PyAnySet_Check(other)) {
1495         other_size = PySet_GET_SIZE(other);
1496     }
1497     else if (PyDict_CheckExact(other)) {
1498         other_size = PyDict_GET_SIZE(other);
1499     }
1500     else {
1501         return set_copy_and_difference(so, other);
1502     }
1503 
1504     /* If len(so) much more than len(other), it's more efficient to simply copy
1505      * so and then iterate other looking for common elements. */
1506     if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1507         return set_copy_and_difference(so, other);
1508     }
1509 
1510     result = make_new_set_basetype(Py_TYPE(so), NULL);
1511     if (result == NULL)
1512         return NULL;
1513 
1514     if (PyDict_CheckExact(other)) {
1515         while (set_next(so, &pos, &entry)) {
1516             key = entry->key;
1517             hash = entry->hash;
1518             rv = _PyDict_Contains(other, key, hash);
1519             if (rv < 0) {
1520                 Py_DECREF(result);
1521                 return NULL;
1522             }
1523             if (!rv) {
1524                 if (set_add_entry((PySetObject *)result, key, hash)) {
1525                     Py_DECREF(result);
1526                     return NULL;
1527                 }
1528             }
1529         }
1530         return result;
1531     }
1532 
1533     /* Iterate over so, checking for common elements in other. */
1534     while (set_next(so, &pos, &entry)) {
1535         key = entry->key;
1536         hash = entry->hash;
1537         rv = set_contains_entry((PySetObject *)other, key, hash);
1538         if (rv < 0) {
1539             Py_DECREF(result);
1540             return NULL;
1541         }
1542         if (!rv) {
1543             if (set_add_entry((PySetObject *)result, key, hash)) {
1544                 Py_DECREF(result);
1545                 return NULL;
1546             }
1547         }
1548     }
1549     return result;
1550 }
1551 
1552 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1553 set_difference_multi(PySetObject *so, PyObject *args)
1554 {
1555     Py_ssize_t i;
1556     PyObject *result, *other;
1557 
1558     if (PyTuple_GET_SIZE(args) == 0)
1559         return set_copy(so, NULL);
1560 
1561     other = PyTuple_GET_ITEM(args, 0);
1562     result = set_difference(so, other);
1563     if (result == NULL)
1564         return NULL;
1565 
1566     for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1567         other = PyTuple_GET_ITEM(args, i);
1568         if (set_difference_update_internal((PySetObject *)result, other)) {
1569             Py_DECREF(result);
1570             return NULL;
1571         }
1572     }
1573     return result;
1574 }
1575 
1576 PyDoc_STRVAR(difference_doc,
1577 "Return the difference of two or more sets as a new set.\n\
1578 \n\
1579 (i.e. all elements that are in this set but not the others.)");
1580 static PyObject *
set_sub(PySetObject * so,PyObject * other)1581 set_sub(PySetObject *so, PyObject *other)
1582 {
1583     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1584         Py_RETURN_NOTIMPLEMENTED;
1585     return set_difference(so, other);
1586 }
1587 
1588 static PyObject *
set_isub(PySetObject * so,PyObject * other)1589 set_isub(PySetObject *so, PyObject *other)
1590 {
1591     if (!PyAnySet_Check(other))
1592         Py_RETURN_NOTIMPLEMENTED;
1593     if (set_difference_update_internal(so, other))
1594         return NULL;
1595     Py_INCREF(so);
1596     return (PyObject *)so;
1597 }
1598 
1599 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1600 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1601 {
1602     PySetObject *otherset;
1603     PyObject *key;
1604     Py_ssize_t pos = 0;
1605     Py_hash_t hash;
1606     setentry *entry;
1607     int rv;
1608 
1609     if ((PyObject *)so == other)
1610         return set_clear(so, NULL);
1611 
1612     if (PyDict_CheckExact(other)) {
1613         PyObject *value;
1614         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1615             Py_INCREF(key);
1616             rv = set_discard_entry(so, key, hash);
1617             if (rv < 0) {
1618                 Py_DECREF(key);
1619                 return NULL;
1620             }
1621             if (rv == DISCARD_NOTFOUND) {
1622                 if (set_add_entry(so, key, hash)) {
1623                     Py_DECREF(key);
1624                     return NULL;
1625                 }
1626             }
1627             Py_DECREF(key);
1628         }
1629         Py_RETURN_NONE;
1630     }
1631 
1632     if (PyAnySet_Check(other)) {
1633         Py_INCREF(other);
1634         otherset = (PySetObject *)other;
1635     } else {
1636         otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1637         if (otherset == NULL)
1638             return NULL;
1639     }
1640 
1641     while (set_next(otherset, &pos, &entry)) {
1642         key = entry->key;
1643         hash = entry->hash;
1644         rv = set_discard_entry(so, key, hash);
1645         if (rv < 0) {
1646             Py_DECREF(otherset);
1647             return NULL;
1648         }
1649         if (rv == DISCARD_NOTFOUND) {
1650             if (set_add_entry(so, key, hash)) {
1651                 Py_DECREF(otherset);
1652                 return NULL;
1653             }
1654         }
1655     }
1656     Py_DECREF(otherset);
1657     Py_RETURN_NONE;
1658 }
1659 
1660 PyDoc_STRVAR(symmetric_difference_update_doc,
1661 "Update a set with the symmetric difference of itself and another.");
1662 
1663 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1664 set_symmetric_difference(PySetObject *so, PyObject *other)
1665 {
1666     PyObject *rv;
1667     PySetObject *otherset;
1668 
1669     otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1670     if (otherset == NULL)
1671         return NULL;
1672     rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1673     if (rv == NULL) {
1674         Py_DECREF(otherset);
1675         return NULL;
1676     }
1677     Py_DECREF(rv);
1678     return (PyObject *)otherset;
1679 }
1680 
1681 PyDoc_STRVAR(symmetric_difference_doc,
1682 "Return the symmetric difference of two sets as a new set.\n\
1683 \n\
1684 (i.e. all elements that are in exactly one of the sets.)");
1685 
1686 static PyObject *
set_xor(PySetObject * so,PyObject * other)1687 set_xor(PySetObject *so, PyObject *other)
1688 {
1689     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1690         Py_RETURN_NOTIMPLEMENTED;
1691     return set_symmetric_difference(so, other);
1692 }
1693 
1694 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1695 set_ixor(PySetObject *so, PyObject *other)
1696 {
1697     PyObject *result;
1698 
1699     if (!PyAnySet_Check(other))
1700         Py_RETURN_NOTIMPLEMENTED;
1701     result = set_symmetric_difference_update(so, other);
1702     if (result == NULL)
1703         return NULL;
1704     Py_DECREF(result);
1705     Py_INCREF(so);
1706     return (PyObject *)so;
1707 }
1708 
1709 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1710 set_issubset(PySetObject *so, PyObject *other)
1711 {
1712     setentry *entry;
1713     Py_ssize_t pos = 0;
1714     int rv;
1715 
1716     if (!PyAnySet_Check(other)) {
1717         PyObject *tmp, *result;
1718         tmp = make_new_set(&PySet_Type, other);
1719         if (tmp == NULL)
1720             return NULL;
1721         result = set_issubset(so, tmp);
1722         Py_DECREF(tmp);
1723         return result;
1724     }
1725     if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1726         Py_RETURN_FALSE;
1727 
1728     while (set_next(so, &pos, &entry)) {
1729         rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1730         if (rv < 0)
1731             return NULL;
1732         if (!rv)
1733             Py_RETURN_FALSE;
1734     }
1735     Py_RETURN_TRUE;
1736 }
1737 
1738 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1739 
1740 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1741 set_issuperset(PySetObject *so, PyObject *other)
1742 {
1743     PyObject *tmp, *result;
1744 
1745     if (!PyAnySet_Check(other)) {
1746         tmp = make_new_set(&PySet_Type, other);
1747         if (tmp == NULL)
1748             return NULL;
1749         result = set_issuperset(so, tmp);
1750         Py_DECREF(tmp);
1751         return result;
1752     }
1753     return set_issubset((PySetObject *)other, (PyObject *)so);
1754 }
1755 
1756 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1757 
1758 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1759 set_richcompare(PySetObject *v, PyObject *w, int op)
1760 {
1761     PyObject *r1;
1762     int r2;
1763 
1764     if(!PyAnySet_Check(w))
1765         Py_RETURN_NOTIMPLEMENTED;
1766 
1767     switch (op) {
1768     case Py_EQ:
1769         if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1770             Py_RETURN_FALSE;
1771         if (v->hash != -1  &&
1772             ((PySetObject *)w)->hash != -1 &&
1773             v->hash != ((PySetObject *)w)->hash)
1774             Py_RETURN_FALSE;
1775         return set_issubset(v, w);
1776     case Py_NE:
1777         r1 = set_richcompare(v, w, Py_EQ);
1778         if (r1 == NULL)
1779             return NULL;
1780         r2 = PyObject_IsTrue(r1);
1781         Py_DECREF(r1);
1782         if (r2 < 0)
1783             return NULL;
1784         return PyBool_FromLong(!r2);
1785     case Py_LE:
1786         return set_issubset(v, w);
1787     case Py_GE:
1788         return set_issuperset(v, w);
1789     case Py_LT:
1790         if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1791             Py_RETURN_FALSE;
1792         return set_issubset(v, w);
1793     case Py_GT:
1794         if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1795             Py_RETURN_FALSE;
1796         return set_issuperset(v, w);
1797     }
1798     Py_RETURN_NOTIMPLEMENTED;
1799 }
1800 
1801 static PyObject *
set_add(PySetObject * so,PyObject * key)1802 set_add(PySetObject *so, PyObject *key)
1803 {
1804     if (set_add_key(so, key))
1805         return NULL;
1806     Py_RETURN_NONE;
1807 }
1808 
1809 PyDoc_STRVAR(add_doc,
1810 "Add an element to a set.\n\
1811 \n\
1812 This has no effect if the element is already present.");
1813 
1814 static int
set_contains(PySetObject * so,PyObject * key)1815 set_contains(PySetObject *so, PyObject *key)
1816 {
1817     PyObject *tmpkey;
1818     int rv;
1819 
1820     rv = set_contains_key(so, key);
1821     if (rv < 0) {
1822         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1823             return -1;
1824         PyErr_Clear();
1825         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1826         if (tmpkey == NULL)
1827             return -1;
1828         rv = set_contains_key(so, tmpkey);
1829         Py_DECREF(tmpkey);
1830     }
1831     return rv;
1832 }
1833 
1834 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1835 set_direct_contains(PySetObject *so, PyObject *key)
1836 {
1837     long result;
1838 
1839     result = set_contains(so, key);
1840     if (result < 0)
1841         return NULL;
1842     return PyBool_FromLong(result);
1843 }
1844 
1845 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1846 
1847 static PyObject *
set_remove(PySetObject * so,PyObject * key)1848 set_remove(PySetObject *so, PyObject *key)
1849 {
1850     PyObject *tmpkey;
1851     int rv;
1852 
1853     rv = set_discard_key(so, key);
1854     if (rv < 0) {
1855         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1856             return NULL;
1857         PyErr_Clear();
1858         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1859         if (tmpkey == NULL)
1860             return NULL;
1861         rv = set_discard_key(so, tmpkey);
1862         Py_DECREF(tmpkey);
1863         if (rv < 0)
1864             return NULL;
1865     }
1866 
1867     if (rv == DISCARD_NOTFOUND) {
1868         _PyErr_SetKeyError(key);
1869         return NULL;
1870     }
1871     Py_RETURN_NONE;
1872 }
1873 
1874 PyDoc_STRVAR(remove_doc,
1875 "Remove an element from a set; it must be a member.\n\
1876 \n\
1877 If the element is not a member, raise a KeyError.");
1878 
1879 static PyObject *
set_discard(PySetObject * so,PyObject * key)1880 set_discard(PySetObject *so, PyObject *key)
1881 {
1882     PyObject *tmpkey;
1883     int rv;
1884 
1885     rv = set_discard_key(so, key);
1886     if (rv < 0) {
1887         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1888             return NULL;
1889         PyErr_Clear();
1890         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1891         if (tmpkey == NULL)
1892             return NULL;
1893         rv = set_discard_key(so, tmpkey);
1894         Py_DECREF(tmpkey);
1895         if (rv < 0)
1896             return NULL;
1897     }
1898     Py_RETURN_NONE;
1899 }
1900 
1901 PyDoc_STRVAR(discard_doc,
1902 "Remove an element from a set if it is a member.\n\
1903 \n\
1904 If the element is not a member, do nothing.");
1905 
1906 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1907 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1908 {
1909     PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1910     _Py_IDENTIFIER(__dict__);
1911 
1912     keys = PySequence_List((PyObject *)so);
1913     if (keys == NULL)
1914         goto done;
1915     args = PyTuple_Pack(1, keys);
1916     if (args == NULL)
1917         goto done;
1918     if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) {
1919         goto done;
1920     }
1921     if (dict == NULL) {
1922         dict = Py_None;
1923         Py_INCREF(dict);
1924     }
1925     result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1926 done:
1927     Py_XDECREF(args);
1928     Py_XDECREF(keys);
1929     Py_XDECREF(dict);
1930     return result;
1931 }
1932 
1933 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1934 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1935 {
1936     Py_ssize_t res;
1937 
1938     res = _PyObject_SIZE(Py_TYPE(so));
1939     if (so->table != so->smalltable)
1940         res = res + (so->mask + 1) * sizeof(setentry);
1941     return PyLong_FromSsize_t(res);
1942 }
1943 
1944 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1945 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1946 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1947 {
1948     PyObject *iterable = NULL;
1949 
1950     if (!_PyArg_NoKeywords("set", kwds))
1951         return -1;
1952     if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1953         return -1;
1954     if (self->fill)
1955         set_clear_internal(self);
1956     self->hash = -1;
1957     if (iterable == NULL)
1958         return 0;
1959     return set_update_internal(self, iterable);
1960 }
1961 
1962 static PyObject*
set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1963 set_vectorcall(PyObject *type, PyObject * const*args,
1964                size_t nargsf, PyObject *kwnames)
1965 {
1966     assert(PyType_Check(type));
1967 
1968     if (!_PyArg_NoKwnames("set", kwnames)) {
1969         return NULL;
1970     }
1971 
1972     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1973     if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
1974         return NULL;
1975     }
1976 
1977     if (nargs) {
1978         return make_new_set((PyTypeObject *)type, args[0]);
1979     }
1980 
1981     return make_new_set((PyTypeObject *)type, NULL);
1982 }
1983 
1984 static PySequenceMethods set_as_sequence = {
1985     set_len,                            /* sq_length */
1986     0,                                  /* sq_concat */
1987     0,                                  /* sq_repeat */
1988     0,                                  /* sq_item */
1989     0,                                  /* sq_slice */
1990     0,                                  /* sq_ass_item */
1991     0,                                  /* sq_ass_slice */
1992     (objobjproc)set_contains,           /* sq_contains */
1993 };
1994 
1995 /* set object ********************************************************/
1996 
1997 #ifdef Py_DEBUG
1998 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
1999 
2000 PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
2001 All is well if assertions don't fail.");
2002 #endif
2003 
2004 static PyMethodDef set_methods[] = {
2005     {"add",             (PyCFunction)set_add,           METH_O,
2006      add_doc},
2007     {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2008      clear_doc},
2009     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2010      contains_doc},
2011     {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2012      copy_doc},
2013     {"discard",         (PyCFunction)set_discard,       METH_O,
2014      discard_doc},
2015     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2016      difference_doc},
2017     {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2018      difference_update_doc},
2019     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2020      intersection_doc},
2021     {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2022      intersection_update_doc},
2023     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2024      isdisjoint_doc},
2025     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2026      issubset_doc},
2027     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2028      issuperset_doc},
2029     {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2030      pop_doc},
2031     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2032      reduce_doc},
2033     {"remove",          (PyCFunction)set_remove,        METH_O,
2034      remove_doc},
2035     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2036      sizeof_doc},
2037     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2038      symmetric_difference_doc},
2039     {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2040      symmetric_difference_update_doc},
2041 #ifdef Py_DEBUG
2042     {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2043      test_c_api_doc},
2044 #endif
2045     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2046      union_doc},
2047     {"update",          (PyCFunction)set_update,        METH_VARARGS,
2048      update_doc},
2049     {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2050     {NULL,              NULL}   /* sentinel */
2051 };
2052 
2053 static PyNumberMethods set_as_number = {
2054     0,                                  /*nb_add*/
2055     (binaryfunc)set_sub,                /*nb_subtract*/
2056     0,                                  /*nb_multiply*/
2057     0,                                  /*nb_remainder*/
2058     0,                                  /*nb_divmod*/
2059     0,                                  /*nb_power*/
2060     0,                                  /*nb_negative*/
2061     0,                                  /*nb_positive*/
2062     0,                                  /*nb_absolute*/
2063     0,                                  /*nb_bool*/
2064     0,                                  /*nb_invert*/
2065     0,                                  /*nb_lshift*/
2066     0,                                  /*nb_rshift*/
2067     (binaryfunc)set_and,                /*nb_and*/
2068     (binaryfunc)set_xor,                /*nb_xor*/
2069     (binaryfunc)set_or,                 /*nb_or*/
2070     0,                                  /*nb_int*/
2071     0,                                  /*nb_reserved*/
2072     0,                                  /*nb_float*/
2073     0,                                  /*nb_inplace_add*/
2074     (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2075     0,                                  /*nb_inplace_multiply*/
2076     0,                                  /*nb_inplace_remainder*/
2077     0,                                  /*nb_inplace_power*/
2078     0,                                  /*nb_inplace_lshift*/
2079     0,                                  /*nb_inplace_rshift*/
2080     (binaryfunc)set_iand,               /*nb_inplace_and*/
2081     (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2082     (binaryfunc)set_ior,                /*nb_inplace_or*/
2083 };
2084 
2085 PyDoc_STRVAR(set_doc,
2086 "set() -> new empty set object\n\
2087 set(iterable) -> new set object\n\
2088 \n\
2089 Build an unordered collection of unique elements.");
2090 
2091 PyTypeObject PySet_Type = {
2092     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2093     "set",                              /* tp_name */
2094     sizeof(PySetObject),                /* tp_basicsize */
2095     0,                                  /* tp_itemsize */
2096     /* methods */
2097     (destructor)set_dealloc,            /* tp_dealloc */
2098     0,                                  /* tp_vectorcall_offset */
2099     0,                                  /* tp_getattr */
2100     0,                                  /* tp_setattr */
2101     0,                                  /* tp_as_async */
2102     (reprfunc)set_repr,                 /* tp_repr */
2103     &set_as_number,                     /* tp_as_number */
2104     &set_as_sequence,                   /* tp_as_sequence */
2105     0,                                  /* tp_as_mapping */
2106     PyObject_HashNotImplemented,        /* tp_hash */
2107     0,                                  /* tp_call */
2108     0,                                  /* tp_str */
2109     PyObject_GenericGetAttr,            /* tp_getattro */
2110     0,                                  /* tp_setattro */
2111     0,                                  /* tp_as_buffer */
2112     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2113         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2114     set_doc,                            /* tp_doc */
2115     (traverseproc)set_traverse,         /* tp_traverse */
2116     (inquiry)set_clear_internal,        /* tp_clear */
2117     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2118     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2119     (getiterfunc)set_iter,              /* tp_iter */
2120     0,                                  /* tp_iternext */
2121     set_methods,                        /* tp_methods */
2122     0,                                  /* tp_members */
2123     0,                                  /* tp_getset */
2124     0,                                  /* tp_base */
2125     0,                                  /* tp_dict */
2126     0,                                  /* tp_descr_get */
2127     0,                                  /* tp_descr_set */
2128     0,                                  /* tp_dictoffset */
2129     (initproc)set_init,                 /* tp_init */
2130     PyType_GenericAlloc,                /* tp_alloc */
2131     set_new,                            /* tp_new */
2132     PyObject_GC_Del,                    /* tp_free */
2133     .tp_vectorcall = set_vectorcall,
2134 };
2135 
2136 /* frozenset object ********************************************************/
2137 
2138 
2139 static PyMethodDef frozenset_methods[] = {
2140     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2141      contains_doc},
2142     {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2143      copy_doc},
2144     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2145      difference_doc},
2146     {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,
2147      intersection_doc},
2148     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2149      isdisjoint_doc},
2150     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2151      issubset_doc},
2152     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2153      issuperset_doc},
2154     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2155      reduce_doc},
2156     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2157      sizeof_doc},
2158     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2159      symmetric_difference_doc},
2160     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2161      union_doc},
2162     {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2163     {NULL,              NULL}   /* sentinel */
2164 };
2165 
2166 static PyNumberMethods frozenset_as_number = {
2167     0,                                  /*nb_add*/
2168     (binaryfunc)set_sub,                /*nb_subtract*/
2169     0,                                  /*nb_multiply*/
2170     0,                                  /*nb_remainder*/
2171     0,                                  /*nb_divmod*/
2172     0,                                  /*nb_power*/
2173     0,                                  /*nb_negative*/
2174     0,                                  /*nb_positive*/
2175     0,                                  /*nb_absolute*/
2176     0,                                  /*nb_bool*/
2177     0,                                  /*nb_invert*/
2178     0,                                  /*nb_lshift*/
2179     0,                                  /*nb_rshift*/
2180     (binaryfunc)set_and,                /*nb_and*/
2181     (binaryfunc)set_xor,                /*nb_xor*/
2182     (binaryfunc)set_or,                 /*nb_or*/
2183 };
2184 
2185 PyDoc_STRVAR(frozenset_doc,
2186 "frozenset() -> empty frozenset object\n\
2187 frozenset(iterable) -> frozenset object\n\
2188 \n\
2189 Build an immutable unordered collection of unique elements.");
2190 
2191 PyTypeObject PyFrozenSet_Type = {
2192     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2193     "frozenset",                        /* tp_name */
2194     sizeof(PySetObject),                /* tp_basicsize */
2195     0,                                  /* tp_itemsize */
2196     /* methods */
2197     (destructor)set_dealloc,            /* tp_dealloc */
2198     0,                                  /* tp_vectorcall_offset */
2199     0,                                  /* tp_getattr */
2200     0,                                  /* tp_setattr */
2201     0,                                  /* tp_as_async */
2202     (reprfunc)set_repr,                 /* tp_repr */
2203     &frozenset_as_number,               /* tp_as_number */
2204     &set_as_sequence,                   /* tp_as_sequence */
2205     0,                                  /* tp_as_mapping */
2206     frozenset_hash,                     /* tp_hash */
2207     0,                                  /* tp_call */
2208     0,                                  /* tp_str */
2209     PyObject_GenericGetAttr,            /* tp_getattro */
2210     0,                                  /* tp_setattro */
2211     0,                                  /* tp_as_buffer */
2212     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2213         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2214     frozenset_doc,                      /* tp_doc */
2215     (traverseproc)set_traverse,         /* tp_traverse */
2216     (inquiry)set_clear_internal,        /* tp_clear */
2217     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2218     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2219     (getiterfunc)set_iter,              /* tp_iter */
2220     0,                                  /* tp_iternext */
2221     frozenset_methods,                  /* tp_methods */
2222     0,                                  /* tp_members */
2223     0,                                  /* tp_getset */
2224     0,                                  /* tp_base */
2225     0,                                  /* tp_dict */
2226     0,                                  /* tp_descr_get */
2227     0,                                  /* tp_descr_set */
2228     0,                                  /* tp_dictoffset */
2229     0,                                  /* tp_init */
2230     PyType_GenericAlloc,                /* tp_alloc */
2231     frozenset_new,                      /* tp_new */
2232     PyObject_GC_Del,                    /* tp_free */
2233     .tp_vectorcall = frozenset_vectorcall,
2234 };
2235 
2236 
2237 /***** C API functions *************************************************/
2238 
2239 PyObject *
PySet_New(PyObject * iterable)2240 PySet_New(PyObject *iterable)
2241 {
2242     return make_new_set(&PySet_Type, iterable);
2243 }
2244 
2245 PyObject *
PyFrozenSet_New(PyObject * iterable)2246 PyFrozenSet_New(PyObject *iterable)
2247 {
2248     return make_new_set(&PyFrozenSet_Type, iterable);
2249 }
2250 
2251 Py_ssize_t
PySet_Size(PyObject * anyset)2252 PySet_Size(PyObject *anyset)
2253 {
2254     if (!PyAnySet_Check(anyset)) {
2255         PyErr_BadInternalCall();
2256         return -1;
2257     }
2258     return PySet_GET_SIZE(anyset);
2259 }
2260 
2261 int
PySet_Clear(PyObject * set)2262 PySet_Clear(PyObject *set)
2263 {
2264     if (!PySet_Check(set)) {
2265         PyErr_BadInternalCall();
2266         return -1;
2267     }
2268     return set_clear_internal((PySetObject *)set);
2269 }
2270 
2271 int
PySet_Contains(PyObject * anyset,PyObject * key)2272 PySet_Contains(PyObject *anyset, PyObject *key)
2273 {
2274     if (!PyAnySet_Check(anyset)) {
2275         PyErr_BadInternalCall();
2276         return -1;
2277     }
2278     return set_contains_key((PySetObject *)anyset, key);
2279 }
2280 
2281 int
PySet_Discard(PyObject * set,PyObject * key)2282 PySet_Discard(PyObject *set, PyObject *key)
2283 {
2284     if (!PySet_Check(set)) {
2285         PyErr_BadInternalCall();
2286         return -1;
2287     }
2288     return set_discard_key((PySetObject *)set, key);
2289 }
2290 
2291 int
PySet_Add(PyObject * anyset,PyObject * key)2292 PySet_Add(PyObject *anyset, PyObject *key)
2293 {
2294     if (!PySet_Check(anyset) &&
2295         (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2296         PyErr_BadInternalCall();
2297         return -1;
2298     }
2299     return set_add_key((PySetObject *)anyset, key);
2300 }
2301 
2302 void
_PySet_Fini(void)2303 _PySet_Fini(void)
2304 {
2305     Py_CLEAR(emptyfrozenset);
2306 }
2307 
2308 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2309 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2310 {
2311     setentry *entry;
2312 
2313     if (!PyAnySet_Check(set)) {
2314         PyErr_BadInternalCall();
2315         return -1;
2316     }
2317     if (set_next((PySetObject *)set, pos, &entry) == 0)
2318         return 0;
2319     *key = entry->key;
2320     *hash = entry->hash;
2321     return 1;
2322 }
2323 
2324 PyObject *
PySet_Pop(PyObject * set)2325 PySet_Pop(PyObject *set)
2326 {
2327     if (!PySet_Check(set)) {
2328         PyErr_BadInternalCall();
2329         return NULL;
2330     }
2331     return set_pop((PySetObject *)set, NULL);
2332 }
2333 
2334 int
_PySet_Update(PyObject * set,PyObject * iterable)2335 _PySet_Update(PyObject *set, PyObject *iterable)
2336 {
2337     if (!PySet_Check(set)) {
2338         PyErr_BadInternalCall();
2339         return -1;
2340     }
2341     return set_update_internal((PySetObject *)set, iterable);
2342 }
2343 
2344 /* Exported for the gdb plugin's benefit. */
2345 PyObject *_PySet_Dummy = dummy;
2346 
2347 #ifdef Py_DEBUG
2348 
2349 /* Test code to be called with any three element set.
2350    Returns True and original set is restored. */
2351 
2352 #define assertRaises(call_return_value, exception)              \
2353     do {                                                        \
2354         assert(call_return_value);                              \
2355         assert(PyErr_ExceptionMatches(exception));              \
2356         PyErr_Clear();                                          \
2357     } while(0)
2358 
2359 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2360 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2361 {
2362     Py_ssize_t count;
2363     const char *s;
2364     Py_ssize_t i;
2365     PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2366     PyObject *ob = (PyObject *)so;
2367     Py_hash_t hash;
2368     PyObject *str;
2369 
2370     /* Verify preconditions */
2371     assert(PyAnySet_Check(ob));
2372     assert(PyAnySet_CheckExact(ob));
2373     assert(!PyFrozenSet_CheckExact(ob));
2374 
2375     /* so.clear(); so |= set("abc"); */
2376     str = PyUnicode_FromString("abc");
2377     if (str == NULL)
2378         return NULL;
2379     set_clear_internal(so);
2380     if (set_update_internal(so, str)) {
2381         Py_DECREF(str);
2382         return NULL;
2383     }
2384     Py_DECREF(str);
2385 
2386     /* Exercise type/size checks */
2387     assert(PySet_Size(ob) == 3);
2388     assert(PySet_GET_SIZE(ob) == 3);
2389 
2390     /* Raise TypeError for non-iterable constructor arguments */
2391     assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2392     assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2393 
2394     /* Raise TypeError for unhashable key */
2395     dup = PySet_New(ob);
2396     assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2397     assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2398     assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2399 
2400     /* Exercise successful pop, contains, add, and discard */
2401     elem = PySet_Pop(ob);
2402     assert(PySet_Contains(ob, elem) == 0);
2403     assert(PySet_GET_SIZE(ob) == 2);
2404     assert(PySet_Add(ob, elem) == 0);
2405     assert(PySet_Contains(ob, elem) == 1);
2406     assert(PySet_GET_SIZE(ob) == 3);
2407     assert(PySet_Discard(ob, elem) == 1);
2408     assert(PySet_GET_SIZE(ob) == 2);
2409     assert(PySet_Discard(ob, elem) == 0);
2410     assert(PySet_GET_SIZE(ob) == 2);
2411 
2412     /* Exercise clear */
2413     dup2 = PySet_New(dup);
2414     assert(PySet_Clear(dup2) == 0);
2415     assert(PySet_Size(dup2) == 0);
2416     Py_DECREF(dup2);
2417 
2418     /* Raise SystemError on clear or update of frozen set */
2419     f = PyFrozenSet_New(dup);
2420     assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2421     assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2422     assert(PySet_Add(f, elem) == 0);
2423     Py_INCREF(f);
2424     assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2425     Py_DECREF(f);
2426     Py_DECREF(f);
2427 
2428     /* Exercise direct iteration */
2429     i = 0, count = 0;
2430     while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2431         s = PyUnicode_AsUTF8(x);
2432         assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2433         count++;
2434     }
2435     assert(count == 3);
2436 
2437     /* Exercise updates */
2438     dup2 = PySet_New(NULL);
2439     assert(_PySet_Update(dup2, dup) == 0);
2440     assert(PySet_Size(dup2) == 3);
2441     assert(_PySet_Update(dup2, dup) == 0);
2442     assert(PySet_Size(dup2) == 3);
2443     Py_DECREF(dup2);
2444 
2445     /* Raise SystemError when self argument is not a set or frozenset. */
2446     t = PyTuple_New(0);
2447     assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2448     assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2449     Py_DECREF(t);
2450 
2451     /* Raise SystemError when self argument is not a set. */
2452     f = PyFrozenSet_New(dup);
2453     assert(PySet_Size(f) == 3);
2454     assert(PyFrozenSet_CheckExact(f));
2455     assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2456     assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2457     Py_DECREF(f);
2458 
2459     /* Raise KeyError when popping from an empty set */
2460     assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2461     Py_DECREF(ob);
2462     assert(PySet_GET_SIZE(ob) == 0);
2463     assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2464 
2465     /* Restore the set from the copy using the PyNumber API */
2466     assert(PyNumber_InPlaceOr(ob, dup) == ob);
2467     Py_DECREF(ob);
2468 
2469     /* Verify constructors accept NULL arguments */
2470     f = PySet_New(NULL);
2471     assert(f != NULL);
2472     assert(PySet_GET_SIZE(f) == 0);
2473     Py_DECREF(f);
2474     f = PyFrozenSet_New(NULL);
2475     assert(f != NULL);
2476     assert(PyFrozenSet_CheckExact(f));
2477     assert(PySet_GET_SIZE(f) == 0);
2478     Py_DECREF(f);
2479 
2480     Py_DECREF(elem);
2481     Py_DECREF(dup);
2482     Py_RETURN_TRUE;
2483 }
2484 
2485 #undef assertRaises
2486 
2487 #endif
2488 
2489 /***** Dummy Struct  *************************************************/
2490 
2491 static PyObject *
dummy_repr(PyObject * op)2492 dummy_repr(PyObject *op)
2493 {
2494     return PyUnicode_FromString("<dummy key>");
2495 }
2496 
2497 static void _Py_NO_RETURN
dummy_dealloc(PyObject * ignore)2498 dummy_dealloc(PyObject* ignore)
2499 {
2500     Py_FatalError("deallocating <dummy key>");
2501 }
2502 
2503 static PyTypeObject _PySetDummy_Type = {
2504     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2505     "<dummy key> type",
2506     0,
2507     0,
2508     dummy_dealloc,      /*tp_dealloc*/ /*never called*/
2509     0,                  /*tp_vectorcall_offset*/
2510     0,                  /*tp_getattr*/
2511     0,                  /*tp_setattr*/
2512     0,                  /*tp_as_async*/
2513     dummy_repr,         /*tp_repr*/
2514     0,                  /*tp_as_number*/
2515     0,                  /*tp_as_sequence*/
2516     0,                  /*tp_as_mapping*/
2517     0,                  /*tp_hash */
2518     0,                  /*tp_call */
2519     0,                  /*tp_str */
2520     0,                  /*tp_getattro */
2521     0,                  /*tp_setattro */
2522     0,                  /*tp_as_buffer */
2523     Py_TPFLAGS_DEFAULT, /*tp_flags */
2524 };
2525 
2526 static PyObject _dummy_struct = {
2527   _PyObject_EXTRA_INIT
2528   2, &_PySetDummy_Type
2529 };
2530 
2531