• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* set object implementation
3 
4    Written and maintained by Raymond D. Hettinger <python@rcn.com>
5    Derived from Lib/sets.py and Objects/dictobject.c.
6 
7    The basic lookup function used by all operations.
8    This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9 
10    The initial probe index is computed as hash mod the table size.
11    Subsequent probe indices are computed as explained in Objects/dictobject.c.
12 
13    To improve cache locality, each probe inspects a series of consecutive
14    nearby entries before moving on to probes elsewhere in memory.  This leaves
15    us with a hybrid of linear probing and randomized probing.  The linear probing
16    reduces the cost of hash collisions because consecutive memory accesses
17    tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
18    we then use more of the upper bits from the hash value and apply a simple
19    linear congruential random number generator.  This helps break-up long
20    chains of collisions.
21 
22    All arithmetic on hash should ignore overflow.
23 
24    Unlike the dictionary implementation, the lookkey function can return
25    NULL if the rich comparison returns an error.
26 
27    Use cases for sets differ considerably from dictionaries where looked-up
28    keys are more likely to be present.  In contrast, sets are primarily
29    about membership testing where the presence of an element is not known in
30    advance.  Accordingly, the set implementation needs to optimize for both
31    the found and not-found case.
32 */
33 
34 #include "Python.h"
35 #include "pycore_object.h"        // _PyObject_GC_UNTRACK()
36 #include <stddef.h>               // offsetof()
37 
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40 
41 #define dummy (&_dummy_struct)
42 
43 
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46 
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51 
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54 
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58     setentry *table;
59     setentry *entry;
60     size_t perturb = hash;
61     size_t mask = so->mask;
62     size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63     int probes;
64     int cmp;
65 
66     while (1) {
67         entry = &so->table[i];
68         probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69         do {
70             if (entry->hash == 0 && entry->key == NULL)
71                 return entry;
72             if (entry->hash == hash) {
73                 PyObject *startkey = entry->key;
74                 assert(startkey != dummy);
75                 if (startkey == key)
76                     return entry;
77                 if (PyUnicode_CheckExact(startkey)
78                     && PyUnicode_CheckExact(key)
79                     && _PyUnicode_EQ(startkey, key))
80                     return entry;
81                 table = so->table;
82                 Py_INCREF(startkey);
83                 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84                 Py_DECREF(startkey);
85                 if (cmp < 0)
86                     return NULL;
87                 if (table != so->table || entry->key != startkey)
88                     return set_lookkey(so, key, hash);
89                 if (cmp > 0)
90                     return entry;
91                 mask = so->mask;
92             }
93             entry++;
94         } while (probes--);
95         perturb >>= PERTURB_SHIFT;
96         i = (i * 5 + 1 + perturb) & mask;
97     }
98 }
99 
100 static int set_table_resize(PySetObject *, Py_ssize_t);
101 
102 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104 {
105     setentry *table;
106     setentry *freeslot;
107     setentry *entry;
108     size_t perturb;
109     size_t mask;
110     size_t i;                       /* Unsigned for defined overflow behavior */
111     int probes;
112     int cmp;
113 
114     /* Pre-increment is necessary to prevent arbitrary code in the rich
115        comparison from deallocating the key just before the insertion. */
116     Py_INCREF(key);
117 
118   restart:
119 
120     mask = so->mask;
121     i = (size_t)hash & mask;
122     freeslot = NULL;
123     perturb = hash;
124 
125     while (1) {
126         entry = &so->table[i];
127         probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
128         do {
129             if (entry->hash == 0 && entry->key == NULL)
130                 goto found_unused_or_dummy;
131             if (entry->hash == hash) {
132                 PyObject *startkey = entry->key;
133                 assert(startkey != dummy);
134                 if (startkey == key)
135                     goto found_active;
136                 if (PyUnicode_CheckExact(startkey)
137                     && PyUnicode_CheckExact(key)
138                     && _PyUnicode_EQ(startkey, key))
139                     goto found_active;
140                 table = so->table;
141                 Py_INCREF(startkey);
142                 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
143                 Py_DECREF(startkey);
144                 if (cmp > 0)
145                     goto found_active;
146                 if (cmp < 0)
147                     goto comparison_error;
148                 if (table != so->table || entry->key != startkey)
149                     goto restart;
150                 mask = so->mask;
151             }
152             else if (entry->hash == -1) {
153                 assert (entry->key == dummy);
154                 freeslot = entry;
155             }
156             entry++;
157         } while (probes--);
158         perturb >>= PERTURB_SHIFT;
159         i = (i * 5 + 1 + perturb) & mask;
160     }
161 
162   found_unused_or_dummy:
163     if (freeslot == NULL)
164         goto found_unused;
165     so->used++;
166     freeslot->key = key;
167     freeslot->hash = hash;
168     return 0;
169 
170   found_unused:
171     so->fill++;
172     so->used++;
173     entry->key = key;
174     entry->hash = hash;
175     if ((size_t)so->fill*5 < mask*3)
176         return 0;
177     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
178 
179   found_active:
180     Py_DECREF(key);
181     return 0;
182 
183   comparison_error:
184     Py_DECREF(key);
185     return -1;
186 }
187 
188 /*
189 Internal routine used by set_table_resize() to insert an item which is
190 known to be absent from the set.  Besides the performance benefit,
191 there is also safety benefit since using set_add_entry() risks making
192 a callback in the middle of a set_table_resize(), see issue 1456209.
193 The caller is responsible for updating the key's reference count and
194 the setobject's fill and used fields.
195 */
196 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)197 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
198 {
199     setentry *entry;
200     size_t perturb = hash;
201     size_t i = (size_t)hash & mask;
202     size_t j;
203 
204     while (1) {
205         entry = &table[i];
206         if (entry->key == NULL)
207             goto found_null;
208         if (i + LINEAR_PROBES <= mask) {
209             for (j = 0; j < LINEAR_PROBES; j++) {
210                 entry++;
211                 if (entry->key == NULL)
212                     goto found_null;
213             }
214         }
215         perturb >>= PERTURB_SHIFT;
216         i = (i * 5 + 1 + perturb) & mask;
217     }
218   found_null:
219     entry->key = key;
220     entry->hash = hash;
221 }
222 
223 /* ======== End logic for probing the hash table ========================== */
224 /* ======================================================================== */
225 
226 /*
227 Restructure the table by allocating a new table and reinserting all
228 keys again.  When entries have been deleted, the new table may
229 actually be smaller than the old one.
230 */
231 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)232 set_table_resize(PySetObject *so, Py_ssize_t minused)
233 {
234     setentry *oldtable, *newtable, *entry;
235     Py_ssize_t oldmask = so->mask;
236     size_t newmask;
237     int is_oldtable_malloced;
238     setentry small_copy[PySet_MINSIZE];
239 
240     assert(minused >= 0);
241 
242     /* Find the smallest table size > minused. */
243     /* XXX speed-up with intrinsics */
244     size_t newsize = PySet_MINSIZE;
245     while (newsize <= (size_t)minused) {
246         newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
247     }
248 
249     /* Get space for a new table. */
250     oldtable = so->table;
251     assert(oldtable != NULL);
252     is_oldtable_malloced = oldtable != so->smalltable;
253 
254     if (newsize == PySet_MINSIZE) {
255         /* A large table is shrinking, or we can't get any smaller. */
256         newtable = so->smalltable;
257         if (newtable == oldtable) {
258             if (so->fill == so->used) {
259                 /* No dummies, so no point doing anything. */
260                 return 0;
261             }
262             /* We're not going to resize it, but rebuild the
263                table anyway to purge old dummy entries.
264                Subtle:  This is *necessary* if fill==size,
265                as set_lookkey needs at least one virgin slot to
266                terminate failing searches.  If fill < size, it's
267                merely desirable, as dummies slow searches. */
268             assert(so->fill > so->used);
269             memcpy(small_copy, oldtable, sizeof(small_copy));
270             oldtable = small_copy;
271         }
272     }
273     else {
274         newtable = PyMem_NEW(setentry, newsize);
275         if (newtable == NULL) {
276             PyErr_NoMemory();
277             return -1;
278         }
279     }
280 
281     /* Make the set empty, using the new table. */
282     assert(newtable != oldtable);
283     memset(newtable, 0, sizeof(setentry) * newsize);
284     so->mask = newsize - 1;
285     so->table = newtable;
286 
287     /* Copy the data over; this is refcount-neutral for active entries;
288        dummy entries aren't copied over, of course */
289     newmask = (size_t)so->mask;
290     if (so->fill == so->used) {
291         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
292             if (entry->key != NULL) {
293                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
294             }
295         }
296     } else {
297         so->fill = so->used;
298         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
299             if (entry->key != NULL && entry->key != dummy) {
300                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
301             }
302         }
303     }
304 
305     if (is_oldtable_malloced)
306         PyMem_Free(oldtable);
307     return 0;
308 }
309 
310 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312 {
313     setentry *entry;
314 
315     entry = set_lookkey(so, key, hash);
316     if (entry != NULL)
317         return entry->key != NULL;
318     return -1;
319 }
320 
321 #define DISCARD_NOTFOUND 0
322 #define DISCARD_FOUND 1
323 
324 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)325 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
326 {
327     setentry *entry;
328     PyObject *old_key;
329 
330     entry = set_lookkey(so, key, hash);
331     if (entry == NULL)
332         return -1;
333     if (entry->key == NULL)
334         return DISCARD_NOTFOUND;
335     old_key = entry->key;
336     entry->key = dummy;
337     entry->hash = -1;
338     so->used--;
339     Py_DECREF(old_key);
340     return DISCARD_FOUND;
341 }
342 
343 static int
set_add_key(PySetObject * so,PyObject * key)344 set_add_key(PySetObject *so, PyObject *key)
345 {
346     Py_hash_t hash;
347 
348     if (!PyUnicode_CheckExact(key) ||
349         (hash = ((PyASCIIObject *) key)->hash) == -1) {
350         hash = PyObject_Hash(key);
351         if (hash == -1)
352             return -1;
353     }
354     return set_add_entry(so, key, hash);
355 }
356 
357 static int
set_contains_key(PySetObject * so,PyObject * key)358 set_contains_key(PySetObject *so, PyObject *key)
359 {
360     Py_hash_t hash;
361 
362     if (!PyUnicode_CheckExact(key) ||
363         (hash = ((PyASCIIObject *) key)->hash) == -1) {
364         hash = PyObject_Hash(key);
365         if (hash == -1)
366             return -1;
367     }
368     return set_contains_entry(so, key, hash);
369 }
370 
371 static int
set_discard_key(PySetObject * so,PyObject * key)372 set_discard_key(PySetObject *so, PyObject *key)
373 {
374     Py_hash_t hash;
375 
376     if (!PyUnicode_CheckExact(key) ||
377         (hash = ((PyASCIIObject *) key)->hash) == -1) {
378         hash = PyObject_Hash(key);
379         if (hash == -1)
380             return -1;
381     }
382     return set_discard_entry(so, key, hash);
383 }
384 
385 static void
set_empty_to_minsize(PySetObject * so)386 set_empty_to_minsize(PySetObject *so)
387 {
388     memset(so->smalltable, 0, sizeof(so->smalltable));
389     so->fill = 0;
390     so->used = 0;
391     so->mask = PySet_MINSIZE - 1;
392     so->table = so->smalltable;
393     so->hash = -1;
394 }
395 
396 static int
set_clear_internal(PySetObject * so)397 set_clear_internal(PySetObject *so)
398 {
399     setentry *entry;
400     setentry *table = so->table;
401     Py_ssize_t fill = so->fill;
402     Py_ssize_t used = so->used;
403     int table_is_malloced = table != so->smalltable;
404     setentry small_copy[PySet_MINSIZE];
405 
406     assert (PyAnySet_Check(so));
407     assert(table != NULL);
408 
409     /* This is delicate.  During the process of clearing the set,
410      * decrefs can cause the set to mutate.  To avoid fatal confusion
411      * (voice of experience), we have to make the set empty before
412      * clearing the slots, and never refer to anything via so->ref while
413      * clearing.
414      */
415     if (table_is_malloced)
416         set_empty_to_minsize(so);
417 
418     else if (fill > 0) {
419         /* It's a small table with something that needs to be cleared.
420          * Afraid the only safe way is to copy the set entries into
421          * another small table first.
422          */
423         memcpy(small_copy, table, sizeof(small_copy));
424         table = small_copy;
425         set_empty_to_minsize(so);
426     }
427     /* else it's a small table that's already empty */
428 
429     /* Now we can finally clear things.  If C had refcounts, we could
430      * assert that the refcount on table is 1 now, i.e. that this function
431      * has unique access to it, so decref side-effects can't alter it.
432      */
433     for (entry = table; used > 0; entry++) {
434         if (entry->key && entry->key != dummy) {
435             used--;
436             Py_DECREF(entry->key);
437         }
438     }
439 
440     if (table_is_malloced)
441         PyMem_Free(table);
442     return 0;
443 }
444 
445 /*
446  * Iterate over a set table.  Use like so:
447  *
448  *     Py_ssize_t pos;
449  *     setentry *entry;
450  *     pos = 0;   # important!  pos should not otherwise be changed by you
451  *     while (set_next(yourset, &pos, &entry)) {
452  *              Refer to borrowed reference in entry->key.
453  *     }
454  *
455  * CAUTION:  In general, it isn't safe to use set_next in a loop that
456  * mutates the table.
457  */
458 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)459 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
460 {
461     Py_ssize_t i;
462     Py_ssize_t mask;
463     setentry *entry;
464 
465     assert (PyAnySet_Check(so));
466     i = *pos_ptr;
467     assert(i >= 0);
468     mask = so->mask;
469     entry = &so->table[i];
470     while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
471         i++;
472         entry++;
473     }
474     *pos_ptr = i+1;
475     if (i > mask)
476         return 0;
477     assert(entry != NULL);
478     *entry_ptr = entry;
479     return 1;
480 }
481 
482 static void
set_dealloc(PySetObject * so)483 set_dealloc(PySetObject *so)
484 {
485     setentry *entry;
486     Py_ssize_t used = so->used;
487 
488     /* bpo-31095: UnTrack is needed before calling any callbacks */
489     PyObject_GC_UnTrack(so);
490     Py_TRASHCAN_BEGIN(so, set_dealloc)
491     if (so->weakreflist != NULL)
492         PyObject_ClearWeakRefs((PyObject *) so);
493 
494     for (entry = so->table; used > 0; entry++) {
495         if (entry->key && entry->key != dummy) {
496                 used--;
497                 Py_DECREF(entry->key);
498         }
499     }
500     if (so->table != so->smalltable)
501         PyMem_Free(so->table);
502     Py_TYPE(so)->tp_free(so);
503     Py_TRASHCAN_END
504 }
505 
506 static PyObject *
set_repr(PySetObject * so)507 set_repr(PySetObject *so)
508 {
509     PyObject *result=NULL, *keys, *listrepr, *tmp;
510     int status = Py_ReprEnter((PyObject*)so);
511 
512     if (status != 0) {
513         if (status < 0)
514             return NULL;
515         return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
516     }
517 
518     /* shortcut for the empty set */
519     if (!so->used) {
520         Py_ReprLeave((PyObject*)so);
521         return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
522     }
523 
524     keys = PySequence_List((PyObject *)so);
525     if (keys == NULL)
526         goto done;
527 
528     /* repr(keys)[1:-1] */
529     listrepr = PyObject_Repr(keys);
530     Py_DECREF(keys);
531     if (listrepr == NULL)
532         goto done;
533     tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
534     Py_DECREF(listrepr);
535     if (tmp == NULL)
536         goto done;
537     listrepr = tmp;
538 
539     if (!PySet_CheckExact(so))
540         result = PyUnicode_FromFormat("%s({%U})",
541                                       Py_TYPE(so)->tp_name,
542                                       listrepr);
543     else
544         result = PyUnicode_FromFormat("{%U}", listrepr);
545     Py_DECREF(listrepr);
546 done:
547     Py_ReprLeave((PyObject*)so);
548     return result;
549 }
550 
551 static Py_ssize_t
set_len(PyObject * so)552 set_len(PyObject *so)
553 {
554     return ((PySetObject *)so)->used;
555 }
556 
557 static int
set_merge(PySetObject * so,PyObject * otherset)558 set_merge(PySetObject *so, PyObject *otherset)
559 {
560     PySetObject *other;
561     PyObject *key;
562     Py_ssize_t i;
563     setentry *so_entry;
564     setentry *other_entry;
565 
566     assert (PyAnySet_Check(so));
567     assert (PyAnySet_Check(otherset));
568 
569     other = (PySetObject*)otherset;
570     if (other == so || other->used == 0)
571         /* a.update(a) or a.update(set()); nothing to do */
572         return 0;
573     /* Do one big resize at the start, rather than
574      * incrementally resizing as we insert new keys.  Expect
575      * that there will be no (or few) overlapping keys.
576      */
577     if ((so->fill + other->used)*5 >= so->mask*3) {
578         if (set_table_resize(so, (so->used + other->used)*2) != 0)
579             return -1;
580     }
581     so_entry = so->table;
582     other_entry = other->table;
583 
584     /* If our table is empty, and both tables have the same size, and
585        there are no dummies to eliminate, then just copy the pointers. */
586     if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
587         for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
588             key = other_entry->key;
589             if (key != NULL) {
590                 assert(so_entry->key == NULL);
591                 Py_INCREF(key);
592                 so_entry->key = key;
593                 so_entry->hash = other_entry->hash;
594             }
595         }
596         so->fill = other->fill;
597         so->used = other->used;
598         return 0;
599     }
600 
601     /* If our table is empty, we can use set_insert_clean() */
602     if (so->fill == 0) {
603         setentry *newtable = so->table;
604         size_t newmask = (size_t)so->mask;
605         so->fill = other->used;
606         so->used = other->used;
607         for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
608             key = other_entry->key;
609             if (key != NULL && key != dummy) {
610                 Py_INCREF(key);
611                 set_insert_clean(newtable, newmask, key, other_entry->hash);
612             }
613         }
614         return 0;
615     }
616 
617     /* We can't assure there are no duplicates, so do normal insertions */
618     for (i = 0; i <= other->mask; i++) {
619         other_entry = &other->table[i];
620         key = other_entry->key;
621         if (key != NULL && key != dummy) {
622             if (set_add_entry(so, key, other_entry->hash))
623                 return -1;
624         }
625     }
626     return 0;
627 }
628 
629 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))630 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
631 {
632     /* Make sure the search finger is in bounds */
633     setentry *entry = so->table + (so->finger & so->mask);
634     setentry *limit = so->table + so->mask;
635     PyObject *key;
636 
637     if (so->used == 0) {
638         PyErr_SetString(PyExc_KeyError, "pop from an empty set");
639         return NULL;
640     }
641     while (entry->key == NULL || entry->key==dummy) {
642         entry++;
643         if (entry > limit)
644             entry = so->table;
645     }
646     key = entry->key;
647     entry->key = dummy;
648     entry->hash = -1;
649     so->used--;
650     so->finger = entry - so->table + 1;   /* next place to start */
651     return key;
652 }
653 
654 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
655 Raises KeyError if the set is empty.");
656 
657 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)658 set_traverse(PySetObject *so, visitproc visit, void *arg)
659 {
660     Py_ssize_t pos = 0;
661     setentry *entry;
662 
663     while (set_next(so, &pos, &entry))
664         Py_VISIT(entry->key);
665     return 0;
666 }
667 
668 /* Work to increase the bit dispersion for closely spaced hash values.
669    This is important because some use cases have many combinations of a
670    small number of elements with nearby hashes so that many distinct
671    combinations collapse to only a handful of distinct hash values. */
672 
673 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)674 _shuffle_bits(Py_uhash_t h)
675 {
676     return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
677 }
678 
679 /* Most of the constants in this hash algorithm are randomly chosen
680    large primes with "interesting bit patterns" and that passed tests
681    for good collision statistics on a variety of problematic datasets
682    including powersets and graph structures (such as David Eppstein's
683    graph recipes in Lib/test/test_set.py) */
684 
685 static Py_hash_t
frozenset_hash(PyObject * self)686 frozenset_hash(PyObject *self)
687 {
688     PySetObject *so = (PySetObject *)self;
689     Py_uhash_t hash = 0;
690     setentry *entry;
691 
692     if (so->hash != -1)
693         return so->hash;
694 
695     /* Xor-in shuffled bits from every entry's hash field because xor is
696        commutative and a frozenset hash should be independent of order.
697 
698        For speed, include null entries and dummy entries and then
699        subtract out their effect afterwards so that the final hash
700        depends only on active entries.  This allows the code to be
701        vectorized by the compiler and it saves the unpredictable
702        branches that would arise when trying to exclude null and dummy
703        entries on every iteration. */
704 
705     for (entry = so->table; entry <= &so->table[so->mask]; entry++)
706         hash ^= _shuffle_bits(entry->hash);
707 
708     /* Remove the effect of an odd number of NULL entries */
709     if ((so->mask + 1 - so->fill) & 1)
710         hash ^= _shuffle_bits(0);
711 
712     /* Remove the effect of an odd number of dummy entries */
713     if ((so->fill - so->used) & 1)
714         hash ^= _shuffle_bits(-1);
715 
716     /* Factor in the number of active entries */
717     hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
718 
719     /* Disperse patterns arising in nested frozensets */
720     hash ^= (hash >> 11) ^ (hash >> 25);
721     hash = hash * 69069U + 907133923UL;
722 
723     /* -1 is reserved as an error code */
724     if (hash == (Py_uhash_t)-1)
725         hash = 590923713UL;
726 
727     so->hash = hash;
728     return hash;
729 }
730 
731 /***** Set iterator type ***********************************************/
732 
733 typedef struct {
734     PyObject_HEAD
735     PySetObject *si_set; /* Set to NULL when iterator is exhausted */
736     Py_ssize_t si_used;
737     Py_ssize_t si_pos;
738     Py_ssize_t len;
739 } setiterobject;
740 
741 static void
setiter_dealloc(setiterobject * si)742 setiter_dealloc(setiterobject *si)
743 {
744     /* bpo-31095: UnTrack is needed before calling any callbacks */
745     _PyObject_GC_UNTRACK(si);
746     Py_XDECREF(si->si_set);
747     PyObject_GC_Del(si);
748 }
749 
750 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)751 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
752 {
753     Py_VISIT(si->si_set);
754     return 0;
755 }
756 
757 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))758 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
759 {
760     Py_ssize_t len = 0;
761     if (si->si_set != NULL && si->si_used == si->si_set->used)
762         len = si->len;
763     return PyLong_FromSsize_t(len);
764 }
765 
766 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
767 
768 static PyObject *setiter_iternext(setiterobject *si);
769 
770 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))771 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
772 {
773     _Py_IDENTIFIER(iter);
774     /* copy the iterator state */
775     setiterobject tmp = *si;
776     Py_XINCREF(tmp.si_set);
777 
778     /* iterate the temporary into a list */
779     PyObject *list = PySequence_List((PyObject*)&tmp);
780     Py_XDECREF(tmp.si_set);
781     if (list == NULL) {
782         return NULL;
783     }
784     return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
785 }
786 
787 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
788 
789 static PyMethodDef setiter_methods[] = {
790     {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
791     {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
792     {NULL,              NULL}           /* sentinel */
793 };
794 
setiter_iternext(setiterobject * si)795 static PyObject *setiter_iternext(setiterobject *si)
796 {
797     PyObject *key;
798     Py_ssize_t i, mask;
799     setentry *entry;
800     PySetObject *so = si->si_set;
801 
802     if (so == NULL)
803         return NULL;
804     assert (PyAnySet_Check(so));
805 
806     if (si->si_used != so->used) {
807         PyErr_SetString(PyExc_RuntimeError,
808                         "Set changed size during iteration");
809         si->si_used = -1; /* Make this state sticky */
810         return NULL;
811     }
812 
813     i = si->si_pos;
814     assert(i>=0);
815     entry = so->table;
816     mask = so->mask;
817     while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
818         i++;
819     si->si_pos = i+1;
820     if (i > mask)
821         goto fail;
822     si->len--;
823     key = entry[i].key;
824     Py_INCREF(key);
825     return key;
826 
827 fail:
828     si->si_set = NULL;
829     Py_DECREF(so);
830     return NULL;
831 }
832 
833 PyTypeObject PySetIter_Type = {
834     PyVarObject_HEAD_INIT(&PyType_Type, 0)
835     "set_iterator",                             /* tp_name */
836     sizeof(setiterobject),                      /* tp_basicsize */
837     0,                                          /* tp_itemsize */
838     /* methods */
839     (destructor)setiter_dealloc,                /* tp_dealloc */
840     0,                                          /* tp_vectorcall_offset */
841     0,                                          /* tp_getattr */
842     0,                                          /* tp_setattr */
843     0,                                          /* tp_as_async */
844     0,                                          /* tp_repr */
845     0,                                          /* tp_as_number */
846     0,                                          /* tp_as_sequence */
847     0,                                          /* tp_as_mapping */
848     0,                                          /* tp_hash */
849     0,                                          /* tp_call */
850     0,                                          /* tp_str */
851     PyObject_GenericGetAttr,                    /* tp_getattro */
852     0,                                          /* tp_setattro */
853     0,                                          /* tp_as_buffer */
854     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
855     0,                                          /* tp_doc */
856     (traverseproc)setiter_traverse,             /* tp_traverse */
857     0,                                          /* tp_clear */
858     0,                                          /* tp_richcompare */
859     0,                                          /* tp_weaklistoffset */
860     PyObject_SelfIter,                          /* tp_iter */
861     (iternextfunc)setiter_iternext,             /* tp_iternext */
862     setiter_methods,                            /* tp_methods */
863     0,
864 };
865 
866 static PyObject *
set_iter(PySetObject * so)867 set_iter(PySetObject *so)
868 {
869     setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
870     if (si == NULL)
871         return NULL;
872     Py_INCREF(so);
873     si->si_set = so;
874     si->si_used = so->used;
875     si->si_pos = 0;
876     si->len = so->used;
877     _PyObject_GC_TRACK(si);
878     return (PyObject *)si;
879 }
880 
881 static int
set_update_internal(PySetObject * so,PyObject * other)882 set_update_internal(PySetObject *so, PyObject *other)
883 {
884     PyObject *key, *it;
885 
886     if (PyAnySet_Check(other))
887         return set_merge(so, other);
888 
889     if (PyDict_CheckExact(other)) {
890         PyObject *value;
891         Py_ssize_t pos = 0;
892         Py_hash_t hash;
893         Py_ssize_t dictsize = PyDict_GET_SIZE(other);
894 
895         /* Do one big resize at the start, rather than
896         * incrementally resizing as we insert new keys.  Expect
897         * that there will be no (or few) overlapping keys.
898         */
899         if (dictsize < 0)
900             return -1;
901         if ((so->fill + dictsize)*5 >= so->mask*3) {
902             if (set_table_resize(so, (so->used + dictsize)*2) != 0)
903                 return -1;
904         }
905         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
906             if (set_add_entry(so, key, hash))
907                 return -1;
908         }
909         return 0;
910     }
911 
912     it = PyObject_GetIter(other);
913     if (it == NULL)
914         return -1;
915 
916     while ((key = PyIter_Next(it)) != NULL) {
917         if (set_add_key(so, key)) {
918             Py_DECREF(it);
919             Py_DECREF(key);
920             return -1;
921         }
922         Py_DECREF(key);
923     }
924     Py_DECREF(it);
925     if (PyErr_Occurred())
926         return -1;
927     return 0;
928 }
929 
930 static PyObject *
set_update(PySetObject * so,PyObject * args)931 set_update(PySetObject *so, PyObject *args)
932 {
933     Py_ssize_t i;
934 
935     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
936         PyObject *other = PyTuple_GET_ITEM(args, i);
937         if (set_update_internal(so, other))
938             return NULL;
939     }
940     Py_RETURN_NONE;
941 }
942 
943 PyDoc_STRVAR(update_doc,
944 "Update a set with the union of itself and others.");
945 
946 /* XXX Todo:
947    If aligned memory allocations become available, make the
948    set object 64 byte aligned so that most of the fields
949    can be retrieved or updated in a single cache line.
950 */
951 
952 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)953 make_new_set(PyTypeObject *type, PyObject *iterable)
954 {
955     assert(PyType_Check(type));
956     PySetObject *so;
957 
958     so = (PySetObject *)type->tp_alloc(type, 0);
959     if (so == NULL)
960         return NULL;
961 
962     so->fill = 0;
963     so->used = 0;
964     so->mask = PySet_MINSIZE - 1;
965     so->table = so->smalltable;
966     so->hash = -1;
967     so->finger = 0;
968     so->weakreflist = NULL;
969 
970     if (iterable != NULL) {
971         if (set_update_internal(so, iterable)) {
972             Py_DECREF(so);
973             return NULL;
974         }
975     }
976 
977     return (PyObject *)so;
978 }
979 
980 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)981 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
982 {
983     if (type != &PySet_Type && type != &PyFrozenSet_Type) {
984         if (PyType_IsSubtype(type, &PySet_Type))
985             type = &PySet_Type;
986         else
987             type = &PyFrozenSet_Type;
988     }
989     return make_new_set(type, iterable);
990 }
991 
992 static PyObject *
make_new_frozenset(PyTypeObject * type,PyObject * iterable)993 make_new_frozenset(PyTypeObject *type, PyObject *iterable)
994 {
995     if (type != &PyFrozenSet_Type) {
996         return make_new_set(type, iterable);
997     }
998 
999     if (iterable != NULL && PyFrozenSet_CheckExact(iterable)) {
1000         /* frozenset(f) is idempotent */
1001         Py_INCREF(iterable);
1002         return iterable;
1003     }
1004     return make_new_set((PyTypeObject *)type, iterable);
1005 }
1006 
1007 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1008 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1009 {
1010     PyObject *iterable = NULL;
1011 
1012     if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds)) {
1013         return NULL;
1014     }
1015 
1016     if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1017         return NULL;
1018     }
1019 
1020     return make_new_frozenset(type, iterable);
1021 }
1022 
1023 static PyObject *
frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1024 frozenset_vectorcall(PyObject *type, PyObject * const*args,
1025                      size_t nargsf, PyObject *kwnames)
1026 {
1027     if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1028         return NULL;
1029     }
1030 
1031     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1032     if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1033         return NULL;
1034     }
1035 
1036     PyObject *iterable = (nargs ? args[0] : NULL);
1037     return make_new_frozenset((PyTypeObject *)type, iterable);
1038 }
1039 
1040 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1041 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1042 {
1043     return make_new_set(type, NULL);
1044 }
1045 
1046 /* set_swap_bodies() switches the contents of any two sets by moving their
1047    internal data pointers and, if needed, copying the internal smalltables.
1048    Semantically equivalent to:
1049 
1050      t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1051 
1052    The function always succeeds and it leaves both objects in a stable state.
1053    Useful for operations that update in-place (by allowing an intermediate
1054    result to be swapped into one of the original inputs).
1055 */
1056 
1057 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1058 set_swap_bodies(PySetObject *a, PySetObject *b)
1059 {
1060     Py_ssize_t t;
1061     setentry *u;
1062     setentry tab[PySet_MINSIZE];
1063     Py_hash_t h;
1064 
1065     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1066     t = a->used;     a->used   = b->used;        b->used  = t;
1067     t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1068 
1069     u = a->table;
1070     if (a->table == a->smalltable)
1071         u = b->smalltable;
1072     a->table  = b->table;
1073     if (b->table == b->smalltable)
1074         a->table = a->smalltable;
1075     b->table = u;
1076 
1077     if (a->table == a->smalltable || b->table == b->smalltable) {
1078         memcpy(tab, a->smalltable, sizeof(tab));
1079         memcpy(a->smalltable, b->smalltable, sizeof(tab));
1080         memcpy(b->smalltable, tab, sizeof(tab));
1081     }
1082 
1083     if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1084         PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1085         h = a->hash;     a->hash = b->hash;  b->hash = h;
1086     } else {
1087         a->hash = -1;
1088         b->hash = -1;
1089     }
1090 }
1091 
1092 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1093 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1094 {
1095     return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1096 }
1097 
1098 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1099 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1100 {
1101     if (PyFrozenSet_CheckExact(so)) {
1102         Py_INCREF(so);
1103         return (PyObject *)so;
1104     }
1105     return set_copy(so, NULL);
1106 }
1107 
1108 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1109 
1110 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1111 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1112 {
1113     set_clear_internal(so);
1114     Py_RETURN_NONE;
1115 }
1116 
1117 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1118 
1119 static PyObject *
set_union(PySetObject * so,PyObject * args)1120 set_union(PySetObject *so, PyObject *args)
1121 {
1122     PySetObject *result;
1123     PyObject *other;
1124     Py_ssize_t i;
1125 
1126     result = (PySetObject *)set_copy(so, NULL);
1127     if (result == NULL)
1128         return NULL;
1129 
1130     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1131         other = PyTuple_GET_ITEM(args, i);
1132         if ((PyObject *)so == other)
1133             continue;
1134         if (set_update_internal(result, other)) {
1135             Py_DECREF(result);
1136             return NULL;
1137         }
1138     }
1139     return (PyObject *)result;
1140 }
1141 
1142 PyDoc_STRVAR(union_doc,
1143  "Return the union of sets as a new set.\n\
1144 \n\
1145 (i.e. all elements that are in either set.)");
1146 
1147 static PyObject *
set_or(PySetObject * so,PyObject * other)1148 set_or(PySetObject *so, PyObject *other)
1149 {
1150     PySetObject *result;
1151 
1152     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1153         Py_RETURN_NOTIMPLEMENTED;
1154 
1155     result = (PySetObject *)set_copy(so, NULL);
1156     if (result == NULL)
1157         return NULL;
1158     if ((PyObject *)so == other)
1159         return (PyObject *)result;
1160     if (set_update_internal(result, other)) {
1161         Py_DECREF(result);
1162         return NULL;
1163     }
1164     return (PyObject *)result;
1165 }
1166 
1167 static PyObject *
set_ior(PySetObject * so,PyObject * other)1168 set_ior(PySetObject *so, PyObject *other)
1169 {
1170     if (!PyAnySet_Check(other))
1171         Py_RETURN_NOTIMPLEMENTED;
1172 
1173     if (set_update_internal(so, other))
1174         return NULL;
1175     Py_INCREF(so);
1176     return (PyObject *)so;
1177 }
1178 
1179 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1180 set_intersection(PySetObject *so, PyObject *other)
1181 {
1182     PySetObject *result;
1183     PyObject *key, *it, *tmp;
1184     Py_hash_t hash;
1185     int rv;
1186 
1187     if ((PyObject *)so == other)
1188         return set_copy(so, NULL);
1189 
1190     result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1191     if (result == NULL)
1192         return NULL;
1193 
1194     if (PyAnySet_Check(other)) {
1195         Py_ssize_t pos = 0;
1196         setentry *entry;
1197 
1198         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1199             tmp = (PyObject *)so;
1200             so = (PySetObject *)other;
1201             other = tmp;
1202         }
1203 
1204         while (set_next((PySetObject *)other, &pos, &entry)) {
1205             key = entry->key;
1206             hash = entry->hash;
1207             rv = set_contains_entry(so, key, hash);
1208             if (rv < 0) {
1209                 Py_DECREF(result);
1210                 return NULL;
1211             }
1212             if (rv) {
1213                 if (set_add_entry(result, key, hash)) {
1214                     Py_DECREF(result);
1215                     return NULL;
1216                 }
1217             }
1218         }
1219         return (PyObject *)result;
1220     }
1221 
1222     it = PyObject_GetIter(other);
1223     if (it == NULL) {
1224         Py_DECREF(result);
1225         return NULL;
1226     }
1227 
1228     while ((key = PyIter_Next(it)) != NULL) {
1229         hash = PyObject_Hash(key);
1230         if (hash == -1)
1231             goto error;
1232         rv = set_contains_entry(so, key, hash);
1233         if (rv < 0)
1234             goto error;
1235         if (rv) {
1236             if (set_add_entry(result, key, hash))
1237                 goto error;
1238         }
1239         Py_DECREF(key);
1240     }
1241     Py_DECREF(it);
1242     if (PyErr_Occurred()) {
1243         Py_DECREF(result);
1244         return NULL;
1245     }
1246     return (PyObject *)result;
1247   error:
1248     Py_DECREF(it);
1249     Py_DECREF(result);
1250     Py_DECREF(key);
1251     return NULL;
1252 }
1253 
1254 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1255 set_intersection_multi(PySetObject *so, PyObject *args)
1256 {
1257     Py_ssize_t i;
1258     PyObject *result = (PyObject *)so;
1259 
1260     if (PyTuple_GET_SIZE(args) == 0)
1261         return set_copy(so, NULL);
1262 
1263     Py_INCREF(so);
1264     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1265         PyObject *other = PyTuple_GET_ITEM(args, i);
1266         PyObject *newresult = set_intersection((PySetObject *)result, other);
1267         if (newresult == NULL) {
1268             Py_DECREF(result);
1269             return NULL;
1270         }
1271         Py_DECREF(result);
1272         result = newresult;
1273     }
1274     return result;
1275 }
1276 
1277 PyDoc_STRVAR(intersection_doc,
1278 "Return the intersection of two sets as a new set.\n\
1279 \n\
1280 (i.e. all elements that are in both sets.)");
1281 
1282 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1283 set_intersection_update(PySetObject *so, PyObject *other)
1284 {
1285     PyObject *tmp;
1286 
1287     tmp = set_intersection(so, other);
1288     if (tmp == NULL)
1289         return NULL;
1290     set_swap_bodies(so, (PySetObject *)tmp);
1291     Py_DECREF(tmp);
1292     Py_RETURN_NONE;
1293 }
1294 
1295 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1296 set_intersection_update_multi(PySetObject *so, PyObject *args)
1297 {
1298     PyObject *tmp;
1299 
1300     tmp = set_intersection_multi(so, args);
1301     if (tmp == NULL)
1302         return NULL;
1303     set_swap_bodies(so, (PySetObject *)tmp);
1304     Py_DECREF(tmp);
1305     Py_RETURN_NONE;
1306 }
1307 
1308 PyDoc_STRVAR(intersection_update_doc,
1309 "Update a set with the intersection of itself and another.");
1310 
1311 static PyObject *
set_and(PySetObject * so,PyObject * other)1312 set_and(PySetObject *so, PyObject *other)
1313 {
1314     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1315         Py_RETURN_NOTIMPLEMENTED;
1316     return set_intersection(so, other);
1317 }
1318 
1319 static PyObject *
set_iand(PySetObject * so,PyObject * other)1320 set_iand(PySetObject *so, PyObject *other)
1321 {
1322     PyObject *result;
1323 
1324     if (!PyAnySet_Check(other))
1325         Py_RETURN_NOTIMPLEMENTED;
1326     result = set_intersection_update(so, other);
1327     if (result == NULL)
1328         return NULL;
1329     Py_DECREF(result);
1330     Py_INCREF(so);
1331     return (PyObject *)so;
1332 }
1333 
1334 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1335 set_isdisjoint(PySetObject *so, PyObject *other)
1336 {
1337     PyObject *key, *it, *tmp;
1338     int rv;
1339 
1340     if ((PyObject *)so == other) {
1341         if (PySet_GET_SIZE(so) == 0)
1342             Py_RETURN_TRUE;
1343         else
1344             Py_RETURN_FALSE;
1345     }
1346 
1347     if (PyAnySet_CheckExact(other)) {
1348         Py_ssize_t pos = 0;
1349         setentry *entry;
1350 
1351         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1352             tmp = (PyObject *)so;
1353             so = (PySetObject *)other;
1354             other = tmp;
1355         }
1356         while (set_next((PySetObject *)other, &pos, &entry)) {
1357             rv = set_contains_entry(so, entry->key, entry->hash);
1358             if (rv < 0)
1359                 return NULL;
1360             if (rv)
1361                 Py_RETURN_FALSE;
1362         }
1363         Py_RETURN_TRUE;
1364     }
1365 
1366     it = PyObject_GetIter(other);
1367     if (it == NULL)
1368         return NULL;
1369 
1370     while ((key = PyIter_Next(it)) != NULL) {
1371         Py_hash_t hash = PyObject_Hash(key);
1372 
1373         if (hash == -1) {
1374             Py_DECREF(key);
1375             Py_DECREF(it);
1376             return NULL;
1377         }
1378         rv = set_contains_entry(so, key, hash);
1379         Py_DECREF(key);
1380         if (rv < 0) {
1381             Py_DECREF(it);
1382             return NULL;
1383         }
1384         if (rv) {
1385             Py_DECREF(it);
1386             Py_RETURN_FALSE;
1387         }
1388     }
1389     Py_DECREF(it);
1390     if (PyErr_Occurred())
1391         return NULL;
1392     Py_RETURN_TRUE;
1393 }
1394 
1395 PyDoc_STRVAR(isdisjoint_doc,
1396 "Return True if two sets have a null intersection.");
1397 
1398 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1399 set_difference_update_internal(PySetObject *so, PyObject *other)
1400 {
1401     if ((PyObject *)so == other)
1402         return set_clear_internal(so);
1403 
1404     if (PyAnySet_Check(other)) {
1405         setentry *entry;
1406         Py_ssize_t pos = 0;
1407 
1408         /* Optimization:  When the other set is more than 8 times
1409            larger than the base set, replace the other set with
1410            intersection of the two sets.
1411         */
1412         if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1413             other = set_intersection(so, other);
1414             if (other == NULL)
1415                 return -1;
1416         } else {
1417             Py_INCREF(other);
1418         }
1419 
1420         while (set_next((PySetObject *)other, &pos, &entry))
1421             if (set_discard_entry(so, entry->key, entry->hash) < 0) {
1422                 Py_DECREF(other);
1423                 return -1;
1424             }
1425 
1426         Py_DECREF(other);
1427     } else {
1428         PyObject *key, *it;
1429         it = PyObject_GetIter(other);
1430         if (it == NULL)
1431             return -1;
1432 
1433         while ((key = PyIter_Next(it)) != NULL) {
1434             if (set_discard_key(so, key) < 0) {
1435                 Py_DECREF(it);
1436                 Py_DECREF(key);
1437                 return -1;
1438             }
1439             Py_DECREF(key);
1440         }
1441         Py_DECREF(it);
1442         if (PyErr_Occurred())
1443             return -1;
1444     }
1445     /* If more than 1/4th are dummies, then resize them away. */
1446     if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1447         return 0;
1448     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1449 }
1450 
1451 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1452 set_difference_update(PySetObject *so, PyObject *args)
1453 {
1454     Py_ssize_t i;
1455 
1456     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1457         PyObject *other = PyTuple_GET_ITEM(args, i);
1458         if (set_difference_update_internal(so, other))
1459             return NULL;
1460     }
1461     Py_RETURN_NONE;
1462 }
1463 
1464 PyDoc_STRVAR(difference_update_doc,
1465 "Remove all elements of another set from this set.");
1466 
1467 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1468 set_copy_and_difference(PySetObject *so, PyObject *other)
1469 {
1470     PyObject *result;
1471 
1472     result = set_copy(so, NULL);
1473     if (result == NULL)
1474         return NULL;
1475     if (set_difference_update_internal((PySetObject *) result, other) == 0)
1476         return result;
1477     Py_DECREF(result);
1478     return NULL;
1479 }
1480 
1481 static PyObject *
set_difference(PySetObject * so,PyObject * other)1482 set_difference(PySetObject *so, PyObject *other)
1483 {
1484     PyObject *result;
1485     PyObject *key;
1486     Py_hash_t hash;
1487     setentry *entry;
1488     Py_ssize_t pos = 0, other_size;
1489     int rv;
1490 
1491     if (PyAnySet_Check(other)) {
1492         other_size = PySet_GET_SIZE(other);
1493     }
1494     else if (PyDict_CheckExact(other)) {
1495         other_size = PyDict_GET_SIZE(other);
1496     }
1497     else {
1498         return set_copy_and_difference(so, other);
1499     }
1500 
1501     /* If len(so) much more than len(other), it's more efficient to simply copy
1502      * so and then iterate other looking for common elements. */
1503     if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1504         return set_copy_and_difference(so, other);
1505     }
1506 
1507     result = make_new_set_basetype(Py_TYPE(so), NULL);
1508     if (result == NULL)
1509         return NULL;
1510 
1511     if (PyDict_CheckExact(other)) {
1512         while (set_next(so, &pos, &entry)) {
1513             key = entry->key;
1514             hash = entry->hash;
1515             rv = _PyDict_Contains_KnownHash(other, key, hash);
1516             if (rv < 0) {
1517                 Py_DECREF(result);
1518                 return NULL;
1519             }
1520             if (!rv) {
1521                 if (set_add_entry((PySetObject *)result, key, hash)) {
1522                     Py_DECREF(result);
1523                     return NULL;
1524                 }
1525             }
1526         }
1527         return result;
1528     }
1529 
1530     /* Iterate over so, checking for common elements in other. */
1531     while (set_next(so, &pos, &entry)) {
1532         key = entry->key;
1533         hash = entry->hash;
1534         rv = set_contains_entry((PySetObject *)other, key, hash);
1535         if (rv < 0) {
1536             Py_DECREF(result);
1537             return NULL;
1538         }
1539         if (!rv) {
1540             if (set_add_entry((PySetObject *)result, key, hash)) {
1541                 Py_DECREF(result);
1542                 return NULL;
1543             }
1544         }
1545     }
1546     return result;
1547 }
1548 
1549 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1550 set_difference_multi(PySetObject *so, PyObject *args)
1551 {
1552     Py_ssize_t i;
1553     PyObject *result, *other;
1554 
1555     if (PyTuple_GET_SIZE(args) == 0)
1556         return set_copy(so, NULL);
1557 
1558     other = PyTuple_GET_ITEM(args, 0);
1559     result = set_difference(so, other);
1560     if (result == NULL)
1561         return NULL;
1562 
1563     for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1564         other = PyTuple_GET_ITEM(args, i);
1565         if (set_difference_update_internal((PySetObject *)result, other)) {
1566             Py_DECREF(result);
1567             return NULL;
1568         }
1569     }
1570     return result;
1571 }
1572 
1573 PyDoc_STRVAR(difference_doc,
1574 "Return the difference of two or more sets as a new set.\n\
1575 \n\
1576 (i.e. all elements that are in this set but not the others.)");
1577 static PyObject *
set_sub(PySetObject * so,PyObject * other)1578 set_sub(PySetObject *so, PyObject *other)
1579 {
1580     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1581         Py_RETURN_NOTIMPLEMENTED;
1582     return set_difference(so, other);
1583 }
1584 
1585 static PyObject *
set_isub(PySetObject * so,PyObject * other)1586 set_isub(PySetObject *so, PyObject *other)
1587 {
1588     if (!PyAnySet_Check(other))
1589         Py_RETURN_NOTIMPLEMENTED;
1590     if (set_difference_update_internal(so, other))
1591         return NULL;
1592     Py_INCREF(so);
1593     return (PyObject *)so;
1594 }
1595 
1596 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1597 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1598 {
1599     PySetObject *otherset;
1600     PyObject *key;
1601     Py_ssize_t pos = 0;
1602     Py_hash_t hash;
1603     setentry *entry;
1604     int rv;
1605 
1606     if ((PyObject *)so == other)
1607         return set_clear(so, NULL);
1608 
1609     if (PyDict_CheckExact(other)) {
1610         PyObject *value;
1611         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1612             Py_INCREF(key);
1613             rv = set_discard_entry(so, key, hash);
1614             if (rv < 0) {
1615                 Py_DECREF(key);
1616                 return NULL;
1617             }
1618             if (rv == DISCARD_NOTFOUND) {
1619                 if (set_add_entry(so, key, hash)) {
1620                     Py_DECREF(key);
1621                     return NULL;
1622                 }
1623             }
1624             Py_DECREF(key);
1625         }
1626         Py_RETURN_NONE;
1627     }
1628 
1629     if (PyAnySet_Check(other)) {
1630         Py_INCREF(other);
1631         otherset = (PySetObject *)other;
1632     } else {
1633         otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1634         if (otherset == NULL)
1635             return NULL;
1636     }
1637 
1638     while (set_next(otherset, &pos, &entry)) {
1639         key = entry->key;
1640         hash = entry->hash;
1641         rv = set_discard_entry(so, key, hash);
1642         if (rv < 0) {
1643             Py_DECREF(otherset);
1644             return NULL;
1645         }
1646         if (rv == DISCARD_NOTFOUND) {
1647             if (set_add_entry(so, key, hash)) {
1648                 Py_DECREF(otherset);
1649                 return NULL;
1650             }
1651         }
1652     }
1653     Py_DECREF(otherset);
1654     Py_RETURN_NONE;
1655 }
1656 
1657 PyDoc_STRVAR(symmetric_difference_update_doc,
1658 "Update a set with the symmetric difference of itself and another.");
1659 
1660 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1661 set_symmetric_difference(PySetObject *so, PyObject *other)
1662 {
1663     PyObject *rv;
1664     PySetObject *otherset;
1665 
1666     otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1667     if (otherset == NULL)
1668         return NULL;
1669     rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1670     if (rv == NULL) {
1671         Py_DECREF(otherset);
1672         return NULL;
1673     }
1674     Py_DECREF(rv);
1675     return (PyObject *)otherset;
1676 }
1677 
1678 PyDoc_STRVAR(symmetric_difference_doc,
1679 "Return the symmetric difference of two sets as a new set.\n\
1680 \n\
1681 (i.e. all elements that are in exactly one of the sets.)");
1682 
1683 static PyObject *
set_xor(PySetObject * so,PyObject * other)1684 set_xor(PySetObject *so, PyObject *other)
1685 {
1686     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1687         Py_RETURN_NOTIMPLEMENTED;
1688     return set_symmetric_difference(so, other);
1689 }
1690 
1691 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1692 set_ixor(PySetObject *so, PyObject *other)
1693 {
1694     PyObject *result;
1695 
1696     if (!PyAnySet_Check(other))
1697         Py_RETURN_NOTIMPLEMENTED;
1698     result = set_symmetric_difference_update(so, other);
1699     if (result == NULL)
1700         return NULL;
1701     Py_DECREF(result);
1702     Py_INCREF(so);
1703     return (PyObject *)so;
1704 }
1705 
1706 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1707 set_issubset(PySetObject *so, PyObject *other)
1708 {
1709     setentry *entry;
1710     Py_ssize_t pos = 0;
1711     int rv;
1712 
1713     if (!PyAnySet_Check(other)) {
1714         PyObject *tmp, *result;
1715         tmp = make_new_set(&PySet_Type, other);
1716         if (tmp == NULL)
1717             return NULL;
1718         result = set_issubset(so, tmp);
1719         Py_DECREF(tmp);
1720         return result;
1721     }
1722     if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1723         Py_RETURN_FALSE;
1724 
1725     while (set_next(so, &pos, &entry)) {
1726         rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1727         if (rv < 0)
1728             return NULL;
1729         if (!rv)
1730             Py_RETURN_FALSE;
1731     }
1732     Py_RETURN_TRUE;
1733 }
1734 
1735 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1736 
1737 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1738 set_issuperset(PySetObject *so, PyObject *other)
1739 {
1740     PyObject *tmp, *result;
1741 
1742     if (!PyAnySet_Check(other)) {
1743         tmp = make_new_set(&PySet_Type, other);
1744         if (tmp == NULL)
1745             return NULL;
1746         result = set_issuperset(so, tmp);
1747         Py_DECREF(tmp);
1748         return result;
1749     }
1750     return set_issubset((PySetObject *)other, (PyObject *)so);
1751 }
1752 
1753 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1754 
1755 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1756 set_richcompare(PySetObject *v, PyObject *w, int op)
1757 {
1758     PyObject *r1;
1759     int r2;
1760 
1761     if(!PyAnySet_Check(w))
1762         Py_RETURN_NOTIMPLEMENTED;
1763 
1764     switch (op) {
1765     case Py_EQ:
1766         if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1767             Py_RETURN_FALSE;
1768         if (v->hash != -1  &&
1769             ((PySetObject *)w)->hash != -1 &&
1770             v->hash != ((PySetObject *)w)->hash)
1771             Py_RETURN_FALSE;
1772         return set_issubset(v, w);
1773     case Py_NE:
1774         r1 = set_richcompare(v, w, Py_EQ);
1775         if (r1 == NULL)
1776             return NULL;
1777         r2 = PyObject_IsTrue(r1);
1778         Py_DECREF(r1);
1779         if (r2 < 0)
1780             return NULL;
1781         return PyBool_FromLong(!r2);
1782     case Py_LE:
1783         return set_issubset(v, w);
1784     case Py_GE:
1785         return set_issuperset(v, w);
1786     case Py_LT:
1787         if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1788             Py_RETURN_FALSE;
1789         return set_issubset(v, w);
1790     case Py_GT:
1791         if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1792             Py_RETURN_FALSE;
1793         return set_issuperset(v, w);
1794     }
1795     Py_RETURN_NOTIMPLEMENTED;
1796 }
1797 
1798 static PyObject *
set_add(PySetObject * so,PyObject * key)1799 set_add(PySetObject *so, PyObject *key)
1800 {
1801     if (set_add_key(so, key))
1802         return NULL;
1803     Py_RETURN_NONE;
1804 }
1805 
1806 PyDoc_STRVAR(add_doc,
1807 "Add an element to a set.\n\
1808 \n\
1809 This has no effect if the element is already present.");
1810 
1811 static int
set_contains(PySetObject * so,PyObject * key)1812 set_contains(PySetObject *so, PyObject *key)
1813 {
1814     PyObject *tmpkey;
1815     int rv;
1816 
1817     rv = set_contains_key(so, key);
1818     if (rv < 0) {
1819         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1820             return -1;
1821         PyErr_Clear();
1822         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1823         if (tmpkey == NULL)
1824             return -1;
1825         rv = set_contains_key(so, tmpkey);
1826         Py_DECREF(tmpkey);
1827     }
1828     return rv;
1829 }
1830 
1831 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1832 set_direct_contains(PySetObject *so, PyObject *key)
1833 {
1834     long result;
1835 
1836     result = set_contains(so, key);
1837     if (result < 0)
1838         return NULL;
1839     return PyBool_FromLong(result);
1840 }
1841 
1842 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1843 
1844 static PyObject *
set_remove(PySetObject * so,PyObject * key)1845 set_remove(PySetObject *so, PyObject *key)
1846 {
1847     PyObject *tmpkey;
1848     int rv;
1849 
1850     rv = set_discard_key(so, key);
1851     if (rv < 0) {
1852         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1853             return NULL;
1854         PyErr_Clear();
1855         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1856         if (tmpkey == NULL)
1857             return NULL;
1858         rv = set_discard_key(so, tmpkey);
1859         Py_DECREF(tmpkey);
1860         if (rv < 0)
1861             return NULL;
1862     }
1863 
1864     if (rv == DISCARD_NOTFOUND) {
1865         _PyErr_SetKeyError(key);
1866         return NULL;
1867     }
1868     Py_RETURN_NONE;
1869 }
1870 
1871 PyDoc_STRVAR(remove_doc,
1872 "Remove an element from a set; it must be a member.\n\
1873 \n\
1874 If the element is not a member, raise a KeyError.");
1875 
1876 static PyObject *
set_discard(PySetObject * so,PyObject * key)1877 set_discard(PySetObject *so, PyObject *key)
1878 {
1879     PyObject *tmpkey;
1880     int rv;
1881 
1882     rv = set_discard_key(so, key);
1883     if (rv < 0) {
1884         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1885             return NULL;
1886         PyErr_Clear();
1887         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1888         if (tmpkey == NULL)
1889             return NULL;
1890         rv = set_discard_key(so, tmpkey);
1891         Py_DECREF(tmpkey);
1892         if (rv < 0)
1893             return NULL;
1894     }
1895     Py_RETURN_NONE;
1896 }
1897 
1898 PyDoc_STRVAR(discard_doc,
1899 "Remove an element from a set if it is a member.\n\
1900 \n\
1901 If the element is not a member, do nothing.");
1902 
1903 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1904 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1905 {
1906     PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1907     _Py_IDENTIFIER(__dict__);
1908 
1909     keys = PySequence_List((PyObject *)so);
1910     if (keys == NULL)
1911         goto done;
1912     args = PyTuple_Pack(1, keys);
1913     if (args == NULL)
1914         goto done;
1915     if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) {
1916         goto done;
1917     }
1918     if (dict == NULL) {
1919         dict = Py_None;
1920         Py_INCREF(dict);
1921     }
1922     result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1923 done:
1924     Py_XDECREF(args);
1925     Py_XDECREF(keys);
1926     Py_XDECREF(dict);
1927     return result;
1928 }
1929 
1930 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1931 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1932 {
1933     Py_ssize_t res;
1934 
1935     res = _PyObject_SIZE(Py_TYPE(so));
1936     if (so->table != so->smalltable)
1937         res = res + (so->mask + 1) * sizeof(setentry);
1938     return PyLong_FromSsize_t(res);
1939 }
1940 
1941 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1942 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1943 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1944 {
1945     PyObject *iterable = NULL;
1946 
1947     if (!_PyArg_NoKeywords("set", kwds))
1948         return -1;
1949     if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1950         return -1;
1951     if (self->fill)
1952         set_clear_internal(self);
1953     self->hash = -1;
1954     if (iterable == NULL)
1955         return 0;
1956     return set_update_internal(self, iterable);
1957 }
1958 
1959 static PyObject*
set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1960 set_vectorcall(PyObject *type, PyObject * const*args,
1961                size_t nargsf, PyObject *kwnames)
1962 {
1963     assert(PyType_Check(type));
1964 
1965     if (!_PyArg_NoKwnames("set", kwnames)) {
1966         return NULL;
1967     }
1968 
1969     Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1970     if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
1971         return NULL;
1972     }
1973 
1974     if (nargs) {
1975         return make_new_set((PyTypeObject *)type, args[0]);
1976     }
1977 
1978     return make_new_set((PyTypeObject *)type, NULL);
1979 }
1980 
1981 static PySequenceMethods set_as_sequence = {
1982     set_len,                            /* sq_length */
1983     0,                                  /* sq_concat */
1984     0,                                  /* sq_repeat */
1985     0,                                  /* sq_item */
1986     0,                                  /* sq_slice */
1987     0,                                  /* sq_ass_item */
1988     0,                                  /* sq_ass_slice */
1989     (objobjproc)set_contains,           /* sq_contains */
1990 };
1991 
1992 /* set object ********************************************************/
1993 
1994 #ifdef Py_DEBUG
1995 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
1996 
1997 PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
1998 All is well if assertions don't fail.");
1999 #endif
2000 
2001 static PyMethodDef set_methods[] = {
2002     {"add",             (PyCFunction)set_add,           METH_O,
2003      add_doc},
2004     {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2005      clear_doc},
2006     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2007      contains_doc},
2008     {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2009      copy_doc},
2010     {"discard",         (PyCFunction)set_discard,       METH_O,
2011      discard_doc},
2012     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2013      difference_doc},
2014     {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2015      difference_update_doc},
2016     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2017      intersection_doc},
2018     {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2019      intersection_update_doc},
2020     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2021      isdisjoint_doc},
2022     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2023      issubset_doc},
2024     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2025      issuperset_doc},
2026     {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2027      pop_doc},
2028     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2029      reduce_doc},
2030     {"remove",          (PyCFunction)set_remove,        METH_O,
2031      remove_doc},
2032     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2033      sizeof_doc},
2034     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2035      symmetric_difference_doc},
2036     {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2037      symmetric_difference_update_doc},
2038 #ifdef Py_DEBUG
2039     {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2040      test_c_api_doc},
2041 #endif
2042     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2043      union_doc},
2044     {"update",          (PyCFunction)set_update,        METH_VARARGS,
2045      update_doc},
2046     {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2047     {NULL,              NULL}   /* sentinel */
2048 };
2049 
2050 static PyNumberMethods set_as_number = {
2051     0,                                  /*nb_add*/
2052     (binaryfunc)set_sub,                /*nb_subtract*/
2053     0,                                  /*nb_multiply*/
2054     0,                                  /*nb_remainder*/
2055     0,                                  /*nb_divmod*/
2056     0,                                  /*nb_power*/
2057     0,                                  /*nb_negative*/
2058     0,                                  /*nb_positive*/
2059     0,                                  /*nb_absolute*/
2060     0,                                  /*nb_bool*/
2061     0,                                  /*nb_invert*/
2062     0,                                  /*nb_lshift*/
2063     0,                                  /*nb_rshift*/
2064     (binaryfunc)set_and,                /*nb_and*/
2065     (binaryfunc)set_xor,                /*nb_xor*/
2066     (binaryfunc)set_or,                 /*nb_or*/
2067     0,                                  /*nb_int*/
2068     0,                                  /*nb_reserved*/
2069     0,                                  /*nb_float*/
2070     0,                                  /*nb_inplace_add*/
2071     (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2072     0,                                  /*nb_inplace_multiply*/
2073     0,                                  /*nb_inplace_remainder*/
2074     0,                                  /*nb_inplace_power*/
2075     0,                                  /*nb_inplace_lshift*/
2076     0,                                  /*nb_inplace_rshift*/
2077     (binaryfunc)set_iand,               /*nb_inplace_and*/
2078     (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2079     (binaryfunc)set_ior,                /*nb_inplace_or*/
2080 };
2081 
2082 PyDoc_STRVAR(set_doc,
2083 "set() -> new empty set object\n\
2084 set(iterable) -> new set object\n\
2085 \n\
2086 Build an unordered collection of unique elements.");
2087 
2088 PyTypeObject PySet_Type = {
2089     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2090     "set",                              /* tp_name */
2091     sizeof(PySetObject),                /* tp_basicsize */
2092     0,                                  /* tp_itemsize */
2093     /* methods */
2094     (destructor)set_dealloc,            /* tp_dealloc */
2095     0,                                  /* tp_vectorcall_offset */
2096     0,                                  /* tp_getattr */
2097     0,                                  /* tp_setattr */
2098     0,                                  /* tp_as_async */
2099     (reprfunc)set_repr,                 /* tp_repr */
2100     &set_as_number,                     /* tp_as_number */
2101     &set_as_sequence,                   /* tp_as_sequence */
2102     0,                                  /* tp_as_mapping */
2103     PyObject_HashNotImplemented,        /* tp_hash */
2104     0,                                  /* tp_call */
2105     0,                                  /* tp_str */
2106     PyObject_GenericGetAttr,            /* tp_getattro */
2107     0,                                  /* tp_setattro */
2108     0,                                  /* tp_as_buffer */
2109     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2110         Py_TPFLAGS_BASETYPE |
2111         _Py_TPFLAGS_MATCH_SELF,       /* tp_flags */
2112     set_doc,                            /* tp_doc */
2113     (traverseproc)set_traverse,         /* tp_traverse */
2114     (inquiry)set_clear_internal,        /* tp_clear */
2115     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2116     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2117     (getiterfunc)set_iter,              /* tp_iter */
2118     0,                                  /* tp_iternext */
2119     set_methods,                        /* tp_methods */
2120     0,                                  /* tp_members */
2121     0,                                  /* tp_getset */
2122     0,                                  /* tp_base */
2123     0,                                  /* tp_dict */
2124     0,                                  /* tp_descr_get */
2125     0,                                  /* tp_descr_set */
2126     0,                                  /* tp_dictoffset */
2127     (initproc)set_init,                 /* tp_init */
2128     PyType_GenericAlloc,                /* tp_alloc */
2129     set_new,                            /* tp_new */
2130     PyObject_GC_Del,                    /* tp_free */
2131     .tp_vectorcall = set_vectorcall,
2132 };
2133 
2134 /* frozenset object ********************************************************/
2135 
2136 
2137 static PyMethodDef frozenset_methods[] = {
2138     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2139      contains_doc},
2140     {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2141      copy_doc},
2142     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2143      difference_doc},
2144     {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,
2145      intersection_doc},
2146     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2147      isdisjoint_doc},
2148     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2149      issubset_doc},
2150     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2151      issuperset_doc},
2152     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2153      reduce_doc},
2154     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2155      sizeof_doc},
2156     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2157      symmetric_difference_doc},
2158     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2159      union_doc},
2160     {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2161     {NULL,              NULL}   /* sentinel */
2162 };
2163 
2164 static PyNumberMethods frozenset_as_number = {
2165     0,                                  /*nb_add*/
2166     (binaryfunc)set_sub,                /*nb_subtract*/
2167     0,                                  /*nb_multiply*/
2168     0,                                  /*nb_remainder*/
2169     0,                                  /*nb_divmod*/
2170     0,                                  /*nb_power*/
2171     0,                                  /*nb_negative*/
2172     0,                                  /*nb_positive*/
2173     0,                                  /*nb_absolute*/
2174     0,                                  /*nb_bool*/
2175     0,                                  /*nb_invert*/
2176     0,                                  /*nb_lshift*/
2177     0,                                  /*nb_rshift*/
2178     (binaryfunc)set_and,                /*nb_and*/
2179     (binaryfunc)set_xor,                /*nb_xor*/
2180     (binaryfunc)set_or,                 /*nb_or*/
2181 };
2182 
2183 PyDoc_STRVAR(frozenset_doc,
2184 "frozenset() -> empty frozenset object\n\
2185 frozenset(iterable) -> frozenset object\n\
2186 \n\
2187 Build an immutable unordered collection of unique elements.");
2188 
2189 PyTypeObject PyFrozenSet_Type = {
2190     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2191     "frozenset",                        /* tp_name */
2192     sizeof(PySetObject),                /* tp_basicsize */
2193     0,                                  /* tp_itemsize */
2194     /* methods */
2195     (destructor)set_dealloc,            /* tp_dealloc */
2196     0,                                  /* tp_vectorcall_offset */
2197     0,                                  /* tp_getattr */
2198     0,                                  /* tp_setattr */
2199     0,                                  /* tp_as_async */
2200     (reprfunc)set_repr,                 /* tp_repr */
2201     &frozenset_as_number,               /* tp_as_number */
2202     &set_as_sequence,                   /* tp_as_sequence */
2203     0,                                  /* tp_as_mapping */
2204     frozenset_hash,                     /* tp_hash */
2205     0,                                  /* tp_call */
2206     0,                                  /* tp_str */
2207     PyObject_GenericGetAttr,            /* tp_getattro */
2208     0,                                  /* tp_setattro */
2209     0,                                  /* tp_as_buffer */
2210     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2211         Py_TPFLAGS_BASETYPE |
2212         _Py_TPFLAGS_MATCH_SELF,       /* tp_flags */
2213     frozenset_doc,                      /* tp_doc */
2214     (traverseproc)set_traverse,         /* tp_traverse */
2215     (inquiry)set_clear_internal,        /* tp_clear */
2216     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2217     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2218     (getiterfunc)set_iter,              /* tp_iter */
2219     0,                                  /* tp_iternext */
2220     frozenset_methods,                  /* tp_methods */
2221     0,                                  /* tp_members */
2222     0,                                  /* tp_getset */
2223     0,                                  /* tp_base */
2224     0,                                  /* tp_dict */
2225     0,                                  /* tp_descr_get */
2226     0,                                  /* tp_descr_set */
2227     0,                                  /* tp_dictoffset */
2228     0,                                  /* tp_init */
2229     PyType_GenericAlloc,                /* tp_alloc */
2230     frozenset_new,                      /* tp_new */
2231     PyObject_GC_Del,                    /* tp_free */
2232     .tp_vectorcall = frozenset_vectorcall,
2233 };
2234 
2235 
2236 /***** C API functions *************************************************/
2237 
2238 PyObject *
PySet_New(PyObject * iterable)2239 PySet_New(PyObject *iterable)
2240 {
2241     return make_new_set(&PySet_Type, iterable);
2242 }
2243 
2244 PyObject *
PyFrozenSet_New(PyObject * iterable)2245 PyFrozenSet_New(PyObject *iterable)
2246 {
2247     return make_new_set(&PyFrozenSet_Type, iterable);
2248 }
2249 
2250 Py_ssize_t
PySet_Size(PyObject * anyset)2251 PySet_Size(PyObject *anyset)
2252 {
2253     if (!PyAnySet_Check(anyset)) {
2254         PyErr_BadInternalCall();
2255         return -1;
2256     }
2257     return PySet_GET_SIZE(anyset);
2258 }
2259 
2260 int
PySet_Clear(PyObject * set)2261 PySet_Clear(PyObject *set)
2262 {
2263     if (!PySet_Check(set)) {
2264         PyErr_BadInternalCall();
2265         return -1;
2266     }
2267     return set_clear_internal((PySetObject *)set);
2268 }
2269 
2270 int
PySet_Contains(PyObject * anyset,PyObject * key)2271 PySet_Contains(PyObject *anyset, PyObject *key)
2272 {
2273     if (!PyAnySet_Check(anyset)) {
2274         PyErr_BadInternalCall();
2275         return -1;
2276     }
2277     return set_contains_key((PySetObject *)anyset, key);
2278 }
2279 
2280 int
PySet_Discard(PyObject * set,PyObject * key)2281 PySet_Discard(PyObject *set, PyObject *key)
2282 {
2283     if (!PySet_Check(set)) {
2284         PyErr_BadInternalCall();
2285         return -1;
2286     }
2287     return set_discard_key((PySetObject *)set, key);
2288 }
2289 
2290 int
PySet_Add(PyObject * anyset,PyObject * key)2291 PySet_Add(PyObject *anyset, PyObject *key)
2292 {
2293     if (!PySet_Check(anyset) &&
2294         (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2295         PyErr_BadInternalCall();
2296         return -1;
2297     }
2298     return set_add_key((PySetObject *)anyset, key);
2299 }
2300 
2301 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2302 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2303 {
2304     setentry *entry;
2305 
2306     if (!PyAnySet_Check(set)) {
2307         PyErr_BadInternalCall();
2308         return -1;
2309     }
2310     if (set_next((PySetObject *)set, pos, &entry) == 0)
2311         return 0;
2312     *key = entry->key;
2313     *hash = entry->hash;
2314     return 1;
2315 }
2316 
2317 PyObject *
PySet_Pop(PyObject * set)2318 PySet_Pop(PyObject *set)
2319 {
2320     if (!PySet_Check(set)) {
2321         PyErr_BadInternalCall();
2322         return NULL;
2323     }
2324     return set_pop((PySetObject *)set, NULL);
2325 }
2326 
2327 int
_PySet_Update(PyObject * set,PyObject * iterable)2328 _PySet_Update(PyObject *set, PyObject *iterable)
2329 {
2330     if (!PySet_Check(set)) {
2331         PyErr_BadInternalCall();
2332         return -1;
2333     }
2334     return set_update_internal((PySetObject *)set, iterable);
2335 }
2336 
2337 /* Exported for the gdb plugin's benefit. */
2338 PyObject *_PySet_Dummy = dummy;
2339 
2340 #ifdef Py_DEBUG
2341 
2342 /* Test code to be called with any three element set.
2343    Returns True and original set is restored. */
2344 
2345 #define assertRaises(call_return_value, exception)              \
2346     do {                                                        \
2347         assert(call_return_value);                              \
2348         assert(PyErr_ExceptionMatches(exception));              \
2349         PyErr_Clear();                                          \
2350     } while(0)
2351 
2352 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2353 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2354 {
2355     Py_ssize_t count;
2356     const char *s;
2357     Py_ssize_t i;
2358     PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2359     PyObject *ob = (PyObject *)so;
2360     Py_hash_t hash;
2361     PyObject *str;
2362 
2363     /* Verify preconditions */
2364     assert(PyAnySet_Check(ob));
2365     assert(PyAnySet_CheckExact(ob));
2366     assert(!PyFrozenSet_CheckExact(ob));
2367 
2368     /* so.clear(); so |= set("abc"); */
2369     str = PyUnicode_FromString("abc");
2370     if (str == NULL)
2371         return NULL;
2372     set_clear_internal(so);
2373     if (set_update_internal(so, str)) {
2374         Py_DECREF(str);
2375         return NULL;
2376     }
2377     Py_DECREF(str);
2378 
2379     /* Exercise type/size checks */
2380     assert(PySet_Size(ob) == 3);
2381     assert(PySet_GET_SIZE(ob) == 3);
2382 
2383     /* Raise TypeError for non-iterable constructor arguments */
2384     assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2385     assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2386 
2387     /* Raise TypeError for unhashable key */
2388     dup = PySet_New(ob);
2389     assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2390     assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2391     assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2392 
2393     /* Exercise successful pop, contains, add, and discard */
2394     elem = PySet_Pop(ob);
2395     assert(PySet_Contains(ob, elem) == 0);
2396     assert(PySet_GET_SIZE(ob) == 2);
2397     assert(PySet_Add(ob, elem) == 0);
2398     assert(PySet_Contains(ob, elem) == 1);
2399     assert(PySet_GET_SIZE(ob) == 3);
2400     assert(PySet_Discard(ob, elem) == 1);
2401     assert(PySet_GET_SIZE(ob) == 2);
2402     assert(PySet_Discard(ob, elem) == 0);
2403     assert(PySet_GET_SIZE(ob) == 2);
2404 
2405     /* Exercise clear */
2406     dup2 = PySet_New(dup);
2407     assert(PySet_Clear(dup2) == 0);
2408     assert(PySet_Size(dup2) == 0);
2409     Py_DECREF(dup2);
2410 
2411     /* Raise SystemError on clear or update of frozen set */
2412     f = PyFrozenSet_New(dup);
2413     assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2414     assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2415     assert(PySet_Add(f, elem) == 0);
2416     Py_INCREF(f);
2417     assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2418     Py_DECREF(f);
2419     Py_DECREF(f);
2420 
2421     /* Exercise direct iteration */
2422     i = 0, count = 0;
2423     while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2424         s = PyUnicode_AsUTF8(x);
2425         assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2426         count++;
2427     }
2428     assert(count == 3);
2429 
2430     /* Exercise updates */
2431     dup2 = PySet_New(NULL);
2432     assert(_PySet_Update(dup2, dup) == 0);
2433     assert(PySet_Size(dup2) == 3);
2434     assert(_PySet_Update(dup2, dup) == 0);
2435     assert(PySet_Size(dup2) == 3);
2436     Py_DECREF(dup2);
2437 
2438     /* Raise SystemError when self argument is not a set or frozenset. */
2439     t = PyTuple_New(0);
2440     assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2441     assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2442     Py_DECREF(t);
2443 
2444     /* Raise SystemError when self argument is not a set. */
2445     f = PyFrozenSet_New(dup);
2446     assert(PySet_Size(f) == 3);
2447     assert(PyFrozenSet_CheckExact(f));
2448     assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2449     assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2450     Py_DECREF(f);
2451 
2452     /* Raise KeyError when popping from an empty set */
2453     assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2454     Py_DECREF(ob);
2455     assert(PySet_GET_SIZE(ob) == 0);
2456     assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2457 
2458     /* Restore the set from the copy using the PyNumber API */
2459     assert(PyNumber_InPlaceOr(ob, dup) == ob);
2460     Py_DECREF(ob);
2461 
2462     /* Verify constructors accept NULL arguments */
2463     f = PySet_New(NULL);
2464     assert(f != NULL);
2465     assert(PySet_GET_SIZE(f) == 0);
2466     Py_DECREF(f);
2467     f = PyFrozenSet_New(NULL);
2468     assert(f != NULL);
2469     assert(PyFrozenSet_CheckExact(f));
2470     assert(PySet_GET_SIZE(f) == 0);
2471     Py_DECREF(f);
2472 
2473     Py_DECREF(elem);
2474     Py_DECREF(dup);
2475     Py_RETURN_TRUE;
2476 }
2477 
2478 #undef assertRaises
2479 
2480 #endif
2481 
2482 /***** Dummy Struct  *************************************************/
2483 
2484 static PyObject *
dummy_repr(PyObject * op)2485 dummy_repr(PyObject *op)
2486 {
2487     return PyUnicode_FromString("<dummy key>");
2488 }
2489 
2490 static void _Py_NO_RETURN
dummy_dealloc(PyObject * ignore)2491 dummy_dealloc(PyObject* ignore)
2492 {
2493     Py_FatalError("deallocating <dummy key>");
2494 }
2495 
2496 static PyTypeObject _PySetDummy_Type = {
2497     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2498     "<dummy key> type",
2499     0,
2500     0,
2501     dummy_dealloc,      /*tp_dealloc*/ /*never called*/
2502     0,                  /*tp_vectorcall_offset*/
2503     0,                  /*tp_getattr*/
2504     0,                  /*tp_setattr*/
2505     0,                  /*tp_as_async*/
2506     dummy_repr,         /*tp_repr*/
2507     0,                  /*tp_as_number*/
2508     0,                  /*tp_as_sequence*/
2509     0,                  /*tp_as_mapping*/
2510     0,                  /*tp_hash */
2511     0,                  /*tp_call */
2512     0,                  /*tp_str */
2513     0,                  /*tp_getattro */
2514     0,                  /*tp_setattro */
2515     0,                  /*tp_as_buffer */
2516     Py_TPFLAGS_DEFAULT, /*tp_flags */
2517 };
2518 
2519 static PyObject _dummy_struct = {
2520   _PyObject_EXTRA_INIT
2521   2, &_PySetDummy_Type
2522 };
2523