• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* set object implementation
3 
4    Written and maintained by Raymond D. Hettinger <python@rcn.com>
5    Derived from Lib/sets.py and Objects/dictobject.c.
6 
7    The basic lookup function used by all operations.
8    This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9 
10    The initial probe index is computed as hash mod the table size.
11    Subsequent probe indices are computed as explained in Objects/dictobject.c.
12 
13    To improve cache locality, each probe inspects a series of consecutive
14    nearby entries before moving on to probes elsewhere in memory.  This leaves
15    us with a hybrid of linear probing and randomized probing.  The linear probing
16    reduces the cost of hash collisions because consecutive memory accesses
17    tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
18    we then use more of the upper bits from the hash value and apply a simple
19    linear congruential random number genearator.  This helps break-up long
20    chains of collisions.
21 
22    All arithmetic on hash should ignore overflow.
23 
24    Unlike the dictionary implementation, the lookkey function can return
25    NULL if the rich comparison returns an error.
26 
27    Use cases for sets differ considerably from dictionaries where looked-up
28    keys are more likely to be present.  In contrast, sets are primarily
29    about membership testing where the presence of an element is not known in
30    advance.  Accordingly, the set implementation needs to optimize for both
31    the found and not-found case.
32 */
33 
34 #include "Python.h"
35 #include "pycore_object.h"
36 #include "pycore_pystate.h"
37 #include "structmember.h"
38 
39 /* Object used as dummy key to fill deleted entries */
40 static PyObject _dummy_struct;
41 
42 #define dummy (&_dummy_struct)
43 
44 
45 /* ======================================================================== */
46 /* ======= Begin logic for probing the hash table ========================= */
47 
48 /* Set this to zero to turn-off linear probing */
49 #ifndef LINEAR_PROBES
50 #define LINEAR_PROBES 9
51 #endif
52 
53 /* This must be >= 1 */
54 #define PERTURB_SHIFT 5
55 
56 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)57 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
58 {
59     setentry *table;
60     setentry *entry;
61     size_t perturb;
62     size_t mask = so->mask;
63     size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
64     size_t j;
65     int cmp;
66 
67     entry = &so->table[i];
68     if (entry->key == NULL)
69         return entry;
70 
71     perturb = hash;
72 
73     while (1) {
74         if (entry->hash == hash) {
75             PyObject *startkey = entry->key;
76             /* startkey cannot be a dummy because the dummy hash field is -1 */
77             assert(startkey != dummy);
78             if (startkey == key)
79                 return entry;
80             if (PyUnicode_CheckExact(startkey)
81                 && PyUnicode_CheckExact(key)
82                 && _PyUnicode_EQ(startkey, key))
83                 return entry;
84             table = so->table;
85             Py_INCREF(startkey);
86             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
87             Py_DECREF(startkey);
88             if (cmp < 0)                                          /* unlikely */
89                 return NULL;
90             if (table != so->table || entry->key != startkey)     /* unlikely */
91                 return set_lookkey(so, key, hash);
92             if (cmp > 0)                                          /* likely */
93                 return entry;
94             mask = so->mask;                 /* help avoid a register spill */
95         }
96 
97         if (i + LINEAR_PROBES <= mask) {
98             for (j = 0 ; j < LINEAR_PROBES ; j++) {
99                 entry++;
100                 if (entry->hash == 0 && entry->key == NULL)
101                     return entry;
102                 if (entry->hash == hash) {
103                     PyObject *startkey = entry->key;
104                     assert(startkey != dummy);
105                     if (startkey == key)
106                         return entry;
107                     if (PyUnicode_CheckExact(startkey)
108                         && PyUnicode_CheckExact(key)
109                         && _PyUnicode_EQ(startkey, key))
110                         return entry;
111                     table = so->table;
112                     Py_INCREF(startkey);
113                     cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
114                     Py_DECREF(startkey);
115                     if (cmp < 0)
116                         return NULL;
117                     if (table != so->table || entry->key != startkey)
118                         return set_lookkey(so, key, hash);
119                     if (cmp > 0)
120                         return entry;
121                     mask = so->mask;
122                 }
123             }
124         }
125 
126         perturb >>= PERTURB_SHIFT;
127         i = (i * 5 + 1 + perturb) & mask;
128 
129         entry = &so->table[i];
130         if (entry->key == NULL)
131             return entry;
132     }
133 }
134 
135 static int set_table_resize(PySetObject *, Py_ssize_t);
136 
137 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)138 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
139 {
140     setentry *table;
141     setentry *freeslot;
142     setentry *entry;
143     size_t perturb;
144     size_t mask;
145     size_t i;                       /* Unsigned for defined overflow behavior */
146     size_t j;
147     int cmp;
148 
149     /* Pre-increment is necessary to prevent arbitrary code in the rich
150        comparison from deallocating the key just before the insertion. */
151     Py_INCREF(key);
152 
153   restart:
154 
155     mask = so->mask;
156     i = (size_t)hash & mask;
157 
158     entry = &so->table[i];
159     if (entry->key == NULL)
160         goto found_unused;
161 
162     freeslot = NULL;
163     perturb = hash;
164 
165     while (1) {
166         if (entry->hash == hash) {
167             PyObject *startkey = entry->key;
168             /* startkey cannot be a dummy because the dummy hash field is -1 */
169             assert(startkey != dummy);
170             if (startkey == key)
171                 goto found_active;
172             if (PyUnicode_CheckExact(startkey)
173                 && PyUnicode_CheckExact(key)
174                 && _PyUnicode_EQ(startkey, key))
175                 goto found_active;
176             table = so->table;
177             Py_INCREF(startkey);
178             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
179             Py_DECREF(startkey);
180             if (cmp > 0)                                          /* likely */
181                 goto found_active;
182             if (cmp < 0)
183                 goto comparison_error;
184             /* Continuing the search from the current entry only makes
185                sense if the table and entry are unchanged; otherwise,
186                we have to restart from the beginning */
187             if (table != so->table || entry->key != startkey)
188                 goto restart;
189             mask = so->mask;                 /* help avoid a register spill */
190         }
191         else if (entry->hash == -1)
192             freeslot = entry;
193 
194         if (i + LINEAR_PROBES <= mask) {
195             for (j = 0 ; j < LINEAR_PROBES ; j++) {
196                 entry++;
197                 if (entry->hash == 0 && entry->key == NULL)
198                     goto found_unused_or_dummy;
199                 if (entry->hash == hash) {
200                     PyObject *startkey = entry->key;
201                     assert(startkey != dummy);
202                     if (startkey == key)
203                         goto found_active;
204                     if (PyUnicode_CheckExact(startkey)
205                         && PyUnicode_CheckExact(key)
206                         && _PyUnicode_EQ(startkey, key))
207                         goto found_active;
208                     table = so->table;
209                     Py_INCREF(startkey);
210                     cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
211                     Py_DECREF(startkey);
212                     if (cmp > 0)
213                         goto found_active;
214                     if (cmp < 0)
215                         goto comparison_error;
216                     if (table != so->table || entry->key != startkey)
217                         goto restart;
218                     mask = so->mask;
219                 }
220                 else if (entry->hash == -1)
221                     freeslot = entry;
222             }
223         }
224 
225         perturb >>= PERTURB_SHIFT;
226         i = (i * 5 + 1 + perturb) & mask;
227 
228         entry = &so->table[i];
229         if (entry->key == NULL)
230             goto found_unused_or_dummy;
231     }
232 
233   found_unused_or_dummy:
234     if (freeslot == NULL)
235         goto found_unused;
236     so->used++;
237     freeslot->key = key;
238     freeslot->hash = hash;
239     return 0;
240 
241   found_unused:
242     so->fill++;
243     so->used++;
244     entry->key = key;
245     entry->hash = hash;
246     if ((size_t)so->fill*5 < mask*3)
247         return 0;
248     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
249 
250   found_active:
251     Py_DECREF(key);
252     return 0;
253 
254   comparison_error:
255     Py_DECREF(key);
256     return -1;
257 }
258 
259 /*
260 Internal routine used by set_table_resize() to insert an item which is
261 known to be absent from the set.  This routine also assumes that
262 the set contains no deleted entries.  Besides the performance benefit,
263 there is also safety benefit since using set_add_entry() risks making
264 a callback in the middle of a set_table_resize(), see issue 1456209.
265 The caller is responsible for updating the key's reference count and
266 the setobject's fill and used fields.
267 */
268 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)269 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
270 {
271     setentry *entry;
272     size_t perturb = hash;
273     size_t i = (size_t)hash & mask;
274     size_t j;
275 
276     while (1) {
277         entry = &table[i];
278         if (entry->key == NULL)
279             goto found_null;
280         if (i + LINEAR_PROBES <= mask) {
281             for (j = 0; j < LINEAR_PROBES; j++) {
282                 entry++;
283                 if (entry->key == NULL)
284                     goto found_null;
285             }
286         }
287         perturb >>= PERTURB_SHIFT;
288         i = (i * 5 + 1 + perturb) & mask;
289     }
290   found_null:
291     entry->key = key;
292     entry->hash = hash;
293 }
294 
295 /* ======== End logic for probing the hash table ========================== */
296 /* ======================================================================== */
297 
298 /*
299 Restructure the table by allocating a new table and reinserting all
300 keys again.  When entries have been deleted, the new table may
301 actually be smaller than the old one.
302 */
303 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)304 set_table_resize(PySetObject *so, Py_ssize_t minused)
305 {
306     setentry *oldtable, *newtable, *entry;
307     Py_ssize_t oldmask = so->mask;
308     size_t newmask;
309     int is_oldtable_malloced;
310     setentry small_copy[PySet_MINSIZE];
311 
312     assert(minused >= 0);
313 
314     /* Find the smallest table size > minused. */
315     /* XXX speed-up with intrinsics */
316     size_t newsize = PySet_MINSIZE;
317     while (newsize <= (size_t)minused) {
318         newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
319     }
320 
321     /* Get space for a new table. */
322     oldtable = so->table;
323     assert(oldtable != NULL);
324     is_oldtable_malloced = oldtable != so->smalltable;
325 
326     if (newsize == PySet_MINSIZE) {
327         /* A large table is shrinking, or we can't get any smaller. */
328         newtable = so->smalltable;
329         if (newtable == oldtable) {
330             if (so->fill == so->used) {
331                 /* No dummies, so no point doing anything. */
332                 return 0;
333             }
334             /* We're not going to resize it, but rebuild the
335                table anyway to purge old dummy entries.
336                Subtle:  This is *necessary* if fill==size,
337                as set_lookkey needs at least one virgin slot to
338                terminate failing searches.  If fill < size, it's
339                merely desirable, as dummies slow searches. */
340             assert(so->fill > so->used);
341             memcpy(small_copy, oldtable, sizeof(small_copy));
342             oldtable = small_copy;
343         }
344     }
345     else {
346         newtable = PyMem_NEW(setentry, newsize);
347         if (newtable == NULL) {
348             PyErr_NoMemory();
349             return -1;
350         }
351     }
352 
353     /* Make the set empty, using the new table. */
354     assert(newtable != oldtable);
355     memset(newtable, 0, sizeof(setentry) * newsize);
356     so->mask = newsize - 1;
357     so->table = newtable;
358 
359     /* Copy the data over; this is refcount-neutral for active entries;
360        dummy entries aren't copied over, of course */
361     newmask = (size_t)so->mask;
362     if (so->fill == so->used) {
363         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
364             if (entry->key != NULL) {
365                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
366             }
367         }
368     } else {
369         so->fill = so->used;
370         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
371             if (entry->key != NULL && entry->key != dummy) {
372                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
373             }
374         }
375     }
376 
377     if (is_oldtable_malloced)
378         PyMem_DEL(oldtable);
379     return 0;
380 }
381 
382 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)383 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
384 {
385     setentry *entry;
386 
387     entry = set_lookkey(so, key, hash);
388     if (entry != NULL)
389         return entry->key != NULL;
390     return -1;
391 }
392 
393 #define DISCARD_NOTFOUND 0
394 #define DISCARD_FOUND 1
395 
396 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)397 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
398 {
399     setentry *entry;
400     PyObject *old_key;
401 
402     entry = set_lookkey(so, key, hash);
403     if (entry == NULL)
404         return -1;
405     if (entry->key == NULL)
406         return DISCARD_NOTFOUND;
407     old_key = entry->key;
408     entry->key = dummy;
409     entry->hash = -1;
410     so->used--;
411     Py_DECREF(old_key);
412     return DISCARD_FOUND;
413 }
414 
415 static int
set_add_key(PySetObject * so,PyObject * key)416 set_add_key(PySetObject *so, PyObject *key)
417 {
418     Py_hash_t hash;
419 
420     if (!PyUnicode_CheckExact(key) ||
421         (hash = ((PyASCIIObject *) key)->hash) == -1) {
422         hash = PyObject_Hash(key);
423         if (hash == -1)
424             return -1;
425     }
426     return set_add_entry(so, key, hash);
427 }
428 
429 static int
set_contains_key(PySetObject * so,PyObject * key)430 set_contains_key(PySetObject *so, PyObject *key)
431 {
432     Py_hash_t hash;
433 
434     if (!PyUnicode_CheckExact(key) ||
435         (hash = ((PyASCIIObject *) key)->hash) == -1) {
436         hash = PyObject_Hash(key);
437         if (hash == -1)
438             return -1;
439     }
440     return set_contains_entry(so, key, hash);
441 }
442 
443 static int
set_discard_key(PySetObject * so,PyObject * key)444 set_discard_key(PySetObject *so, PyObject *key)
445 {
446     Py_hash_t hash;
447 
448     if (!PyUnicode_CheckExact(key) ||
449         (hash = ((PyASCIIObject *) key)->hash) == -1) {
450         hash = PyObject_Hash(key);
451         if (hash == -1)
452             return -1;
453     }
454     return set_discard_entry(so, key, hash);
455 }
456 
457 static void
set_empty_to_minsize(PySetObject * so)458 set_empty_to_minsize(PySetObject *so)
459 {
460     memset(so->smalltable, 0, sizeof(so->smalltable));
461     so->fill = 0;
462     so->used = 0;
463     so->mask = PySet_MINSIZE - 1;
464     so->table = so->smalltable;
465     so->hash = -1;
466 }
467 
468 static int
set_clear_internal(PySetObject * so)469 set_clear_internal(PySetObject *so)
470 {
471     setentry *entry;
472     setentry *table = so->table;
473     Py_ssize_t fill = so->fill;
474     Py_ssize_t used = so->used;
475     int table_is_malloced = table != so->smalltable;
476     setentry small_copy[PySet_MINSIZE];
477 
478     assert (PyAnySet_Check(so));
479     assert(table != NULL);
480 
481     /* This is delicate.  During the process of clearing the set,
482      * decrefs can cause the set to mutate.  To avoid fatal confusion
483      * (voice of experience), we have to make the set empty before
484      * clearing the slots, and never refer to anything via so->ref while
485      * clearing.
486      */
487     if (table_is_malloced)
488         set_empty_to_minsize(so);
489 
490     else if (fill > 0) {
491         /* It's a small table with something that needs to be cleared.
492          * Afraid the only safe way is to copy the set entries into
493          * another small table first.
494          */
495         memcpy(small_copy, table, sizeof(small_copy));
496         table = small_copy;
497         set_empty_to_minsize(so);
498     }
499     /* else it's a small table that's already empty */
500 
501     /* Now we can finally clear things.  If C had refcounts, we could
502      * assert that the refcount on table is 1 now, i.e. that this function
503      * has unique access to it, so decref side-effects can't alter it.
504      */
505     for (entry = table; used > 0; entry++) {
506         if (entry->key && entry->key != dummy) {
507             used--;
508             Py_DECREF(entry->key);
509         }
510     }
511 
512     if (table_is_malloced)
513         PyMem_DEL(table);
514     return 0;
515 }
516 
517 /*
518  * Iterate over a set table.  Use like so:
519  *
520  *     Py_ssize_t pos;
521  *     setentry *entry;
522  *     pos = 0;   # important!  pos should not otherwise be changed by you
523  *     while (set_next(yourset, &pos, &entry)) {
524  *              Refer to borrowed reference in entry->key.
525  *     }
526  *
527  * CAUTION:  In general, it isn't safe to use set_next in a loop that
528  * mutates the table.
529  */
530 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)531 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
532 {
533     Py_ssize_t i;
534     Py_ssize_t mask;
535     setentry *entry;
536 
537     assert (PyAnySet_Check(so));
538     i = *pos_ptr;
539     assert(i >= 0);
540     mask = so->mask;
541     entry = &so->table[i];
542     while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
543         i++;
544         entry++;
545     }
546     *pos_ptr = i+1;
547     if (i > mask)
548         return 0;
549     assert(entry != NULL);
550     *entry_ptr = entry;
551     return 1;
552 }
553 
554 static void
set_dealloc(PySetObject * so)555 set_dealloc(PySetObject *so)
556 {
557     setentry *entry;
558     Py_ssize_t used = so->used;
559 
560     /* bpo-31095: UnTrack is needed before calling any callbacks */
561     PyObject_GC_UnTrack(so);
562     Py_TRASHCAN_BEGIN(so, set_dealloc)
563     if (so->weakreflist != NULL)
564         PyObject_ClearWeakRefs((PyObject *) so);
565 
566     for (entry = so->table; used > 0; entry++) {
567         if (entry->key && entry->key != dummy) {
568                 used--;
569                 Py_DECREF(entry->key);
570         }
571     }
572     if (so->table != so->smalltable)
573         PyMem_DEL(so->table);
574     Py_TYPE(so)->tp_free(so);
575     Py_TRASHCAN_END
576 }
577 
578 static PyObject *
set_repr(PySetObject * so)579 set_repr(PySetObject *so)
580 {
581     PyObject *result=NULL, *keys, *listrepr, *tmp;
582     int status = Py_ReprEnter((PyObject*)so);
583 
584     if (status != 0) {
585         if (status < 0)
586             return NULL;
587         return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
588     }
589 
590     /* shortcut for the empty set */
591     if (!so->used) {
592         Py_ReprLeave((PyObject*)so);
593         return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
594     }
595 
596     keys = PySequence_List((PyObject *)so);
597     if (keys == NULL)
598         goto done;
599 
600     /* repr(keys)[1:-1] */
601     listrepr = PyObject_Repr(keys);
602     Py_DECREF(keys);
603     if (listrepr == NULL)
604         goto done;
605     tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
606     Py_DECREF(listrepr);
607     if (tmp == NULL)
608         goto done;
609     listrepr = tmp;
610 
611     if (Py_TYPE(so) != &PySet_Type)
612         result = PyUnicode_FromFormat("%s({%U})",
613                                       Py_TYPE(so)->tp_name,
614                                       listrepr);
615     else
616         result = PyUnicode_FromFormat("{%U}", listrepr);
617     Py_DECREF(listrepr);
618 done:
619     Py_ReprLeave((PyObject*)so);
620     return result;
621 }
622 
623 static Py_ssize_t
set_len(PyObject * so)624 set_len(PyObject *so)
625 {
626     return ((PySetObject *)so)->used;
627 }
628 
629 static int
set_merge(PySetObject * so,PyObject * otherset)630 set_merge(PySetObject *so, PyObject *otherset)
631 {
632     PySetObject *other;
633     PyObject *key;
634     Py_ssize_t i;
635     setentry *so_entry;
636     setentry *other_entry;
637 
638     assert (PyAnySet_Check(so));
639     assert (PyAnySet_Check(otherset));
640 
641     other = (PySetObject*)otherset;
642     if (other == so || other->used == 0)
643         /* a.update(a) or a.update(set()); nothing to do */
644         return 0;
645     /* Do one big resize at the start, rather than
646      * incrementally resizing as we insert new keys.  Expect
647      * that there will be no (or few) overlapping keys.
648      */
649     if ((so->fill + other->used)*5 >= so->mask*3) {
650         if (set_table_resize(so, (so->used + other->used)*2) != 0)
651             return -1;
652     }
653     so_entry = so->table;
654     other_entry = other->table;
655 
656     /* If our table is empty, and both tables have the same size, and
657        there are no dummies to eliminate, then just copy the pointers. */
658     if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
659         for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
660             key = other_entry->key;
661             if (key != NULL) {
662                 assert(so_entry->key == NULL);
663                 Py_INCREF(key);
664                 so_entry->key = key;
665                 so_entry->hash = other_entry->hash;
666             }
667         }
668         so->fill = other->fill;
669         so->used = other->used;
670         return 0;
671     }
672 
673     /* If our table is empty, we can use set_insert_clean() */
674     if (so->fill == 0) {
675         setentry *newtable = so->table;
676         size_t newmask = (size_t)so->mask;
677         so->fill = other->used;
678         so->used = other->used;
679         for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
680             key = other_entry->key;
681             if (key != NULL && key != dummy) {
682                 Py_INCREF(key);
683                 set_insert_clean(newtable, newmask, key, other_entry->hash);
684             }
685         }
686         return 0;
687     }
688 
689     /* We can't assure there are no duplicates, so do normal insertions */
690     for (i = 0; i <= other->mask; i++) {
691         other_entry = &other->table[i];
692         key = other_entry->key;
693         if (key != NULL && key != dummy) {
694             if (set_add_entry(so, key, other_entry->hash))
695                 return -1;
696         }
697     }
698     return 0;
699 }
700 
701 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))702 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
703 {
704     /* Make sure the search finger is in bounds */
705     setentry *entry = so->table + (so->finger & so->mask);
706     setentry *limit = so->table + so->mask;
707     PyObject *key;
708 
709     if (so->used == 0) {
710         PyErr_SetString(PyExc_KeyError, "pop from an empty set");
711         return NULL;
712     }
713     while (entry->key == NULL || entry->key==dummy) {
714         entry++;
715         if (entry > limit)
716             entry = so->table;
717     }
718     key = entry->key;
719     entry->key = dummy;
720     entry->hash = -1;
721     so->used--;
722     so->finger = entry - so->table + 1;   /* next place to start */
723     return key;
724 }
725 
726 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
727 Raises KeyError if the set is empty.");
728 
729 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)730 set_traverse(PySetObject *so, visitproc visit, void *arg)
731 {
732     Py_ssize_t pos = 0;
733     setentry *entry;
734 
735     while (set_next(so, &pos, &entry))
736         Py_VISIT(entry->key);
737     return 0;
738 }
739 
740 /* Work to increase the bit dispersion for closely spaced hash values.
741    This is important because some use cases have many combinations of a
742    small number of elements with nearby hashes so that many distinct
743    combinations collapse to only a handful of distinct hash values. */
744 
745 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)746 _shuffle_bits(Py_uhash_t h)
747 {
748     return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
749 }
750 
751 /* Most of the constants in this hash algorithm are randomly chosen
752    large primes with "interesting bit patterns" and that passed tests
753    for good collision statistics on a variety of problematic datasets
754    including powersets and graph structures (such as David Eppstein's
755    graph recipes in Lib/test/test_set.py) */
756 
757 static Py_hash_t
frozenset_hash(PyObject * self)758 frozenset_hash(PyObject *self)
759 {
760     PySetObject *so = (PySetObject *)self;
761     Py_uhash_t hash = 0;
762     setentry *entry;
763 
764     if (so->hash != -1)
765         return so->hash;
766 
767     /* Xor-in shuffled bits from every entry's hash field because xor is
768        commutative and a frozenset hash should be independent of order.
769 
770        For speed, include null entries and dummy entries and then
771        subtract out their effect afterwards so that the final hash
772        depends only on active entries.  This allows the code to be
773        vectorized by the compiler and it saves the unpredictable
774        branches that would arise when trying to exclude null and dummy
775        entries on every iteration. */
776 
777     for (entry = so->table; entry <= &so->table[so->mask]; entry++)
778         hash ^= _shuffle_bits(entry->hash);
779 
780     /* Remove the effect of an odd number of NULL entries */
781     if ((so->mask + 1 - so->fill) & 1)
782         hash ^= _shuffle_bits(0);
783 
784     /* Remove the effect of an odd number of dummy entries */
785     if ((so->fill - so->used) & 1)
786         hash ^= _shuffle_bits(-1);
787 
788     /* Factor in the number of active entries */
789     hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
790 
791     /* Disperse patterns arising in nested frozensets */
792     hash ^= (hash >> 11) ^ (hash >> 25);
793     hash = hash * 69069U + 907133923UL;
794 
795     /* -1 is reserved as an error code */
796     if (hash == (Py_uhash_t)-1)
797         hash = 590923713UL;
798 
799     so->hash = hash;
800     return hash;
801 }
802 
803 /***** Set iterator type ***********************************************/
804 
805 typedef struct {
806     PyObject_HEAD
807     PySetObject *si_set; /* Set to NULL when iterator is exhausted */
808     Py_ssize_t si_used;
809     Py_ssize_t si_pos;
810     Py_ssize_t len;
811 } setiterobject;
812 
813 static void
setiter_dealloc(setiterobject * si)814 setiter_dealloc(setiterobject *si)
815 {
816     /* bpo-31095: UnTrack is needed before calling any callbacks */
817     _PyObject_GC_UNTRACK(si);
818     Py_XDECREF(si->si_set);
819     PyObject_GC_Del(si);
820 }
821 
822 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)823 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
824 {
825     Py_VISIT(si->si_set);
826     return 0;
827 }
828 
829 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))830 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
831 {
832     Py_ssize_t len = 0;
833     if (si->si_set != NULL && si->si_used == si->si_set->used)
834         len = si->len;
835     return PyLong_FromSsize_t(len);
836 }
837 
838 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
839 
840 static PyObject *setiter_iternext(setiterobject *si);
841 
842 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))843 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
844 {
845     _Py_IDENTIFIER(iter);
846     /* copy the iterator state */
847     setiterobject tmp = *si;
848     Py_XINCREF(tmp.si_set);
849 
850     /* iterate the temporary into a list */
851     PyObject *list = PySequence_List((PyObject*)&tmp);
852     Py_XDECREF(tmp.si_set);
853     if (list == NULL) {
854         return NULL;
855     }
856     return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
857 }
858 
859 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
860 
861 static PyMethodDef setiter_methods[] = {
862     {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
863     {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
864     {NULL,              NULL}           /* sentinel */
865 };
866 
setiter_iternext(setiterobject * si)867 static PyObject *setiter_iternext(setiterobject *si)
868 {
869     PyObject *key;
870     Py_ssize_t i, mask;
871     setentry *entry;
872     PySetObject *so = si->si_set;
873 
874     if (so == NULL)
875         return NULL;
876     assert (PyAnySet_Check(so));
877 
878     if (si->si_used != so->used) {
879         PyErr_SetString(PyExc_RuntimeError,
880                         "Set changed size during iteration");
881         si->si_used = -1; /* Make this state sticky */
882         return NULL;
883     }
884 
885     i = si->si_pos;
886     assert(i>=0);
887     entry = so->table;
888     mask = so->mask;
889     while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
890         i++;
891     si->si_pos = i+1;
892     if (i > mask)
893         goto fail;
894     si->len--;
895     key = entry[i].key;
896     Py_INCREF(key);
897     return key;
898 
899 fail:
900     si->si_set = NULL;
901     Py_DECREF(so);
902     return NULL;
903 }
904 
905 PyTypeObject PySetIter_Type = {
906     PyVarObject_HEAD_INIT(&PyType_Type, 0)
907     "set_iterator",                             /* tp_name */
908     sizeof(setiterobject),                      /* tp_basicsize */
909     0,                                          /* tp_itemsize */
910     /* methods */
911     (destructor)setiter_dealloc,                /* tp_dealloc */
912     0,                                          /* tp_vectorcall_offset */
913     0,                                          /* tp_getattr */
914     0,                                          /* tp_setattr */
915     0,                                          /* tp_as_async */
916     0,                                          /* tp_repr */
917     0,                                          /* tp_as_number */
918     0,                                          /* tp_as_sequence */
919     0,                                          /* tp_as_mapping */
920     0,                                          /* tp_hash */
921     0,                                          /* tp_call */
922     0,                                          /* tp_str */
923     PyObject_GenericGetAttr,                    /* tp_getattro */
924     0,                                          /* tp_setattro */
925     0,                                          /* tp_as_buffer */
926     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
927     0,                                          /* tp_doc */
928     (traverseproc)setiter_traverse,             /* tp_traverse */
929     0,                                          /* tp_clear */
930     0,                                          /* tp_richcompare */
931     0,                                          /* tp_weaklistoffset */
932     PyObject_SelfIter,                          /* tp_iter */
933     (iternextfunc)setiter_iternext,             /* tp_iternext */
934     setiter_methods,                            /* tp_methods */
935     0,
936 };
937 
938 static PyObject *
set_iter(PySetObject * so)939 set_iter(PySetObject *so)
940 {
941     setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
942     if (si == NULL)
943         return NULL;
944     Py_INCREF(so);
945     si->si_set = so;
946     si->si_used = so->used;
947     si->si_pos = 0;
948     si->len = so->used;
949     _PyObject_GC_TRACK(si);
950     return (PyObject *)si;
951 }
952 
953 static int
set_update_internal(PySetObject * so,PyObject * other)954 set_update_internal(PySetObject *so, PyObject *other)
955 {
956     PyObject *key, *it;
957 
958     if (PyAnySet_Check(other))
959         return set_merge(so, other);
960 
961     if (PyDict_CheckExact(other)) {
962         PyObject *value;
963         Py_ssize_t pos = 0;
964         Py_hash_t hash;
965         Py_ssize_t dictsize = PyDict_GET_SIZE(other);
966 
967         /* Do one big resize at the start, rather than
968         * incrementally resizing as we insert new keys.  Expect
969         * that there will be no (or few) overlapping keys.
970         */
971         if (dictsize < 0)
972             return -1;
973         if ((so->fill + dictsize)*5 >= so->mask*3) {
974             if (set_table_resize(so, (so->used + dictsize)*2) != 0)
975                 return -1;
976         }
977         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
978             if (set_add_entry(so, key, hash))
979                 return -1;
980         }
981         return 0;
982     }
983 
984     it = PyObject_GetIter(other);
985     if (it == NULL)
986         return -1;
987 
988     while ((key = PyIter_Next(it)) != NULL) {
989         if (set_add_key(so, key)) {
990             Py_DECREF(it);
991             Py_DECREF(key);
992             return -1;
993         }
994         Py_DECREF(key);
995     }
996     Py_DECREF(it);
997     if (PyErr_Occurred())
998         return -1;
999     return 0;
1000 }
1001 
1002 static PyObject *
set_update(PySetObject * so,PyObject * args)1003 set_update(PySetObject *so, PyObject *args)
1004 {
1005     Py_ssize_t i;
1006 
1007     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1008         PyObject *other = PyTuple_GET_ITEM(args, i);
1009         if (set_update_internal(so, other))
1010             return NULL;
1011     }
1012     Py_RETURN_NONE;
1013 }
1014 
1015 PyDoc_STRVAR(update_doc,
1016 "Update a set with the union of itself and others.");
1017 
1018 /* XXX Todo:
1019    If aligned memory allocations become available, make the
1020    set object 64 byte aligned so that most of the fields
1021    can be retrieved or updated in a single cache line.
1022 */
1023 
1024 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1025 make_new_set(PyTypeObject *type, PyObject *iterable)
1026 {
1027     PySetObject *so;
1028 
1029     so = (PySetObject *)type->tp_alloc(type, 0);
1030     if (so == NULL)
1031         return NULL;
1032 
1033     so->fill = 0;
1034     so->used = 0;
1035     so->mask = PySet_MINSIZE - 1;
1036     so->table = so->smalltable;
1037     so->hash = -1;
1038     so->finger = 0;
1039     so->weakreflist = NULL;
1040 
1041     if (iterable != NULL) {
1042         if (set_update_internal(so, iterable)) {
1043             Py_DECREF(so);
1044             return NULL;
1045         }
1046     }
1047 
1048     return (PyObject *)so;
1049 }
1050 
1051 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)1052 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
1053 {
1054     if (type != &PySet_Type && type != &PyFrozenSet_Type) {
1055         if (PyType_IsSubtype(type, &PySet_Type))
1056             type = &PySet_Type;
1057         else
1058             type = &PyFrozenSet_Type;
1059     }
1060     return make_new_set(type, iterable);
1061 }
1062 
1063 /* The empty frozenset is a singleton */
1064 static PyObject *emptyfrozenset = NULL;
1065 
1066 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1067 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1068 {
1069     PyObject *iterable = NULL, *result;
1070 
1071     if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds))
1072         return NULL;
1073 
1074     if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1075         return NULL;
1076 
1077     if (type != &PyFrozenSet_Type)
1078         return make_new_set(type, iterable);
1079 
1080     if (iterable != NULL) {
1081         /* frozenset(f) is idempotent */
1082         if (PyFrozenSet_CheckExact(iterable)) {
1083             Py_INCREF(iterable);
1084             return iterable;
1085         }
1086         result = make_new_set(type, iterable);
1087         if (result == NULL || PySet_GET_SIZE(result))
1088             return result;
1089         Py_DECREF(result);
1090     }
1091     /* The empty frozenset is a singleton */
1092     if (emptyfrozenset == NULL)
1093         emptyfrozenset = make_new_set(type, NULL);
1094     Py_XINCREF(emptyfrozenset);
1095     return emptyfrozenset;
1096 }
1097 
1098 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1099 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1100 {
1101     return make_new_set(type, NULL);
1102 }
1103 
1104 /* set_swap_bodies() switches the contents of any two sets by moving their
1105    internal data pointers and, if needed, copying the internal smalltables.
1106    Semantically equivalent to:
1107 
1108      t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1109 
1110    The function always succeeds and it leaves both objects in a stable state.
1111    Useful for operations that update in-place (by allowing an intermediate
1112    result to be swapped into one of the original inputs).
1113 */
1114 
1115 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1116 set_swap_bodies(PySetObject *a, PySetObject *b)
1117 {
1118     Py_ssize_t t;
1119     setentry *u;
1120     setentry tab[PySet_MINSIZE];
1121     Py_hash_t h;
1122 
1123     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1124     t = a->used;     a->used   = b->used;        b->used  = t;
1125     t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1126 
1127     u = a->table;
1128     if (a->table == a->smalltable)
1129         u = b->smalltable;
1130     a->table  = b->table;
1131     if (b->table == b->smalltable)
1132         a->table = a->smalltable;
1133     b->table = u;
1134 
1135     if (a->table == a->smalltable || b->table == b->smalltable) {
1136         memcpy(tab, a->smalltable, sizeof(tab));
1137         memcpy(a->smalltable, b->smalltable, sizeof(tab));
1138         memcpy(b->smalltable, tab, sizeof(tab));
1139     }
1140 
1141     if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1142         PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1143         h = a->hash;     a->hash = b->hash;  b->hash = h;
1144     } else {
1145         a->hash = -1;
1146         b->hash = -1;
1147     }
1148 }
1149 
1150 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1151 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1152 {
1153     return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1154 }
1155 
1156 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1157 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1158 {
1159     if (PyFrozenSet_CheckExact(so)) {
1160         Py_INCREF(so);
1161         return (PyObject *)so;
1162     }
1163     return set_copy(so, NULL);
1164 }
1165 
1166 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1167 
1168 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1169 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1170 {
1171     set_clear_internal(so);
1172     Py_RETURN_NONE;
1173 }
1174 
1175 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1176 
1177 static PyObject *
set_union(PySetObject * so,PyObject * args)1178 set_union(PySetObject *so, PyObject *args)
1179 {
1180     PySetObject *result;
1181     PyObject *other;
1182     Py_ssize_t i;
1183 
1184     result = (PySetObject *)set_copy(so, NULL);
1185     if (result == NULL)
1186         return NULL;
1187 
1188     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1189         other = PyTuple_GET_ITEM(args, i);
1190         if ((PyObject *)so == other)
1191             continue;
1192         if (set_update_internal(result, other)) {
1193             Py_DECREF(result);
1194             return NULL;
1195         }
1196     }
1197     return (PyObject *)result;
1198 }
1199 
1200 PyDoc_STRVAR(union_doc,
1201  "Return the union of sets as a new set.\n\
1202 \n\
1203 (i.e. all elements that are in either set.)");
1204 
1205 static PyObject *
set_or(PySetObject * so,PyObject * other)1206 set_or(PySetObject *so, PyObject *other)
1207 {
1208     PySetObject *result;
1209 
1210     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1211         Py_RETURN_NOTIMPLEMENTED;
1212 
1213     result = (PySetObject *)set_copy(so, NULL);
1214     if (result == NULL)
1215         return NULL;
1216     if ((PyObject *)so == other)
1217         return (PyObject *)result;
1218     if (set_update_internal(result, other)) {
1219         Py_DECREF(result);
1220         return NULL;
1221     }
1222     return (PyObject *)result;
1223 }
1224 
1225 static PyObject *
set_ior(PySetObject * so,PyObject * other)1226 set_ior(PySetObject *so, PyObject *other)
1227 {
1228     if (!PyAnySet_Check(other))
1229         Py_RETURN_NOTIMPLEMENTED;
1230 
1231     if (set_update_internal(so, other))
1232         return NULL;
1233     Py_INCREF(so);
1234     return (PyObject *)so;
1235 }
1236 
1237 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1238 set_intersection(PySetObject *so, PyObject *other)
1239 {
1240     PySetObject *result;
1241     PyObject *key, *it, *tmp;
1242     Py_hash_t hash;
1243     int rv;
1244 
1245     if ((PyObject *)so == other)
1246         return set_copy(so, NULL);
1247 
1248     result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1249     if (result == NULL)
1250         return NULL;
1251 
1252     if (PyAnySet_Check(other)) {
1253         Py_ssize_t pos = 0;
1254         setentry *entry;
1255 
1256         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1257             tmp = (PyObject *)so;
1258             so = (PySetObject *)other;
1259             other = tmp;
1260         }
1261 
1262         while (set_next((PySetObject *)other, &pos, &entry)) {
1263             key = entry->key;
1264             hash = entry->hash;
1265             rv = set_contains_entry(so, key, hash);
1266             if (rv < 0) {
1267                 Py_DECREF(result);
1268                 return NULL;
1269             }
1270             if (rv) {
1271                 if (set_add_entry(result, key, hash)) {
1272                     Py_DECREF(result);
1273                     return NULL;
1274                 }
1275             }
1276         }
1277         return (PyObject *)result;
1278     }
1279 
1280     it = PyObject_GetIter(other);
1281     if (it == NULL) {
1282         Py_DECREF(result);
1283         return NULL;
1284     }
1285 
1286     while ((key = PyIter_Next(it)) != NULL) {
1287         hash = PyObject_Hash(key);
1288         if (hash == -1)
1289             goto error;
1290         rv = set_contains_entry(so, key, hash);
1291         if (rv < 0)
1292             goto error;
1293         if (rv) {
1294             if (set_add_entry(result, key, hash))
1295                 goto error;
1296         }
1297         Py_DECREF(key);
1298     }
1299     Py_DECREF(it);
1300     if (PyErr_Occurred()) {
1301         Py_DECREF(result);
1302         return NULL;
1303     }
1304     return (PyObject *)result;
1305   error:
1306     Py_DECREF(it);
1307     Py_DECREF(result);
1308     Py_DECREF(key);
1309     return NULL;
1310 }
1311 
1312 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1313 set_intersection_multi(PySetObject *so, PyObject *args)
1314 {
1315     Py_ssize_t i;
1316     PyObject *result = (PyObject *)so;
1317 
1318     if (PyTuple_GET_SIZE(args) == 0)
1319         return set_copy(so, NULL);
1320 
1321     Py_INCREF(so);
1322     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1323         PyObject *other = PyTuple_GET_ITEM(args, i);
1324         PyObject *newresult = set_intersection((PySetObject *)result, other);
1325         if (newresult == NULL) {
1326             Py_DECREF(result);
1327             return NULL;
1328         }
1329         Py_DECREF(result);
1330         result = newresult;
1331     }
1332     return result;
1333 }
1334 
1335 PyDoc_STRVAR(intersection_doc,
1336 "Return the intersection of two sets as a new set.\n\
1337 \n\
1338 (i.e. all elements that are in both sets.)");
1339 
1340 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1341 set_intersection_update(PySetObject *so, PyObject *other)
1342 {
1343     PyObject *tmp;
1344 
1345     tmp = set_intersection(so, other);
1346     if (tmp == NULL)
1347         return NULL;
1348     set_swap_bodies(so, (PySetObject *)tmp);
1349     Py_DECREF(tmp);
1350     Py_RETURN_NONE;
1351 }
1352 
1353 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1354 set_intersection_update_multi(PySetObject *so, PyObject *args)
1355 {
1356     PyObject *tmp;
1357 
1358     tmp = set_intersection_multi(so, args);
1359     if (tmp == NULL)
1360         return NULL;
1361     set_swap_bodies(so, (PySetObject *)tmp);
1362     Py_DECREF(tmp);
1363     Py_RETURN_NONE;
1364 }
1365 
1366 PyDoc_STRVAR(intersection_update_doc,
1367 "Update a set with the intersection of itself and another.");
1368 
1369 static PyObject *
set_and(PySetObject * so,PyObject * other)1370 set_and(PySetObject *so, PyObject *other)
1371 {
1372     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1373         Py_RETURN_NOTIMPLEMENTED;
1374     return set_intersection(so, other);
1375 }
1376 
1377 static PyObject *
set_iand(PySetObject * so,PyObject * other)1378 set_iand(PySetObject *so, PyObject *other)
1379 {
1380     PyObject *result;
1381 
1382     if (!PyAnySet_Check(other))
1383         Py_RETURN_NOTIMPLEMENTED;
1384     result = set_intersection_update(so, other);
1385     if (result == NULL)
1386         return NULL;
1387     Py_DECREF(result);
1388     Py_INCREF(so);
1389     return (PyObject *)so;
1390 }
1391 
1392 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1393 set_isdisjoint(PySetObject *so, PyObject *other)
1394 {
1395     PyObject *key, *it, *tmp;
1396     int rv;
1397 
1398     if ((PyObject *)so == other) {
1399         if (PySet_GET_SIZE(so) == 0)
1400             Py_RETURN_TRUE;
1401         else
1402             Py_RETURN_FALSE;
1403     }
1404 
1405     if (PyAnySet_CheckExact(other)) {
1406         Py_ssize_t pos = 0;
1407         setentry *entry;
1408 
1409         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1410             tmp = (PyObject *)so;
1411             so = (PySetObject *)other;
1412             other = tmp;
1413         }
1414         while (set_next((PySetObject *)other, &pos, &entry)) {
1415             rv = set_contains_entry(so, entry->key, entry->hash);
1416             if (rv < 0)
1417                 return NULL;
1418             if (rv)
1419                 Py_RETURN_FALSE;
1420         }
1421         Py_RETURN_TRUE;
1422     }
1423 
1424     it = PyObject_GetIter(other);
1425     if (it == NULL)
1426         return NULL;
1427 
1428     while ((key = PyIter_Next(it)) != NULL) {
1429         Py_hash_t hash = PyObject_Hash(key);
1430 
1431         if (hash == -1) {
1432             Py_DECREF(key);
1433             Py_DECREF(it);
1434             return NULL;
1435         }
1436         rv = set_contains_entry(so, key, hash);
1437         Py_DECREF(key);
1438         if (rv < 0) {
1439             Py_DECREF(it);
1440             return NULL;
1441         }
1442         if (rv) {
1443             Py_DECREF(it);
1444             Py_RETURN_FALSE;
1445         }
1446     }
1447     Py_DECREF(it);
1448     if (PyErr_Occurred())
1449         return NULL;
1450     Py_RETURN_TRUE;
1451 }
1452 
1453 PyDoc_STRVAR(isdisjoint_doc,
1454 "Return True if two sets have a null intersection.");
1455 
1456 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1457 set_difference_update_internal(PySetObject *so, PyObject *other)
1458 {
1459     if ((PyObject *)so == other)
1460         return set_clear_internal(so);
1461 
1462     if (PyAnySet_Check(other)) {
1463         setentry *entry;
1464         Py_ssize_t pos = 0;
1465 
1466         while (set_next((PySetObject *)other, &pos, &entry))
1467             if (set_discard_entry(so, entry->key, entry->hash) < 0)
1468                 return -1;
1469     } else {
1470         PyObject *key, *it;
1471         it = PyObject_GetIter(other);
1472         if (it == NULL)
1473             return -1;
1474 
1475         while ((key = PyIter_Next(it)) != NULL) {
1476             if (set_discard_key(so, key) < 0) {
1477                 Py_DECREF(it);
1478                 Py_DECREF(key);
1479                 return -1;
1480             }
1481             Py_DECREF(key);
1482         }
1483         Py_DECREF(it);
1484         if (PyErr_Occurred())
1485             return -1;
1486     }
1487     /* If more than 1/4th are dummies, then resize them away. */
1488     if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1489         return 0;
1490     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1491 }
1492 
1493 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1494 set_difference_update(PySetObject *so, PyObject *args)
1495 {
1496     Py_ssize_t i;
1497 
1498     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1499         PyObject *other = PyTuple_GET_ITEM(args, i);
1500         if (set_difference_update_internal(so, other))
1501             return NULL;
1502     }
1503     Py_RETURN_NONE;
1504 }
1505 
1506 PyDoc_STRVAR(difference_update_doc,
1507 "Remove all elements of another set from this set.");
1508 
1509 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1510 set_copy_and_difference(PySetObject *so, PyObject *other)
1511 {
1512     PyObject *result;
1513 
1514     result = set_copy(so, NULL);
1515     if (result == NULL)
1516         return NULL;
1517     if (set_difference_update_internal((PySetObject *) result, other) == 0)
1518         return result;
1519     Py_DECREF(result);
1520     return NULL;
1521 }
1522 
1523 static PyObject *
set_difference(PySetObject * so,PyObject * other)1524 set_difference(PySetObject *so, PyObject *other)
1525 {
1526     PyObject *result;
1527     PyObject *key;
1528     Py_hash_t hash;
1529     setentry *entry;
1530     Py_ssize_t pos = 0, other_size;
1531     int rv;
1532 
1533     if (PyAnySet_Check(other)) {
1534         other_size = PySet_GET_SIZE(other);
1535     }
1536     else if (PyDict_CheckExact(other)) {
1537         other_size = PyDict_GET_SIZE(other);
1538     }
1539     else {
1540         return set_copy_and_difference(so, other);
1541     }
1542 
1543     /* If len(so) much more than len(other), it's more efficient to simply copy
1544      * so and then iterate other looking for common elements. */
1545     if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1546         return set_copy_and_difference(so, other);
1547     }
1548 
1549     result = make_new_set_basetype(Py_TYPE(so), NULL);
1550     if (result == NULL)
1551         return NULL;
1552 
1553     if (PyDict_CheckExact(other)) {
1554         while (set_next(so, &pos, &entry)) {
1555             key = entry->key;
1556             hash = entry->hash;
1557             rv = _PyDict_Contains(other, key, hash);
1558             if (rv < 0) {
1559                 Py_DECREF(result);
1560                 return NULL;
1561             }
1562             if (!rv) {
1563                 if (set_add_entry((PySetObject *)result, key, hash)) {
1564                     Py_DECREF(result);
1565                     return NULL;
1566                 }
1567             }
1568         }
1569         return result;
1570     }
1571 
1572     /* Iterate over so, checking for common elements in other. */
1573     while (set_next(so, &pos, &entry)) {
1574         key = entry->key;
1575         hash = entry->hash;
1576         rv = set_contains_entry((PySetObject *)other, key, hash);
1577         if (rv < 0) {
1578             Py_DECREF(result);
1579             return NULL;
1580         }
1581         if (!rv) {
1582             if (set_add_entry((PySetObject *)result, key, hash)) {
1583                 Py_DECREF(result);
1584                 return NULL;
1585             }
1586         }
1587     }
1588     return result;
1589 }
1590 
1591 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1592 set_difference_multi(PySetObject *so, PyObject *args)
1593 {
1594     Py_ssize_t i;
1595     PyObject *result, *other;
1596 
1597     if (PyTuple_GET_SIZE(args) == 0)
1598         return set_copy(so, NULL);
1599 
1600     other = PyTuple_GET_ITEM(args, 0);
1601     result = set_difference(so, other);
1602     if (result == NULL)
1603         return NULL;
1604 
1605     for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1606         other = PyTuple_GET_ITEM(args, i);
1607         if (set_difference_update_internal((PySetObject *)result, other)) {
1608             Py_DECREF(result);
1609             return NULL;
1610         }
1611     }
1612     return result;
1613 }
1614 
1615 PyDoc_STRVAR(difference_doc,
1616 "Return the difference of two or more sets as a new set.\n\
1617 \n\
1618 (i.e. all elements that are in this set but not the others.)");
1619 static PyObject *
set_sub(PySetObject * so,PyObject * other)1620 set_sub(PySetObject *so, PyObject *other)
1621 {
1622     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1623         Py_RETURN_NOTIMPLEMENTED;
1624     return set_difference(so, other);
1625 }
1626 
1627 static PyObject *
set_isub(PySetObject * so,PyObject * other)1628 set_isub(PySetObject *so, PyObject *other)
1629 {
1630     if (!PyAnySet_Check(other))
1631         Py_RETURN_NOTIMPLEMENTED;
1632     if (set_difference_update_internal(so, other))
1633         return NULL;
1634     Py_INCREF(so);
1635     return (PyObject *)so;
1636 }
1637 
1638 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1639 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1640 {
1641     PySetObject *otherset;
1642     PyObject *key;
1643     Py_ssize_t pos = 0;
1644     Py_hash_t hash;
1645     setentry *entry;
1646     int rv;
1647 
1648     if ((PyObject *)so == other)
1649         return set_clear(so, NULL);
1650 
1651     if (PyDict_CheckExact(other)) {
1652         PyObject *value;
1653         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1654             Py_INCREF(key);
1655             rv = set_discard_entry(so, key, hash);
1656             if (rv < 0) {
1657                 Py_DECREF(key);
1658                 return NULL;
1659             }
1660             if (rv == DISCARD_NOTFOUND) {
1661                 if (set_add_entry(so, key, hash)) {
1662                     Py_DECREF(key);
1663                     return NULL;
1664                 }
1665             }
1666             Py_DECREF(key);
1667         }
1668         Py_RETURN_NONE;
1669     }
1670 
1671     if (PyAnySet_Check(other)) {
1672         Py_INCREF(other);
1673         otherset = (PySetObject *)other;
1674     } else {
1675         otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1676         if (otherset == NULL)
1677             return NULL;
1678     }
1679 
1680     while (set_next(otherset, &pos, &entry)) {
1681         key = entry->key;
1682         hash = entry->hash;
1683         rv = set_discard_entry(so, key, hash);
1684         if (rv < 0) {
1685             Py_DECREF(otherset);
1686             return NULL;
1687         }
1688         if (rv == DISCARD_NOTFOUND) {
1689             if (set_add_entry(so, key, hash)) {
1690                 Py_DECREF(otherset);
1691                 return NULL;
1692             }
1693         }
1694     }
1695     Py_DECREF(otherset);
1696     Py_RETURN_NONE;
1697 }
1698 
1699 PyDoc_STRVAR(symmetric_difference_update_doc,
1700 "Update a set with the symmetric difference of itself and another.");
1701 
1702 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1703 set_symmetric_difference(PySetObject *so, PyObject *other)
1704 {
1705     PyObject *rv;
1706     PySetObject *otherset;
1707 
1708     otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1709     if (otherset == NULL)
1710         return NULL;
1711     rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1712     if (rv == NULL) {
1713         Py_DECREF(otherset);
1714         return NULL;
1715     }
1716     Py_DECREF(rv);
1717     return (PyObject *)otherset;
1718 }
1719 
1720 PyDoc_STRVAR(symmetric_difference_doc,
1721 "Return the symmetric difference of two sets as a new set.\n\
1722 \n\
1723 (i.e. all elements that are in exactly one of the sets.)");
1724 
1725 static PyObject *
set_xor(PySetObject * so,PyObject * other)1726 set_xor(PySetObject *so, PyObject *other)
1727 {
1728     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1729         Py_RETURN_NOTIMPLEMENTED;
1730     return set_symmetric_difference(so, other);
1731 }
1732 
1733 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1734 set_ixor(PySetObject *so, PyObject *other)
1735 {
1736     PyObject *result;
1737 
1738     if (!PyAnySet_Check(other))
1739         Py_RETURN_NOTIMPLEMENTED;
1740     result = set_symmetric_difference_update(so, other);
1741     if (result == NULL)
1742         return NULL;
1743     Py_DECREF(result);
1744     Py_INCREF(so);
1745     return (PyObject *)so;
1746 }
1747 
1748 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1749 set_issubset(PySetObject *so, PyObject *other)
1750 {
1751     setentry *entry;
1752     Py_ssize_t pos = 0;
1753     int rv;
1754 
1755     if (!PyAnySet_Check(other)) {
1756         PyObject *tmp, *result;
1757         tmp = make_new_set(&PySet_Type, other);
1758         if (tmp == NULL)
1759             return NULL;
1760         result = set_issubset(so, tmp);
1761         Py_DECREF(tmp);
1762         return result;
1763     }
1764     if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1765         Py_RETURN_FALSE;
1766 
1767     while (set_next(so, &pos, &entry)) {
1768         rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1769         if (rv < 0)
1770             return NULL;
1771         if (!rv)
1772             Py_RETURN_FALSE;
1773     }
1774     Py_RETURN_TRUE;
1775 }
1776 
1777 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1778 
1779 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1780 set_issuperset(PySetObject *so, PyObject *other)
1781 {
1782     PyObject *tmp, *result;
1783 
1784     if (!PyAnySet_Check(other)) {
1785         tmp = make_new_set(&PySet_Type, other);
1786         if (tmp == NULL)
1787             return NULL;
1788         result = set_issuperset(so, tmp);
1789         Py_DECREF(tmp);
1790         return result;
1791     }
1792     return set_issubset((PySetObject *)other, (PyObject *)so);
1793 }
1794 
1795 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1796 
1797 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1798 set_richcompare(PySetObject *v, PyObject *w, int op)
1799 {
1800     PyObject *r1;
1801     int r2;
1802 
1803     if(!PyAnySet_Check(w))
1804         Py_RETURN_NOTIMPLEMENTED;
1805 
1806     switch (op) {
1807     case Py_EQ:
1808         if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1809             Py_RETURN_FALSE;
1810         if (v->hash != -1  &&
1811             ((PySetObject *)w)->hash != -1 &&
1812             v->hash != ((PySetObject *)w)->hash)
1813             Py_RETURN_FALSE;
1814         return set_issubset(v, w);
1815     case Py_NE:
1816         r1 = set_richcompare(v, w, Py_EQ);
1817         if (r1 == NULL)
1818             return NULL;
1819         r2 = PyObject_IsTrue(r1);
1820         Py_DECREF(r1);
1821         if (r2 < 0)
1822             return NULL;
1823         return PyBool_FromLong(!r2);
1824     case Py_LE:
1825         return set_issubset(v, w);
1826     case Py_GE:
1827         return set_issuperset(v, w);
1828     case Py_LT:
1829         if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1830             Py_RETURN_FALSE;
1831         return set_issubset(v, w);
1832     case Py_GT:
1833         if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1834             Py_RETURN_FALSE;
1835         return set_issuperset(v, w);
1836     }
1837     Py_RETURN_NOTIMPLEMENTED;
1838 }
1839 
1840 static PyObject *
set_add(PySetObject * so,PyObject * key)1841 set_add(PySetObject *so, PyObject *key)
1842 {
1843     if (set_add_key(so, key))
1844         return NULL;
1845     Py_RETURN_NONE;
1846 }
1847 
1848 PyDoc_STRVAR(add_doc,
1849 "Add an element to a set.\n\
1850 \n\
1851 This has no effect if the element is already present.");
1852 
1853 static int
set_contains(PySetObject * so,PyObject * key)1854 set_contains(PySetObject *so, PyObject *key)
1855 {
1856     PyObject *tmpkey;
1857     int rv;
1858 
1859     rv = set_contains_key(so, key);
1860     if (rv < 0) {
1861         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1862             return -1;
1863         PyErr_Clear();
1864         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1865         if (tmpkey == NULL)
1866             return -1;
1867         rv = set_contains_key(so, tmpkey);
1868         Py_DECREF(tmpkey);
1869     }
1870     return rv;
1871 }
1872 
1873 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1874 set_direct_contains(PySetObject *so, PyObject *key)
1875 {
1876     long result;
1877 
1878     result = set_contains(so, key);
1879     if (result < 0)
1880         return NULL;
1881     return PyBool_FromLong(result);
1882 }
1883 
1884 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1885 
1886 static PyObject *
set_remove(PySetObject * so,PyObject * key)1887 set_remove(PySetObject *so, PyObject *key)
1888 {
1889     PyObject *tmpkey;
1890     int rv;
1891 
1892     rv = set_discard_key(so, key);
1893     if (rv < 0) {
1894         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1895             return NULL;
1896         PyErr_Clear();
1897         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1898         if (tmpkey == NULL)
1899             return NULL;
1900         rv = set_discard_key(so, tmpkey);
1901         Py_DECREF(tmpkey);
1902         if (rv < 0)
1903             return NULL;
1904     }
1905 
1906     if (rv == DISCARD_NOTFOUND) {
1907         _PyErr_SetKeyError(key);
1908         return NULL;
1909     }
1910     Py_RETURN_NONE;
1911 }
1912 
1913 PyDoc_STRVAR(remove_doc,
1914 "Remove an element from a set; it must be a member.\n\
1915 \n\
1916 If the element is not a member, raise a KeyError.");
1917 
1918 static PyObject *
set_discard(PySetObject * so,PyObject * key)1919 set_discard(PySetObject *so, PyObject *key)
1920 {
1921     PyObject *tmpkey;
1922     int rv;
1923 
1924     rv = set_discard_key(so, key);
1925     if (rv < 0) {
1926         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1927             return NULL;
1928         PyErr_Clear();
1929         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1930         if (tmpkey == NULL)
1931             return NULL;
1932         rv = set_discard_key(so, tmpkey);
1933         Py_DECREF(tmpkey);
1934         if (rv < 0)
1935             return NULL;
1936     }
1937     Py_RETURN_NONE;
1938 }
1939 
1940 PyDoc_STRVAR(discard_doc,
1941 "Remove an element from a set if it is a member.\n\
1942 \n\
1943 If the element is not a member, do nothing.");
1944 
1945 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1946 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1947 {
1948     PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1949     _Py_IDENTIFIER(__dict__);
1950 
1951     keys = PySequence_List((PyObject *)so);
1952     if (keys == NULL)
1953         goto done;
1954     args = PyTuple_Pack(1, keys);
1955     if (args == NULL)
1956         goto done;
1957     if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) {
1958         goto done;
1959     }
1960     if (dict == NULL) {
1961         dict = Py_None;
1962         Py_INCREF(dict);
1963     }
1964     result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1965 done:
1966     Py_XDECREF(args);
1967     Py_XDECREF(keys);
1968     Py_XDECREF(dict);
1969     return result;
1970 }
1971 
1972 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1973 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1974 {
1975     Py_ssize_t res;
1976 
1977     res = _PyObject_SIZE(Py_TYPE(so));
1978     if (so->table != so->smalltable)
1979         res = res + (so->mask + 1) * sizeof(setentry);
1980     return PyLong_FromSsize_t(res);
1981 }
1982 
1983 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1984 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1985 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1986 {
1987     PyObject *iterable = NULL;
1988 
1989     if (!_PyArg_NoKeywords("set", kwds))
1990         return -1;
1991     if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1992         return -1;
1993     if (self->fill)
1994         set_clear_internal(self);
1995     self->hash = -1;
1996     if (iterable == NULL)
1997         return 0;
1998     return set_update_internal(self, iterable);
1999 }
2000 
2001 static PySequenceMethods set_as_sequence = {
2002     set_len,                            /* sq_length */
2003     0,                                  /* sq_concat */
2004     0,                                  /* sq_repeat */
2005     0,                                  /* sq_item */
2006     0,                                  /* sq_slice */
2007     0,                                  /* sq_ass_item */
2008     0,                                  /* sq_ass_slice */
2009     (objobjproc)set_contains,           /* sq_contains */
2010 };
2011 
2012 /* set object ********************************************************/
2013 
2014 #ifdef Py_DEBUG
2015 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
2016 
2017 PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
2018 All is well if assertions don't fail.");
2019 #endif
2020 
2021 static PyMethodDef set_methods[] = {
2022     {"add",             (PyCFunction)set_add,           METH_O,
2023      add_doc},
2024     {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2025      clear_doc},
2026     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2027      contains_doc},
2028     {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2029      copy_doc},
2030     {"discard",         (PyCFunction)set_discard,       METH_O,
2031      discard_doc},
2032     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2033      difference_doc},
2034     {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2035      difference_update_doc},
2036     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2037      intersection_doc},
2038     {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2039      intersection_update_doc},
2040     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2041      isdisjoint_doc},
2042     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2043      issubset_doc},
2044     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2045      issuperset_doc},
2046     {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2047      pop_doc},
2048     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2049      reduce_doc},
2050     {"remove",          (PyCFunction)set_remove,        METH_O,
2051      remove_doc},
2052     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2053      sizeof_doc},
2054     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2055      symmetric_difference_doc},
2056     {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2057      symmetric_difference_update_doc},
2058 #ifdef Py_DEBUG
2059     {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2060      test_c_api_doc},
2061 #endif
2062     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2063      union_doc},
2064     {"update",          (PyCFunction)set_update,        METH_VARARGS,
2065      update_doc},
2066     {NULL,              NULL}   /* sentinel */
2067 };
2068 
2069 static PyNumberMethods set_as_number = {
2070     0,                                  /*nb_add*/
2071     (binaryfunc)set_sub,                /*nb_subtract*/
2072     0,                                  /*nb_multiply*/
2073     0,                                  /*nb_remainder*/
2074     0,                                  /*nb_divmod*/
2075     0,                                  /*nb_power*/
2076     0,                                  /*nb_negative*/
2077     0,                                  /*nb_positive*/
2078     0,                                  /*nb_absolute*/
2079     0,                                  /*nb_bool*/
2080     0,                                  /*nb_invert*/
2081     0,                                  /*nb_lshift*/
2082     0,                                  /*nb_rshift*/
2083     (binaryfunc)set_and,                /*nb_and*/
2084     (binaryfunc)set_xor,                /*nb_xor*/
2085     (binaryfunc)set_or,                 /*nb_or*/
2086     0,                                  /*nb_int*/
2087     0,                                  /*nb_reserved*/
2088     0,                                  /*nb_float*/
2089     0,                                  /*nb_inplace_add*/
2090     (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2091     0,                                  /*nb_inplace_multiply*/
2092     0,                                  /*nb_inplace_remainder*/
2093     0,                                  /*nb_inplace_power*/
2094     0,                                  /*nb_inplace_lshift*/
2095     0,                                  /*nb_inplace_rshift*/
2096     (binaryfunc)set_iand,               /*nb_inplace_and*/
2097     (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2098     (binaryfunc)set_ior,                /*nb_inplace_or*/
2099 };
2100 
2101 PyDoc_STRVAR(set_doc,
2102 "set() -> new empty set object\n\
2103 set(iterable) -> new set object\n\
2104 \n\
2105 Build an unordered collection of unique elements.");
2106 
2107 PyTypeObject PySet_Type = {
2108     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2109     "set",                              /* tp_name */
2110     sizeof(PySetObject),                /* tp_basicsize */
2111     0,                                  /* tp_itemsize */
2112     /* methods */
2113     (destructor)set_dealloc,            /* tp_dealloc */
2114     0,                                  /* tp_vectorcall_offset */
2115     0,                                  /* tp_getattr */
2116     0,                                  /* tp_setattr */
2117     0,                                  /* tp_as_async */
2118     (reprfunc)set_repr,                 /* tp_repr */
2119     &set_as_number,                     /* tp_as_number */
2120     &set_as_sequence,                   /* tp_as_sequence */
2121     0,                                  /* tp_as_mapping */
2122     PyObject_HashNotImplemented,        /* tp_hash */
2123     0,                                  /* tp_call */
2124     0,                                  /* tp_str */
2125     PyObject_GenericGetAttr,            /* tp_getattro */
2126     0,                                  /* tp_setattro */
2127     0,                                  /* tp_as_buffer */
2128     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2129         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2130     set_doc,                            /* tp_doc */
2131     (traverseproc)set_traverse,         /* tp_traverse */
2132     (inquiry)set_clear_internal,        /* tp_clear */
2133     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2134     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2135     (getiterfunc)set_iter,              /* tp_iter */
2136     0,                                  /* tp_iternext */
2137     set_methods,                        /* tp_methods */
2138     0,                                  /* tp_members */
2139     0,                                  /* tp_getset */
2140     0,                                  /* tp_base */
2141     0,                                  /* tp_dict */
2142     0,                                  /* tp_descr_get */
2143     0,                                  /* tp_descr_set */
2144     0,                                  /* tp_dictoffset */
2145     (initproc)set_init,                 /* tp_init */
2146     PyType_GenericAlloc,                /* tp_alloc */
2147     set_new,                            /* tp_new */
2148     PyObject_GC_Del,                    /* tp_free */
2149 };
2150 
2151 /* frozenset object ********************************************************/
2152 
2153 
2154 static PyMethodDef frozenset_methods[] = {
2155     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2156      contains_doc},
2157     {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2158      copy_doc},
2159     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2160      difference_doc},
2161     {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,
2162      intersection_doc},
2163     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2164      isdisjoint_doc},
2165     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2166      issubset_doc},
2167     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2168      issuperset_doc},
2169     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2170      reduce_doc},
2171     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2172      sizeof_doc},
2173     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2174      symmetric_difference_doc},
2175     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2176      union_doc},
2177     {NULL,              NULL}   /* sentinel */
2178 };
2179 
2180 static PyNumberMethods frozenset_as_number = {
2181     0,                                  /*nb_add*/
2182     (binaryfunc)set_sub,                /*nb_subtract*/
2183     0,                                  /*nb_multiply*/
2184     0,                                  /*nb_remainder*/
2185     0,                                  /*nb_divmod*/
2186     0,                                  /*nb_power*/
2187     0,                                  /*nb_negative*/
2188     0,                                  /*nb_positive*/
2189     0,                                  /*nb_absolute*/
2190     0,                                  /*nb_bool*/
2191     0,                                  /*nb_invert*/
2192     0,                                  /*nb_lshift*/
2193     0,                                  /*nb_rshift*/
2194     (binaryfunc)set_and,                /*nb_and*/
2195     (binaryfunc)set_xor,                /*nb_xor*/
2196     (binaryfunc)set_or,                 /*nb_or*/
2197 };
2198 
2199 PyDoc_STRVAR(frozenset_doc,
2200 "frozenset() -> empty frozenset object\n\
2201 frozenset(iterable) -> frozenset object\n\
2202 \n\
2203 Build an immutable unordered collection of unique elements.");
2204 
2205 PyTypeObject PyFrozenSet_Type = {
2206     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2207     "frozenset",                        /* tp_name */
2208     sizeof(PySetObject),                /* tp_basicsize */
2209     0,                                  /* tp_itemsize */
2210     /* methods */
2211     (destructor)set_dealloc,            /* tp_dealloc */
2212     0,                                  /* tp_vectorcall_offset */
2213     0,                                  /* tp_getattr */
2214     0,                                  /* tp_setattr */
2215     0,                                  /* tp_as_async */
2216     (reprfunc)set_repr,                 /* tp_repr */
2217     &frozenset_as_number,               /* tp_as_number */
2218     &set_as_sequence,                   /* tp_as_sequence */
2219     0,                                  /* tp_as_mapping */
2220     frozenset_hash,                     /* tp_hash */
2221     0,                                  /* tp_call */
2222     0,                                  /* tp_str */
2223     PyObject_GenericGetAttr,            /* tp_getattro */
2224     0,                                  /* tp_setattro */
2225     0,                                  /* tp_as_buffer */
2226     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2227         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2228     frozenset_doc,                      /* tp_doc */
2229     (traverseproc)set_traverse,         /* tp_traverse */
2230     (inquiry)set_clear_internal,        /* tp_clear */
2231     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2232     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2233     (getiterfunc)set_iter,              /* tp_iter */
2234     0,                                  /* tp_iternext */
2235     frozenset_methods,                  /* tp_methods */
2236     0,                                  /* tp_members */
2237     0,                                  /* tp_getset */
2238     0,                                  /* tp_base */
2239     0,                                  /* tp_dict */
2240     0,                                  /* tp_descr_get */
2241     0,                                  /* tp_descr_set */
2242     0,                                  /* tp_dictoffset */
2243     0,                                  /* tp_init */
2244     PyType_GenericAlloc,                /* tp_alloc */
2245     frozenset_new,                      /* tp_new */
2246     PyObject_GC_Del,                    /* tp_free */
2247 };
2248 
2249 
2250 /***** C API functions *************************************************/
2251 
2252 PyObject *
PySet_New(PyObject * iterable)2253 PySet_New(PyObject *iterable)
2254 {
2255     return make_new_set(&PySet_Type, iterable);
2256 }
2257 
2258 PyObject *
PyFrozenSet_New(PyObject * iterable)2259 PyFrozenSet_New(PyObject *iterable)
2260 {
2261     return make_new_set(&PyFrozenSet_Type, iterable);
2262 }
2263 
2264 Py_ssize_t
PySet_Size(PyObject * anyset)2265 PySet_Size(PyObject *anyset)
2266 {
2267     if (!PyAnySet_Check(anyset)) {
2268         PyErr_BadInternalCall();
2269         return -1;
2270     }
2271     return PySet_GET_SIZE(anyset);
2272 }
2273 
2274 int
PySet_Clear(PyObject * set)2275 PySet_Clear(PyObject *set)
2276 {
2277     if (!PySet_Check(set)) {
2278         PyErr_BadInternalCall();
2279         return -1;
2280     }
2281     return set_clear_internal((PySetObject *)set);
2282 }
2283 
2284 int
PySet_Contains(PyObject * anyset,PyObject * key)2285 PySet_Contains(PyObject *anyset, PyObject *key)
2286 {
2287     if (!PyAnySet_Check(anyset)) {
2288         PyErr_BadInternalCall();
2289         return -1;
2290     }
2291     return set_contains_key((PySetObject *)anyset, key);
2292 }
2293 
2294 int
PySet_Discard(PyObject * set,PyObject * key)2295 PySet_Discard(PyObject *set, PyObject *key)
2296 {
2297     if (!PySet_Check(set)) {
2298         PyErr_BadInternalCall();
2299         return -1;
2300     }
2301     return set_discard_key((PySetObject *)set, key);
2302 }
2303 
2304 int
PySet_Add(PyObject * anyset,PyObject * key)2305 PySet_Add(PyObject *anyset, PyObject *key)
2306 {
2307     if (!PySet_Check(anyset) &&
2308         (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2309         PyErr_BadInternalCall();
2310         return -1;
2311     }
2312     return set_add_key((PySetObject *)anyset, key);
2313 }
2314 
2315 int
PySet_ClearFreeList(void)2316 PySet_ClearFreeList(void)
2317 {
2318     return 0;
2319 }
2320 
2321 void
PySet_Fini(void)2322 PySet_Fini(void)
2323 {
2324     Py_CLEAR(emptyfrozenset);
2325 }
2326 
2327 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2328 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2329 {
2330     setentry *entry;
2331 
2332     if (!PyAnySet_Check(set)) {
2333         PyErr_BadInternalCall();
2334         return -1;
2335     }
2336     if (set_next((PySetObject *)set, pos, &entry) == 0)
2337         return 0;
2338     *key = entry->key;
2339     *hash = entry->hash;
2340     return 1;
2341 }
2342 
2343 PyObject *
PySet_Pop(PyObject * set)2344 PySet_Pop(PyObject *set)
2345 {
2346     if (!PySet_Check(set)) {
2347         PyErr_BadInternalCall();
2348         return NULL;
2349     }
2350     return set_pop((PySetObject *)set, NULL);
2351 }
2352 
2353 int
_PySet_Update(PyObject * set,PyObject * iterable)2354 _PySet_Update(PyObject *set, PyObject *iterable)
2355 {
2356     if (!PySet_Check(set)) {
2357         PyErr_BadInternalCall();
2358         return -1;
2359     }
2360     return set_update_internal((PySetObject *)set, iterable);
2361 }
2362 
2363 /* Exported for the gdb plugin's benefit. */
2364 PyObject *_PySet_Dummy = dummy;
2365 
2366 #ifdef Py_DEBUG
2367 
2368 /* Test code to be called with any three element set.
2369    Returns True and original set is restored. */
2370 
2371 #define assertRaises(call_return_value, exception)              \
2372     do {                                                        \
2373         assert(call_return_value);                              \
2374         assert(PyErr_ExceptionMatches(exception));              \
2375         PyErr_Clear();                                          \
2376     } while(0)
2377 
2378 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2379 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2380 {
2381     Py_ssize_t count;
2382     const char *s;
2383     Py_ssize_t i;
2384     PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2385     PyObject *ob = (PyObject *)so;
2386     Py_hash_t hash;
2387     PyObject *str;
2388 
2389     /* Verify preconditions */
2390     assert(PyAnySet_Check(ob));
2391     assert(PyAnySet_CheckExact(ob));
2392     assert(!PyFrozenSet_CheckExact(ob));
2393 
2394     /* so.clear(); so |= set("abc"); */
2395     str = PyUnicode_FromString("abc");
2396     if (str == NULL)
2397         return NULL;
2398     set_clear_internal(so);
2399     if (set_update_internal(so, str)) {
2400         Py_DECREF(str);
2401         return NULL;
2402     }
2403     Py_DECREF(str);
2404 
2405     /* Exercise type/size checks */
2406     assert(PySet_Size(ob) == 3);
2407     assert(PySet_GET_SIZE(ob) == 3);
2408 
2409     /* Raise TypeError for non-iterable constructor arguments */
2410     assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2411     assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2412 
2413     /* Raise TypeError for unhashable key */
2414     dup = PySet_New(ob);
2415     assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2416     assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2417     assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2418 
2419     /* Exercise successful pop, contains, add, and discard */
2420     elem = PySet_Pop(ob);
2421     assert(PySet_Contains(ob, elem) == 0);
2422     assert(PySet_GET_SIZE(ob) == 2);
2423     assert(PySet_Add(ob, elem) == 0);
2424     assert(PySet_Contains(ob, elem) == 1);
2425     assert(PySet_GET_SIZE(ob) == 3);
2426     assert(PySet_Discard(ob, elem) == 1);
2427     assert(PySet_GET_SIZE(ob) == 2);
2428     assert(PySet_Discard(ob, elem) == 0);
2429     assert(PySet_GET_SIZE(ob) == 2);
2430 
2431     /* Exercise clear */
2432     dup2 = PySet_New(dup);
2433     assert(PySet_Clear(dup2) == 0);
2434     assert(PySet_Size(dup2) == 0);
2435     Py_DECREF(dup2);
2436 
2437     /* Raise SystemError on clear or update of frozen set */
2438     f = PyFrozenSet_New(dup);
2439     assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2440     assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2441     assert(PySet_Add(f, elem) == 0);
2442     Py_INCREF(f);
2443     assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2444     Py_DECREF(f);
2445     Py_DECREF(f);
2446 
2447     /* Exercise direct iteration */
2448     i = 0, count = 0;
2449     while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2450         s = PyUnicode_AsUTF8(x);
2451         assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2452         count++;
2453     }
2454     assert(count == 3);
2455 
2456     /* Exercise updates */
2457     dup2 = PySet_New(NULL);
2458     assert(_PySet_Update(dup2, dup) == 0);
2459     assert(PySet_Size(dup2) == 3);
2460     assert(_PySet_Update(dup2, dup) == 0);
2461     assert(PySet_Size(dup2) == 3);
2462     Py_DECREF(dup2);
2463 
2464     /* Raise SystemError when self argument is not a set or frozenset. */
2465     t = PyTuple_New(0);
2466     assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2467     assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2468     Py_DECREF(t);
2469 
2470     /* Raise SystemError when self argument is not a set. */
2471     f = PyFrozenSet_New(dup);
2472     assert(PySet_Size(f) == 3);
2473     assert(PyFrozenSet_CheckExact(f));
2474     assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2475     assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2476     Py_DECREF(f);
2477 
2478     /* Raise KeyError when popping from an empty set */
2479     assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2480     Py_DECREF(ob);
2481     assert(PySet_GET_SIZE(ob) == 0);
2482     assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2483 
2484     /* Restore the set from the copy using the PyNumber API */
2485     assert(PyNumber_InPlaceOr(ob, dup) == ob);
2486     Py_DECREF(ob);
2487 
2488     /* Verify constructors accept NULL arguments */
2489     f = PySet_New(NULL);
2490     assert(f != NULL);
2491     assert(PySet_GET_SIZE(f) == 0);
2492     Py_DECREF(f);
2493     f = PyFrozenSet_New(NULL);
2494     assert(f != NULL);
2495     assert(PyFrozenSet_CheckExact(f));
2496     assert(PySet_GET_SIZE(f) == 0);
2497     Py_DECREF(f);
2498 
2499     Py_DECREF(elem);
2500     Py_DECREF(dup);
2501     Py_RETURN_TRUE;
2502 }
2503 
2504 #undef assertRaises
2505 
2506 #endif
2507 
2508 /***** Dummy Struct  *************************************************/
2509 
2510 static PyObject *
dummy_repr(PyObject * op)2511 dummy_repr(PyObject *op)
2512 {
2513     return PyUnicode_FromString("<dummy key>");
2514 }
2515 
2516 static void
dummy_dealloc(PyObject * ignore)2517 dummy_dealloc(PyObject* ignore)
2518 {
2519     Py_FatalError("deallocating <dummy key>");
2520 }
2521 
2522 static PyTypeObject _PySetDummy_Type = {
2523     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2524     "<dummy key> type",
2525     0,
2526     0,
2527     dummy_dealloc,      /*tp_dealloc*/ /*never called*/
2528     0,                  /*tp_vectorcall_offset*/
2529     0,                  /*tp_getattr*/
2530     0,                  /*tp_setattr*/
2531     0,                  /*tp_as_async*/
2532     dummy_repr,         /*tp_repr*/
2533     0,                  /*tp_as_number*/
2534     0,                  /*tp_as_sequence*/
2535     0,                  /*tp_as_mapping*/
2536     0,                  /*tp_hash */
2537     0,                  /*tp_call */
2538     0,                  /*tp_str */
2539     0,                  /*tp_getattro */
2540     0,                  /*tp_setattro */
2541     0,                  /*tp_as_buffer */
2542     Py_TPFLAGS_DEFAULT, /*tp_flags */
2543 };
2544 
2545 static PyObject _dummy_struct = {
2546   _PyObject_EXTRA_INIT
2547   2, &_PySetDummy_Type
2548 };
2549 
2550