• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* set object implementation
3 
4    Written and maintained by Raymond D. Hettinger <python@rcn.com>
5    Derived from Lib/sets.py and Objects/dictobject.c.
6 
7    The basic lookup function used by all operations.
8    This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9 
10    The initial probe index is computed as hash mod the table size.
11    Subsequent probe indices are computed as explained in Objects/dictobject.c.
12 
13    To improve cache locality, each probe inspects a series of consecutive
14    nearby entries before moving on to probes elsewhere in memory.  This leaves
15    us with a hybrid of linear probing and randomized probing.  The linear probing
16    reduces the cost of hash collisions because consecutive memory accesses
17    tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
18    we then use more of the upper bits from the hash value and apply a simple
19    linear congruential random number genearator.  This helps break-up long
20    chains of collisions.
21 
22    All arithmetic on hash should ignore overflow.
23 
24    Unlike the dictionary implementation, the lookkey function can return
25    NULL if the rich comparison returns an error.
26 
27    Use cases for sets differ considerably from dictionaries where looked-up
28    keys are more likely to be present.  In contrast, sets are primarily
29    about membership testing where the presence of an element is not known in
30    advance.  Accordingly, the set implementation needs to optimize for both
31    the found and not-found case.
32 */
33 
34 #include "Python.h"
35 #include "internal/pystate.h"
36 #include "structmember.h"
37 
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40 
41 #define dummy (&_dummy_struct)
42 
43 
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46 
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51 
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54 
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58     setentry *table;
59     setentry *entry;
60     size_t perturb;
61     size_t mask = so->mask;
62     size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63     size_t j;
64     int cmp;
65 
66     entry = &so->table[i];
67     if (entry->key == NULL)
68         return entry;
69 
70     perturb = hash;
71 
72     while (1) {
73         if (entry->hash == hash) {
74             PyObject *startkey = entry->key;
75             /* startkey cannot be a dummy because the dummy hash field is -1 */
76             assert(startkey != dummy);
77             if (startkey == key)
78                 return entry;
79             if (PyUnicode_CheckExact(startkey)
80                 && PyUnicode_CheckExact(key)
81                 && _PyUnicode_EQ(startkey, key))
82                 return entry;
83             table = so->table;
84             Py_INCREF(startkey);
85             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
86             Py_DECREF(startkey);
87             if (cmp < 0)                                          /* unlikely */
88                 return NULL;
89             if (table != so->table || entry->key != startkey)     /* unlikely */
90                 return set_lookkey(so, key, hash);
91             if (cmp > 0)                                          /* likely */
92                 return entry;
93             mask = so->mask;                 /* help avoid a register spill */
94         }
95 
96         if (i + LINEAR_PROBES <= mask) {
97             for (j = 0 ; j < LINEAR_PROBES ; j++) {
98                 entry++;
99                 if (entry->hash == 0 && entry->key == NULL)
100                     return entry;
101                 if (entry->hash == hash) {
102                     PyObject *startkey = entry->key;
103                     assert(startkey != dummy);
104                     if (startkey == key)
105                         return entry;
106                     if (PyUnicode_CheckExact(startkey)
107                         && PyUnicode_CheckExact(key)
108                         && _PyUnicode_EQ(startkey, key))
109                         return entry;
110                     table = so->table;
111                     Py_INCREF(startkey);
112                     cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
113                     Py_DECREF(startkey);
114                     if (cmp < 0)
115                         return NULL;
116                     if (table != so->table || entry->key != startkey)
117                         return set_lookkey(so, key, hash);
118                     if (cmp > 0)
119                         return entry;
120                     mask = so->mask;
121                 }
122             }
123         }
124 
125         perturb >>= PERTURB_SHIFT;
126         i = (i * 5 + 1 + perturb) & mask;
127 
128         entry = &so->table[i];
129         if (entry->key == NULL)
130             return entry;
131     }
132 }
133 
134 static int set_table_resize(PySetObject *, Py_ssize_t);
135 
136 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)137 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
138 {
139     setentry *table;
140     setentry *freeslot;
141     setentry *entry;
142     size_t perturb;
143     size_t mask;
144     size_t i;                       /* Unsigned for defined overflow behavior */
145     size_t j;
146     int cmp;
147 
148     /* Pre-increment is necessary to prevent arbitrary code in the rich
149        comparison from deallocating the key just before the insertion. */
150     Py_INCREF(key);
151 
152   restart:
153 
154     mask = so->mask;
155     i = (size_t)hash & mask;
156 
157     entry = &so->table[i];
158     if (entry->key == NULL)
159         goto found_unused;
160 
161     freeslot = NULL;
162     perturb = hash;
163 
164     while (1) {
165         if (entry->hash == hash) {
166             PyObject *startkey = entry->key;
167             /* startkey cannot be a dummy because the dummy hash field is -1 */
168             assert(startkey != dummy);
169             if (startkey == key)
170                 goto found_active;
171             if (PyUnicode_CheckExact(startkey)
172                 && PyUnicode_CheckExact(key)
173                 && _PyUnicode_EQ(startkey, key))
174                 goto found_active;
175             table = so->table;
176             Py_INCREF(startkey);
177             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
178             Py_DECREF(startkey);
179             if (cmp > 0)                                          /* likely */
180                 goto found_active;
181             if (cmp < 0)
182                 goto comparison_error;
183             /* Continuing the search from the current entry only makes
184                sense if the table and entry are unchanged; otherwise,
185                we have to restart from the beginning */
186             if (table != so->table || entry->key != startkey)
187                 goto restart;
188             mask = so->mask;                 /* help avoid a register spill */
189         }
190         else if (entry->hash == -1)
191             freeslot = entry;
192 
193         if (i + LINEAR_PROBES <= mask) {
194             for (j = 0 ; j < LINEAR_PROBES ; j++) {
195                 entry++;
196                 if (entry->hash == 0 && entry->key == NULL)
197                     goto found_unused_or_dummy;
198                 if (entry->hash == hash) {
199                     PyObject *startkey = entry->key;
200                     assert(startkey != dummy);
201                     if (startkey == key)
202                         goto found_active;
203                     if (PyUnicode_CheckExact(startkey)
204                         && PyUnicode_CheckExact(key)
205                         && _PyUnicode_EQ(startkey, key))
206                         goto found_active;
207                     table = so->table;
208                     Py_INCREF(startkey);
209                     cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
210                     Py_DECREF(startkey);
211                     if (cmp > 0)
212                         goto found_active;
213                     if (cmp < 0)
214                         goto comparison_error;
215                     if (table != so->table || entry->key != startkey)
216                         goto restart;
217                     mask = so->mask;
218                 }
219                 else if (entry->hash == -1)
220                     freeslot = entry;
221             }
222         }
223 
224         perturb >>= PERTURB_SHIFT;
225         i = (i * 5 + 1 + perturb) & mask;
226 
227         entry = &so->table[i];
228         if (entry->key == NULL)
229             goto found_unused_or_dummy;
230     }
231 
232   found_unused_or_dummy:
233     if (freeslot == NULL)
234         goto found_unused;
235     so->used++;
236     freeslot->key = key;
237     freeslot->hash = hash;
238     return 0;
239 
240   found_unused:
241     so->fill++;
242     so->used++;
243     entry->key = key;
244     entry->hash = hash;
245     if ((size_t)so->fill*5 < mask*3)
246         return 0;
247     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
248 
249   found_active:
250     Py_DECREF(key);
251     return 0;
252 
253   comparison_error:
254     Py_DECREF(key);
255     return -1;
256 }
257 
258 /*
259 Internal routine used by set_table_resize() to insert an item which is
260 known to be absent from the set.  This routine also assumes that
261 the set contains no deleted entries.  Besides the performance benefit,
262 there is also safety benefit since using set_add_entry() risks making
263 a callback in the middle of a set_table_resize(), see issue 1456209.
264 The caller is responsible for updating the key's reference count and
265 the setobject's fill and used fields.
266 */
267 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)268 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
269 {
270     setentry *entry;
271     size_t perturb = hash;
272     size_t i = (size_t)hash & mask;
273     size_t j;
274 
275     while (1) {
276         entry = &table[i];
277         if (entry->key == NULL)
278             goto found_null;
279         if (i + LINEAR_PROBES <= mask) {
280             for (j = 0; j < LINEAR_PROBES; j++) {
281                 entry++;
282                 if (entry->key == NULL)
283                     goto found_null;
284             }
285         }
286         perturb >>= PERTURB_SHIFT;
287         i = (i * 5 + 1 + perturb) & mask;
288     }
289   found_null:
290     entry->key = key;
291     entry->hash = hash;
292 }
293 
294 /* ======== End logic for probing the hash table ========================== */
295 /* ======================================================================== */
296 
297 /*
298 Restructure the table by allocating a new table and reinserting all
299 keys again.  When entries have been deleted, the new table may
300 actually be smaller than the old one.
301 */
302 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)303 set_table_resize(PySetObject *so, Py_ssize_t minused)
304 {
305     setentry *oldtable, *newtable, *entry;
306     Py_ssize_t oldmask = so->mask;
307     size_t newmask;
308     int is_oldtable_malloced;
309     setentry small_copy[PySet_MINSIZE];
310 
311     assert(minused >= 0);
312 
313     /* Find the smallest table size > minused. */
314     /* XXX speed-up with intrinsics */
315     size_t newsize = PySet_MINSIZE;
316     while (newsize <= (size_t)minused) {
317         newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
318     }
319 
320     /* Get space for a new table. */
321     oldtable = so->table;
322     assert(oldtable != NULL);
323     is_oldtable_malloced = oldtable != so->smalltable;
324 
325     if (newsize == PySet_MINSIZE) {
326         /* A large table is shrinking, or we can't get any smaller. */
327         newtable = so->smalltable;
328         if (newtable == oldtable) {
329             if (so->fill == so->used) {
330                 /* No dummies, so no point doing anything. */
331                 return 0;
332             }
333             /* We're not going to resize it, but rebuild the
334                table anyway to purge old dummy entries.
335                Subtle:  This is *necessary* if fill==size,
336                as set_lookkey needs at least one virgin slot to
337                terminate failing searches.  If fill < size, it's
338                merely desirable, as dummies slow searches. */
339             assert(so->fill > so->used);
340             memcpy(small_copy, oldtable, sizeof(small_copy));
341             oldtable = small_copy;
342         }
343     }
344     else {
345         newtable = PyMem_NEW(setentry, newsize);
346         if (newtable == NULL) {
347             PyErr_NoMemory();
348             return -1;
349         }
350     }
351 
352     /* Make the set empty, using the new table. */
353     assert(newtable != oldtable);
354     memset(newtable, 0, sizeof(setentry) * newsize);
355     so->mask = newsize - 1;
356     so->table = newtable;
357 
358     /* Copy the data over; this is refcount-neutral for active entries;
359        dummy entries aren't copied over, of course */
360     newmask = (size_t)so->mask;
361     if (so->fill == so->used) {
362         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
363             if (entry->key != NULL) {
364                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
365             }
366         }
367     } else {
368         so->fill = so->used;
369         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
370             if (entry->key != NULL && entry->key != dummy) {
371                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
372             }
373         }
374     }
375 
376     if (is_oldtable_malloced)
377         PyMem_DEL(oldtable);
378     return 0;
379 }
380 
381 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)382 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
383 {
384     setentry *entry;
385 
386     entry = set_lookkey(so, key, hash);
387     if (entry != NULL)
388         return entry->key != NULL;
389     return -1;
390 }
391 
392 #define DISCARD_NOTFOUND 0
393 #define DISCARD_FOUND 1
394 
395 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)396 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
397 {
398     setentry *entry;
399     PyObject *old_key;
400 
401     entry = set_lookkey(so, key, hash);
402     if (entry == NULL)
403         return -1;
404     if (entry->key == NULL)
405         return DISCARD_NOTFOUND;
406     old_key = entry->key;
407     entry->key = dummy;
408     entry->hash = -1;
409     so->used--;
410     Py_DECREF(old_key);
411     return DISCARD_FOUND;
412 }
413 
414 static int
set_add_key(PySetObject * so,PyObject * key)415 set_add_key(PySetObject *so, PyObject *key)
416 {
417     Py_hash_t hash;
418 
419     if (!PyUnicode_CheckExact(key) ||
420         (hash = ((PyASCIIObject *) key)->hash) == -1) {
421         hash = PyObject_Hash(key);
422         if (hash == -1)
423             return -1;
424     }
425     return set_add_entry(so, key, hash);
426 }
427 
428 static int
set_contains_key(PySetObject * so,PyObject * key)429 set_contains_key(PySetObject *so, PyObject *key)
430 {
431     Py_hash_t hash;
432 
433     if (!PyUnicode_CheckExact(key) ||
434         (hash = ((PyASCIIObject *) key)->hash) == -1) {
435         hash = PyObject_Hash(key);
436         if (hash == -1)
437             return -1;
438     }
439     return set_contains_entry(so, key, hash);
440 }
441 
442 static int
set_discard_key(PySetObject * so,PyObject * key)443 set_discard_key(PySetObject *so, PyObject *key)
444 {
445     Py_hash_t hash;
446 
447     if (!PyUnicode_CheckExact(key) ||
448         (hash = ((PyASCIIObject *) key)->hash) == -1) {
449         hash = PyObject_Hash(key);
450         if (hash == -1)
451             return -1;
452     }
453     return set_discard_entry(so, key, hash);
454 }
455 
456 static void
set_empty_to_minsize(PySetObject * so)457 set_empty_to_minsize(PySetObject *so)
458 {
459     memset(so->smalltable, 0, sizeof(so->smalltable));
460     so->fill = 0;
461     so->used = 0;
462     so->mask = PySet_MINSIZE - 1;
463     so->table = so->smalltable;
464     so->hash = -1;
465 }
466 
467 static int
set_clear_internal(PySetObject * so)468 set_clear_internal(PySetObject *so)
469 {
470     setentry *entry;
471     setentry *table = so->table;
472     Py_ssize_t fill = so->fill;
473     Py_ssize_t used = so->used;
474     int table_is_malloced = table != so->smalltable;
475     setentry small_copy[PySet_MINSIZE];
476 
477     assert (PyAnySet_Check(so));
478     assert(table != NULL);
479 
480     /* This is delicate.  During the process of clearing the set,
481      * decrefs can cause the set to mutate.  To avoid fatal confusion
482      * (voice of experience), we have to make the set empty before
483      * clearing the slots, and never refer to anything via so->ref while
484      * clearing.
485      */
486     if (table_is_malloced)
487         set_empty_to_minsize(so);
488 
489     else if (fill > 0) {
490         /* It's a small table with something that needs to be cleared.
491          * Afraid the only safe way is to copy the set entries into
492          * another small table first.
493          */
494         memcpy(small_copy, table, sizeof(small_copy));
495         table = small_copy;
496         set_empty_to_minsize(so);
497     }
498     /* else it's a small table that's already empty */
499 
500     /* Now we can finally clear things.  If C had refcounts, we could
501      * assert that the refcount on table is 1 now, i.e. that this function
502      * has unique access to it, so decref side-effects can't alter it.
503      */
504     for (entry = table; used > 0; entry++) {
505         if (entry->key && entry->key != dummy) {
506             used--;
507             Py_DECREF(entry->key);
508         }
509     }
510 
511     if (table_is_malloced)
512         PyMem_DEL(table);
513     return 0;
514 }
515 
516 /*
517  * Iterate over a set table.  Use like so:
518  *
519  *     Py_ssize_t pos;
520  *     setentry *entry;
521  *     pos = 0;   # important!  pos should not otherwise be changed by you
522  *     while (set_next(yourset, &pos, &entry)) {
523  *              Refer to borrowed reference in entry->key.
524  *     }
525  *
526  * CAUTION:  In general, it isn't safe to use set_next in a loop that
527  * mutates the table.
528  */
529 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)530 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
531 {
532     Py_ssize_t i;
533     Py_ssize_t mask;
534     setentry *entry;
535 
536     assert (PyAnySet_Check(so));
537     i = *pos_ptr;
538     assert(i >= 0);
539     mask = so->mask;
540     entry = &so->table[i];
541     while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
542         i++;
543         entry++;
544     }
545     *pos_ptr = i+1;
546     if (i > mask)
547         return 0;
548     assert(entry != NULL);
549     *entry_ptr = entry;
550     return 1;
551 }
552 
553 static void
set_dealloc(PySetObject * so)554 set_dealloc(PySetObject *so)
555 {
556     setentry *entry;
557     Py_ssize_t used = so->used;
558 
559     /* bpo-31095: UnTrack is needed before calling any callbacks */
560     PyObject_GC_UnTrack(so);
561     Py_TRASHCAN_SAFE_BEGIN(so)
562     if (so->weakreflist != NULL)
563         PyObject_ClearWeakRefs((PyObject *) so);
564 
565     for (entry = so->table; used > 0; entry++) {
566         if (entry->key && entry->key != dummy) {
567                 used--;
568                 Py_DECREF(entry->key);
569         }
570     }
571     if (so->table != so->smalltable)
572         PyMem_DEL(so->table);
573     Py_TYPE(so)->tp_free(so);
574     Py_TRASHCAN_SAFE_END(so)
575 }
576 
577 static PyObject *
set_repr(PySetObject * so)578 set_repr(PySetObject *so)
579 {
580     PyObject *result=NULL, *keys, *listrepr, *tmp;
581     int status = Py_ReprEnter((PyObject*)so);
582 
583     if (status != 0) {
584         if (status < 0)
585             return NULL;
586         return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
587     }
588 
589     /* shortcut for the empty set */
590     if (!so->used) {
591         Py_ReprLeave((PyObject*)so);
592         return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
593     }
594 
595     keys = PySequence_List((PyObject *)so);
596     if (keys == NULL)
597         goto done;
598 
599     /* repr(keys)[1:-1] */
600     listrepr = PyObject_Repr(keys);
601     Py_DECREF(keys);
602     if (listrepr == NULL)
603         goto done;
604     tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
605     Py_DECREF(listrepr);
606     if (tmp == NULL)
607         goto done;
608     listrepr = tmp;
609 
610     if (Py_TYPE(so) != &PySet_Type)
611         result = PyUnicode_FromFormat("%s({%U})",
612                                       Py_TYPE(so)->tp_name,
613                                       listrepr);
614     else
615         result = PyUnicode_FromFormat("{%U}", listrepr);
616     Py_DECREF(listrepr);
617 done:
618     Py_ReprLeave((PyObject*)so);
619     return result;
620 }
621 
622 static Py_ssize_t
set_len(PyObject * so)623 set_len(PyObject *so)
624 {
625     return ((PySetObject *)so)->used;
626 }
627 
628 static int
set_merge(PySetObject * so,PyObject * otherset)629 set_merge(PySetObject *so, PyObject *otherset)
630 {
631     PySetObject *other;
632     PyObject *key;
633     Py_ssize_t i;
634     setentry *so_entry;
635     setentry *other_entry;
636 
637     assert (PyAnySet_Check(so));
638     assert (PyAnySet_Check(otherset));
639 
640     other = (PySetObject*)otherset;
641     if (other == so || other->used == 0)
642         /* a.update(a) or a.update(set()); nothing to do */
643         return 0;
644     /* Do one big resize at the start, rather than
645      * incrementally resizing as we insert new keys.  Expect
646      * that there will be no (or few) overlapping keys.
647      */
648     if ((so->fill + other->used)*5 >= so->mask*3) {
649         if (set_table_resize(so, (so->used + other->used)*2) != 0)
650             return -1;
651     }
652     so_entry = so->table;
653     other_entry = other->table;
654 
655     /* If our table is empty, and both tables have the same size, and
656        there are no dummies to eliminate, then just copy the pointers. */
657     if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
658         for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
659             key = other_entry->key;
660             if (key != NULL) {
661                 assert(so_entry->key == NULL);
662                 Py_INCREF(key);
663                 so_entry->key = key;
664                 so_entry->hash = other_entry->hash;
665             }
666         }
667         so->fill = other->fill;
668         so->used = other->used;
669         return 0;
670     }
671 
672     /* If our table is empty, we can use set_insert_clean() */
673     if (so->fill == 0) {
674         setentry *newtable = so->table;
675         size_t newmask = (size_t)so->mask;
676         so->fill = other->used;
677         so->used = other->used;
678         for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
679             key = other_entry->key;
680             if (key != NULL && key != dummy) {
681                 Py_INCREF(key);
682                 set_insert_clean(newtable, newmask, key, other_entry->hash);
683             }
684         }
685         return 0;
686     }
687 
688     /* We can't assure there are no duplicates, so do normal insertions */
689     for (i = 0; i <= other->mask; i++) {
690         other_entry = &other->table[i];
691         key = other_entry->key;
692         if (key != NULL && key != dummy) {
693             if (set_add_entry(so, key, other_entry->hash))
694                 return -1;
695         }
696     }
697     return 0;
698 }
699 
700 static PyObject *
set_pop(PySetObject * so)701 set_pop(PySetObject *so)
702 {
703     /* Make sure the search finger is in bounds */
704     Py_ssize_t i = so->finger & so->mask;
705     setentry *entry;
706     PyObject *key;
707 
708     assert (PyAnySet_Check(so));
709     if (so->used == 0) {
710         PyErr_SetString(PyExc_KeyError, "pop from an empty set");
711         return NULL;
712     }
713 
714     while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
715         i++;
716         if (i > so->mask)
717             i = 0;
718     }
719     key = entry->key;
720     entry->key = dummy;
721     entry->hash = -1;
722     so->used--;
723     so->finger = i + 1;         /* next place to start */
724     return key;
725 }
726 
727 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
728 Raises KeyError if the set is empty.");
729 
730 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)731 set_traverse(PySetObject *so, visitproc visit, void *arg)
732 {
733     Py_ssize_t pos = 0;
734     setentry *entry;
735 
736     while (set_next(so, &pos, &entry))
737         Py_VISIT(entry->key);
738     return 0;
739 }
740 
741 /* Work to increase the bit dispersion for closely spaced hash values.
742    This is important because some use cases have many combinations of a
743    small number of elements with nearby hashes so that many distinct
744    combinations collapse to only a handful of distinct hash values. */
745 
746 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)747 _shuffle_bits(Py_uhash_t h)
748 {
749     return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
750 }
751 
752 /* Most of the constants in this hash algorithm are randomly chosen
753    large primes with "interesting bit patterns" and that passed tests
754    for good collision statistics on a variety of problematic datasets
755    including powersets and graph structures (such as David Eppstein's
756    graph recipes in Lib/test/test_set.py) */
757 
758 static Py_hash_t
frozenset_hash(PyObject * self)759 frozenset_hash(PyObject *self)
760 {
761     PySetObject *so = (PySetObject *)self;
762     Py_uhash_t hash = 0;
763     setentry *entry;
764 
765     if (so->hash != -1)
766         return so->hash;
767 
768     /* Xor-in shuffled bits from every entry's hash field because xor is
769        commutative and a frozenset hash should be independent of order.
770 
771        For speed, include null entries and dummy entries and then
772        subtract out their effect afterwards so that the final hash
773        depends only on active entries.  This allows the code to be
774        vectorized by the compiler and it saves the unpredictable
775        branches that would arise when trying to exclude null and dummy
776        entries on every iteration. */
777 
778     for (entry = so->table; entry <= &so->table[so->mask]; entry++)
779         hash ^= _shuffle_bits(entry->hash);
780 
781     /* Remove the effect of an odd number of NULL entries */
782     if ((so->mask + 1 - so->fill) & 1)
783         hash ^= _shuffle_bits(0);
784 
785     /* Remove the effect of an odd number of dummy entries */
786     if ((so->fill - so->used) & 1)
787         hash ^= _shuffle_bits(-1);
788 
789     /* Factor in the number of active entries */
790     hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
791 
792     /* Disperse patterns arising in nested frozensets */
793     hash ^= (hash >> 11) ^ (hash >> 25);
794     hash = hash * 69069U + 907133923UL;
795 
796     /* -1 is reserved as an error code */
797     if (hash == (Py_uhash_t)-1)
798         hash = 590923713UL;
799 
800     so->hash = hash;
801     return hash;
802 }
803 
804 /***** Set iterator type ***********************************************/
805 
806 typedef struct {
807     PyObject_HEAD
808     PySetObject *si_set; /* Set to NULL when iterator is exhausted */
809     Py_ssize_t si_used;
810     Py_ssize_t si_pos;
811     Py_ssize_t len;
812 } setiterobject;
813 
814 static void
setiter_dealloc(setiterobject * si)815 setiter_dealloc(setiterobject *si)
816 {
817     /* bpo-31095: UnTrack is needed before calling any callbacks */
818     _PyObject_GC_UNTRACK(si);
819     Py_XDECREF(si->si_set);
820     PyObject_GC_Del(si);
821 }
822 
823 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)824 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
825 {
826     Py_VISIT(si->si_set);
827     return 0;
828 }
829 
830 static PyObject *
setiter_len(setiterobject * si)831 setiter_len(setiterobject *si)
832 {
833     Py_ssize_t len = 0;
834     if (si->si_set != NULL && si->si_used == si->si_set->used)
835         len = si->len;
836     return PyLong_FromSsize_t(len);
837 }
838 
839 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
840 
841 static PyObject *setiter_iternext(setiterobject *si);
842 
843 static PyObject *
setiter_reduce(setiterobject * si)844 setiter_reduce(setiterobject *si)
845 {
846     PyObject *list;
847     setiterobject tmp;
848 
849     list = PyList_New(0);
850     if (!list)
851         return NULL;
852 
853     /* copy the iterator state */
854     tmp = *si;
855     Py_XINCREF(tmp.si_set);
856 
857     /* iterate the temporary into a list */
858     for(;;) {
859         PyObject *element = setiter_iternext(&tmp);
860         if (element) {
861             if (PyList_Append(list, element)) {
862                 Py_DECREF(element);
863                 Py_DECREF(list);
864                 Py_XDECREF(tmp.si_set);
865                 return NULL;
866             }
867             Py_DECREF(element);
868         } else
869             break;
870     }
871     Py_XDECREF(tmp.si_set);
872     /* check for error */
873     if (tmp.si_set != NULL) {
874         /* we have an error */
875         Py_DECREF(list);
876         return NULL;
877     }
878     return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), list);
879 }
880 
881 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
882 
883 static PyMethodDef setiter_methods[] = {
884     {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
885     {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
886     {NULL,              NULL}           /* sentinel */
887 };
888 
setiter_iternext(setiterobject * si)889 static PyObject *setiter_iternext(setiterobject *si)
890 {
891     PyObject *key;
892     Py_ssize_t i, mask;
893     setentry *entry;
894     PySetObject *so = si->si_set;
895 
896     if (so == NULL)
897         return NULL;
898     assert (PyAnySet_Check(so));
899 
900     if (si->si_used != so->used) {
901         PyErr_SetString(PyExc_RuntimeError,
902                         "Set changed size during iteration");
903         si->si_used = -1; /* Make this state sticky */
904         return NULL;
905     }
906 
907     i = si->si_pos;
908     assert(i>=0);
909     entry = so->table;
910     mask = so->mask;
911     while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
912         i++;
913     si->si_pos = i+1;
914     if (i > mask)
915         goto fail;
916     si->len--;
917     key = entry[i].key;
918     Py_INCREF(key);
919     return key;
920 
921 fail:
922     si->si_set = NULL;
923     Py_DECREF(so);
924     return NULL;
925 }
926 
927 PyTypeObject PySetIter_Type = {
928     PyVarObject_HEAD_INIT(&PyType_Type, 0)
929     "set_iterator",                             /* tp_name */
930     sizeof(setiterobject),                      /* tp_basicsize */
931     0,                                          /* tp_itemsize */
932     /* methods */
933     (destructor)setiter_dealloc,                /* tp_dealloc */
934     0,                                          /* tp_print */
935     0,                                          /* tp_getattr */
936     0,                                          /* tp_setattr */
937     0,                                          /* tp_reserved */
938     0,                                          /* tp_repr */
939     0,                                          /* tp_as_number */
940     0,                                          /* tp_as_sequence */
941     0,                                          /* tp_as_mapping */
942     0,                                          /* tp_hash */
943     0,                                          /* tp_call */
944     0,                                          /* tp_str */
945     PyObject_GenericGetAttr,                    /* tp_getattro */
946     0,                                          /* tp_setattro */
947     0,                                          /* tp_as_buffer */
948     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
949     0,                                          /* tp_doc */
950     (traverseproc)setiter_traverse,             /* tp_traverse */
951     0,                                          /* tp_clear */
952     0,                                          /* tp_richcompare */
953     0,                                          /* tp_weaklistoffset */
954     PyObject_SelfIter,                          /* tp_iter */
955     (iternextfunc)setiter_iternext,             /* tp_iternext */
956     setiter_methods,                            /* tp_methods */
957     0,
958 };
959 
960 static PyObject *
set_iter(PySetObject * so)961 set_iter(PySetObject *so)
962 {
963     setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
964     if (si == NULL)
965         return NULL;
966     Py_INCREF(so);
967     si->si_set = so;
968     si->si_used = so->used;
969     si->si_pos = 0;
970     si->len = so->used;
971     _PyObject_GC_TRACK(si);
972     return (PyObject *)si;
973 }
974 
975 static int
set_update_internal(PySetObject * so,PyObject * other)976 set_update_internal(PySetObject *so, PyObject *other)
977 {
978     PyObject *key, *it;
979 
980     if (PyAnySet_Check(other))
981         return set_merge(so, other);
982 
983     if (PyDict_CheckExact(other)) {
984         PyObject *value;
985         Py_ssize_t pos = 0;
986         Py_hash_t hash;
987         Py_ssize_t dictsize = PyDict_GET_SIZE(other);
988 
989         /* Do one big resize at the start, rather than
990         * incrementally resizing as we insert new keys.  Expect
991         * that there will be no (or few) overlapping keys.
992         */
993         if (dictsize < 0)
994             return -1;
995         if ((so->fill + dictsize)*5 >= so->mask*3) {
996             if (set_table_resize(so, (so->used + dictsize)*2) != 0)
997                 return -1;
998         }
999         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1000             if (set_add_entry(so, key, hash))
1001                 return -1;
1002         }
1003         return 0;
1004     }
1005 
1006     it = PyObject_GetIter(other);
1007     if (it == NULL)
1008         return -1;
1009 
1010     while ((key = PyIter_Next(it)) != NULL) {
1011         if (set_add_key(so, key)) {
1012             Py_DECREF(it);
1013             Py_DECREF(key);
1014             return -1;
1015         }
1016         Py_DECREF(key);
1017     }
1018     Py_DECREF(it);
1019     if (PyErr_Occurred())
1020         return -1;
1021     return 0;
1022 }
1023 
1024 static PyObject *
set_update(PySetObject * so,PyObject * args)1025 set_update(PySetObject *so, PyObject *args)
1026 {
1027     Py_ssize_t i;
1028 
1029     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1030         PyObject *other = PyTuple_GET_ITEM(args, i);
1031         if (set_update_internal(so, other))
1032             return NULL;
1033     }
1034     Py_RETURN_NONE;
1035 }
1036 
1037 PyDoc_STRVAR(update_doc,
1038 "Update a set with the union of itself and others.");
1039 
1040 /* XXX Todo:
1041    If aligned memory allocations become available, make the
1042    set object 64 byte aligned so that most of the fields
1043    can be retrieved or updated in a single cache line.
1044 */
1045 
1046 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1047 make_new_set(PyTypeObject *type, PyObject *iterable)
1048 {
1049     PySetObject *so;
1050 
1051     so = (PySetObject *)type->tp_alloc(type, 0);
1052     if (so == NULL)
1053         return NULL;
1054 
1055     so->fill = 0;
1056     so->used = 0;
1057     so->mask = PySet_MINSIZE - 1;
1058     so->table = so->smalltable;
1059     so->hash = -1;
1060     so->finger = 0;
1061     so->weakreflist = NULL;
1062 
1063     if (iterable != NULL) {
1064         if (set_update_internal(so, iterable)) {
1065             Py_DECREF(so);
1066             return NULL;
1067         }
1068     }
1069 
1070     return (PyObject *)so;
1071 }
1072 
1073 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)1074 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
1075 {
1076     if (type != &PySet_Type && type != &PyFrozenSet_Type) {
1077         if (PyType_IsSubtype(type, &PySet_Type))
1078             type = &PySet_Type;
1079         else
1080             type = &PyFrozenSet_Type;
1081     }
1082     return make_new_set(type, iterable);
1083 }
1084 
1085 /* The empty frozenset is a singleton */
1086 static PyObject *emptyfrozenset = NULL;
1087 
1088 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1089 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1090 {
1091     PyObject *iterable = NULL, *result;
1092 
1093     if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds))
1094         return NULL;
1095 
1096     if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1097         return NULL;
1098 
1099     if (type != &PyFrozenSet_Type)
1100         return make_new_set(type, iterable);
1101 
1102     if (iterable != NULL) {
1103         /* frozenset(f) is idempotent */
1104         if (PyFrozenSet_CheckExact(iterable)) {
1105             Py_INCREF(iterable);
1106             return iterable;
1107         }
1108         result = make_new_set(type, iterable);
1109         if (result == NULL || PySet_GET_SIZE(result))
1110             return result;
1111         Py_DECREF(result);
1112     }
1113     /* The empty frozenset is a singleton */
1114     if (emptyfrozenset == NULL)
1115         emptyfrozenset = make_new_set(type, NULL);
1116     Py_XINCREF(emptyfrozenset);
1117     return emptyfrozenset;
1118 }
1119 
1120 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1121 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1122 {
1123     return make_new_set(type, NULL);
1124 }
1125 
1126 /* set_swap_bodies() switches the contents of any two sets by moving their
1127    internal data pointers and, if needed, copying the internal smalltables.
1128    Semantically equivalent to:
1129 
1130      t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1131 
1132    The function always succeeds and it leaves both objects in a stable state.
1133    Useful for operations that update in-place (by allowing an intermediate
1134    result to be swapped into one of the original inputs).
1135 */
1136 
1137 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1138 set_swap_bodies(PySetObject *a, PySetObject *b)
1139 {
1140     Py_ssize_t t;
1141     setentry *u;
1142     setentry tab[PySet_MINSIZE];
1143     Py_hash_t h;
1144 
1145     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1146     t = a->used;     a->used   = b->used;        b->used  = t;
1147     t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1148 
1149     u = a->table;
1150     if (a->table == a->smalltable)
1151         u = b->smalltable;
1152     a->table  = b->table;
1153     if (b->table == b->smalltable)
1154         a->table = a->smalltable;
1155     b->table = u;
1156 
1157     if (a->table == a->smalltable || b->table == b->smalltable) {
1158         memcpy(tab, a->smalltable, sizeof(tab));
1159         memcpy(a->smalltable, b->smalltable, sizeof(tab));
1160         memcpy(b->smalltable, tab, sizeof(tab));
1161     }
1162 
1163     if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1164         PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1165         h = a->hash;     a->hash = b->hash;  b->hash = h;
1166     } else {
1167         a->hash = -1;
1168         b->hash = -1;
1169     }
1170 }
1171 
1172 static PyObject *
set_copy(PySetObject * so)1173 set_copy(PySetObject *so)
1174 {
1175     return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1176 }
1177 
1178 static PyObject *
frozenset_copy(PySetObject * so)1179 frozenset_copy(PySetObject *so)
1180 {
1181     if (PyFrozenSet_CheckExact(so)) {
1182         Py_INCREF(so);
1183         return (PyObject *)so;
1184     }
1185     return set_copy(so);
1186 }
1187 
1188 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1189 
1190 static PyObject *
set_clear(PySetObject * so)1191 set_clear(PySetObject *so)
1192 {
1193     set_clear_internal(so);
1194     Py_RETURN_NONE;
1195 }
1196 
1197 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1198 
1199 static PyObject *
set_union(PySetObject * so,PyObject * args)1200 set_union(PySetObject *so, PyObject *args)
1201 {
1202     PySetObject *result;
1203     PyObject *other;
1204     Py_ssize_t i;
1205 
1206     result = (PySetObject *)set_copy(so);
1207     if (result == NULL)
1208         return NULL;
1209 
1210     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1211         other = PyTuple_GET_ITEM(args, i);
1212         if ((PyObject *)so == other)
1213             continue;
1214         if (set_update_internal(result, other)) {
1215             Py_DECREF(result);
1216             return NULL;
1217         }
1218     }
1219     return (PyObject *)result;
1220 }
1221 
1222 PyDoc_STRVAR(union_doc,
1223  "Return the union of sets as a new set.\n\
1224 \n\
1225 (i.e. all elements that are in either set.)");
1226 
1227 static PyObject *
set_or(PySetObject * so,PyObject * other)1228 set_or(PySetObject *so, PyObject *other)
1229 {
1230     PySetObject *result;
1231 
1232     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1233         Py_RETURN_NOTIMPLEMENTED;
1234 
1235     result = (PySetObject *)set_copy(so);
1236     if (result == NULL)
1237         return NULL;
1238     if ((PyObject *)so == other)
1239         return (PyObject *)result;
1240     if (set_update_internal(result, other)) {
1241         Py_DECREF(result);
1242         return NULL;
1243     }
1244     return (PyObject *)result;
1245 }
1246 
1247 static PyObject *
set_ior(PySetObject * so,PyObject * other)1248 set_ior(PySetObject *so, PyObject *other)
1249 {
1250     if (!PyAnySet_Check(other))
1251         Py_RETURN_NOTIMPLEMENTED;
1252 
1253     if (set_update_internal(so, other))
1254         return NULL;
1255     Py_INCREF(so);
1256     return (PyObject *)so;
1257 }
1258 
1259 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1260 set_intersection(PySetObject *so, PyObject *other)
1261 {
1262     PySetObject *result;
1263     PyObject *key, *it, *tmp;
1264     Py_hash_t hash;
1265     int rv;
1266 
1267     if ((PyObject *)so == other)
1268         return set_copy(so);
1269 
1270     result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1271     if (result == NULL)
1272         return NULL;
1273 
1274     if (PyAnySet_Check(other)) {
1275         Py_ssize_t pos = 0;
1276         setentry *entry;
1277 
1278         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1279             tmp = (PyObject *)so;
1280             so = (PySetObject *)other;
1281             other = tmp;
1282         }
1283 
1284         while (set_next((PySetObject *)other, &pos, &entry)) {
1285             key = entry->key;
1286             hash = entry->hash;
1287             rv = set_contains_entry(so, key, hash);
1288             if (rv < 0) {
1289                 Py_DECREF(result);
1290                 return NULL;
1291             }
1292             if (rv) {
1293                 if (set_add_entry(result, key, hash)) {
1294                     Py_DECREF(result);
1295                     return NULL;
1296                 }
1297             }
1298         }
1299         return (PyObject *)result;
1300     }
1301 
1302     it = PyObject_GetIter(other);
1303     if (it == NULL) {
1304         Py_DECREF(result);
1305         return NULL;
1306     }
1307 
1308     while ((key = PyIter_Next(it)) != NULL) {
1309         hash = PyObject_Hash(key);
1310         if (hash == -1)
1311             goto error;
1312         rv = set_contains_entry(so, key, hash);
1313         if (rv < 0)
1314             goto error;
1315         if (rv) {
1316             if (set_add_entry(result, key, hash))
1317                 goto error;
1318         }
1319         Py_DECREF(key);
1320     }
1321     Py_DECREF(it);
1322     if (PyErr_Occurred()) {
1323         Py_DECREF(result);
1324         return NULL;
1325     }
1326     return (PyObject *)result;
1327   error:
1328     Py_DECREF(it);
1329     Py_DECREF(result);
1330     Py_DECREF(key);
1331     return NULL;
1332 }
1333 
1334 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1335 set_intersection_multi(PySetObject *so, PyObject *args)
1336 {
1337     Py_ssize_t i;
1338     PyObject *result = (PyObject *)so;
1339 
1340     if (PyTuple_GET_SIZE(args) == 0)
1341         return set_copy(so);
1342 
1343     Py_INCREF(so);
1344     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1345         PyObject *other = PyTuple_GET_ITEM(args, i);
1346         PyObject *newresult = set_intersection((PySetObject *)result, other);
1347         if (newresult == NULL) {
1348             Py_DECREF(result);
1349             return NULL;
1350         }
1351         Py_DECREF(result);
1352         result = newresult;
1353     }
1354     return result;
1355 }
1356 
1357 PyDoc_STRVAR(intersection_doc,
1358 "Return the intersection of two sets as a new set.\n\
1359 \n\
1360 (i.e. all elements that are in both sets.)");
1361 
1362 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1363 set_intersection_update(PySetObject *so, PyObject *other)
1364 {
1365     PyObject *tmp;
1366 
1367     tmp = set_intersection(so, other);
1368     if (tmp == NULL)
1369         return NULL;
1370     set_swap_bodies(so, (PySetObject *)tmp);
1371     Py_DECREF(tmp);
1372     Py_RETURN_NONE;
1373 }
1374 
1375 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1376 set_intersection_update_multi(PySetObject *so, PyObject *args)
1377 {
1378     PyObject *tmp;
1379 
1380     tmp = set_intersection_multi(so, args);
1381     if (tmp == NULL)
1382         return NULL;
1383     set_swap_bodies(so, (PySetObject *)tmp);
1384     Py_DECREF(tmp);
1385     Py_RETURN_NONE;
1386 }
1387 
1388 PyDoc_STRVAR(intersection_update_doc,
1389 "Update a set with the intersection of itself and another.");
1390 
1391 static PyObject *
set_and(PySetObject * so,PyObject * other)1392 set_and(PySetObject *so, PyObject *other)
1393 {
1394     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1395         Py_RETURN_NOTIMPLEMENTED;
1396     return set_intersection(so, other);
1397 }
1398 
1399 static PyObject *
set_iand(PySetObject * so,PyObject * other)1400 set_iand(PySetObject *so, PyObject *other)
1401 {
1402     PyObject *result;
1403 
1404     if (!PyAnySet_Check(other))
1405         Py_RETURN_NOTIMPLEMENTED;
1406     result = set_intersection_update(so, other);
1407     if (result == NULL)
1408         return NULL;
1409     Py_DECREF(result);
1410     Py_INCREF(so);
1411     return (PyObject *)so;
1412 }
1413 
1414 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1415 set_isdisjoint(PySetObject *so, PyObject *other)
1416 {
1417     PyObject *key, *it, *tmp;
1418     int rv;
1419 
1420     if ((PyObject *)so == other) {
1421         if (PySet_GET_SIZE(so) == 0)
1422             Py_RETURN_TRUE;
1423         else
1424             Py_RETURN_FALSE;
1425     }
1426 
1427     if (PyAnySet_CheckExact(other)) {
1428         Py_ssize_t pos = 0;
1429         setentry *entry;
1430 
1431         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1432             tmp = (PyObject *)so;
1433             so = (PySetObject *)other;
1434             other = tmp;
1435         }
1436         while (set_next((PySetObject *)other, &pos, &entry)) {
1437             rv = set_contains_entry(so, entry->key, entry->hash);
1438             if (rv < 0)
1439                 return NULL;
1440             if (rv)
1441                 Py_RETURN_FALSE;
1442         }
1443         Py_RETURN_TRUE;
1444     }
1445 
1446     it = PyObject_GetIter(other);
1447     if (it == NULL)
1448         return NULL;
1449 
1450     while ((key = PyIter_Next(it)) != NULL) {
1451         Py_hash_t hash = PyObject_Hash(key);
1452 
1453         if (hash == -1) {
1454             Py_DECREF(key);
1455             Py_DECREF(it);
1456             return NULL;
1457         }
1458         rv = set_contains_entry(so, key, hash);
1459         Py_DECREF(key);
1460         if (rv < 0) {
1461             Py_DECREF(it);
1462             return NULL;
1463         }
1464         if (rv) {
1465             Py_DECREF(it);
1466             Py_RETURN_FALSE;
1467         }
1468     }
1469     Py_DECREF(it);
1470     if (PyErr_Occurred())
1471         return NULL;
1472     Py_RETURN_TRUE;
1473 }
1474 
1475 PyDoc_STRVAR(isdisjoint_doc,
1476 "Return True if two sets have a null intersection.");
1477 
1478 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1479 set_difference_update_internal(PySetObject *so, PyObject *other)
1480 {
1481     if (PySet_GET_SIZE(so) == 0) {
1482         return 0;
1483     }
1484 
1485     if ((PyObject *)so == other)
1486         return set_clear_internal(so);
1487 
1488     if (PyAnySet_Check(other)) {
1489         setentry *entry;
1490         Py_ssize_t pos = 0;
1491 
1492         while (set_next((PySetObject *)other, &pos, &entry))
1493             if (set_discard_entry(so, entry->key, entry->hash) < 0)
1494                 return -1;
1495     } else {
1496         PyObject *key, *it;
1497         it = PyObject_GetIter(other);
1498         if (it == NULL)
1499             return -1;
1500 
1501         while ((key = PyIter_Next(it)) != NULL) {
1502             if (set_discard_key(so, key) < 0) {
1503                 Py_DECREF(it);
1504                 Py_DECREF(key);
1505                 return -1;
1506             }
1507             Py_DECREF(key);
1508         }
1509         Py_DECREF(it);
1510         if (PyErr_Occurred())
1511             return -1;
1512     }
1513     /* If more than 1/4th are dummies, then resize them away. */
1514     if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1515         return 0;
1516     return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1517 }
1518 
1519 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1520 set_difference_update(PySetObject *so, PyObject *args)
1521 {
1522     Py_ssize_t i;
1523 
1524     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1525         PyObject *other = PyTuple_GET_ITEM(args, i);
1526         if (set_difference_update_internal(so, other))
1527             return NULL;
1528     }
1529     Py_RETURN_NONE;
1530 }
1531 
1532 PyDoc_STRVAR(difference_update_doc,
1533 "Remove all elements of another set from this set.");
1534 
1535 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1536 set_copy_and_difference(PySetObject *so, PyObject *other)
1537 {
1538     PyObject *result;
1539 
1540     result = set_copy(so);
1541     if (result == NULL)
1542         return NULL;
1543     if (set_difference_update_internal((PySetObject *) result, other) == 0)
1544         return result;
1545     Py_DECREF(result);
1546     return NULL;
1547 }
1548 
1549 static PyObject *
set_difference(PySetObject * so,PyObject * other)1550 set_difference(PySetObject *so, PyObject *other)
1551 {
1552     PyObject *result;
1553     PyObject *key;
1554     Py_hash_t hash;
1555     setentry *entry;
1556     Py_ssize_t pos = 0, other_size;
1557     int rv;
1558 
1559     if (PySet_GET_SIZE(so) == 0) {
1560         return set_copy(so);
1561     }
1562 
1563     if (PyAnySet_Check(other)) {
1564         other_size = PySet_GET_SIZE(other);
1565     }
1566     else if (PyDict_CheckExact(other)) {
1567         other_size = PyDict_GET_SIZE(other);
1568     }
1569     else {
1570         return set_copy_and_difference(so, other);
1571     }
1572 
1573     /* If len(so) much more than len(other), it's more efficient to simply copy
1574      * so and then iterate other looking for common elements. */
1575     if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1576         return set_copy_and_difference(so, other);
1577     }
1578 
1579     result = make_new_set_basetype(Py_TYPE(so), NULL);
1580     if (result == NULL)
1581         return NULL;
1582 
1583     if (PyDict_CheckExact(other)) {
1584         while (set_next(so, &pos, &entry)) {
1585             key = entry->key;
1586             hash = entry->hash;
1587             rv = _PyDict_Contains(other, key, hash);
1588             if (rv < 0) {
1589                 Py_DECREF(result);
1590                 return NULL;
1591             }
1592             if (!rv) {
1593                 if (set_add_entry((PySetObject *)result, key, hash)) {
1594                     Py_DECREF(result);
1595                     return NULL;
1596                 }
1597             }
1598         }
1599         return result;
1600     }
1601 
1602     /* Iterate over so, checking for common elements in other. */
1603     while (set_next(so, &pos, &entry)) {
1604         key = entry->key;
1605         hash = entry->hash;
1606         rv = set_contains_entry((PySetObject *)other, key, hash);
1607         if (rv < 0) {
1608             Py_DECREF(result);
1609             return NULL;
1610         }
1611         if (!rv) {
1612             if (set_add_entry((PySetObject *)result, key, hash)) {
1613                 Py_DECREF(result);
1614                 return NULL;
1615             }
1616         }
1617     }
1618     return result;
1619 }
1620 
1621 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1622 set_difference_multi(PySetObject *so, PyObject *args)
1623 {
1624     Py_ssize_t i;
1625     PyObject *result, *other;
1626 
1627     if (PyTuple_GET_SIZE(args) == 0)
1628         return set_copy(so);
1629 
1630     other = PyTuple_GET_ITEM(args, 0);
1631     result = set_difference(so, other);
1632     if (result == NULL)
1633         return NULL;
1634 
1635     for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1636         other = PyTuple_GET_ITEM(args, i);
1637         if (set_difference_update_internal((PySetObject *)result, other)) {
1638             Py_DECREF(result);
1639             return NULL;
1640         }
1641     }
1642     return result;
1643 }
1644 
1645 PyDoc_STRVAR(difference_doc,
1646 "Return the difference of two or more sets as a new set.\n\
1647 \n\
1648 (i.e. all elements that are in this set but not the others.)");
1649 static PyObject *
set_sub(PySetObject * so,PyObject * other)1650 set_sub(PySetObject *so, PyObject *other)
1651 {
1652     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1653         Py_RETURN_NOTIMPLEMENTED;
1654     return set_difference(so, other);
1655 }
1656 
1657 static PyObject *
set_isub(PySetObject * so,PyObject * other)1658 set_isub(PySetObject *so, PyObject *other)
1659 {
1660     if (!PyAnySet_Check(other))
1661         Py_RETURN_NOTIMPLEMENTED;
1662     if (set_difference_update_internal(so, other))
1663         return NULL;
1664     Py_INCREF(so);
1665     return (PyObject *)so;
1666 }
1667 
1668 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1669 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1670 {
1671     PySetObject *otherset;
1672     PyObject *key;
1673     Py_ssize_t pos = 0;
1674     Py_hash_t hash;
1675     setentry *entry;
1676     int rv;
1677 
1678     if ((PyObject *)so == other)
1679         return set_clear(so);
1680 
1681     if (PyDict_CheckExact(other)) {
1682         PyObject *value;
1683         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1684             Py_INCREF(key);
1685             rv = set_discard_entry(so, key, hash);
1686             if (rv < 0) {
1687                 Py_DECREF(key);
1688                 return NULL;
1689             }
1690             if (rv == DISCARD_NOTFOUND) {
1691                 if (set_add_entry(so, key, hash)) {
1692                     Py_DECREF(key);
1693                     return NULL;
1694                 }
1695             }
1696             Py_DECREF(key);
1697         }
1698         Py_RETURN_NONE;
1699     }
1700 
1701     if (PyAnySet_Check(other)) {
1702         Py_INCREF(other);
1703         otherset = (PySetObject *)other;
1704     } else {
1705         otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1706         if (otherset == NULL)
1707             return NULL;
1708     }
1709 
1710     while (set_next(otherset, &pos, &entry)) {
1711         key = entry->key;
1712         hash = entry->hash;
1713         rv = set_discard_entry(so, key, hash);
1714         if (rv < 0) {
1715             Py_DECREF(otherset);
1716             return NULL;
1717         }
1718         if (rv == DISCARD_NOTFOUND) {
1719             if (set_add_entry(so, key, hash)) {
1720                 Py_DECREF(otherset);
1721                 return NULL;
1722             }
1723         }
1724     }
1725     Py_DECREF(otherset);
1726     Py_RETURN_NONE;
1727 }
1728 
1729 PyDoc_STRVAR(symmetric_difference_update_doc,
1730 "Update a set with the symmetric difference of itself and another.");
1731 
1732 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1733 set_symmetric_difference(PySetObject *so, PyObject *other)
1734 {
1735     PyObject *rv;
1736     PySetObject *otherset;
1737 
1738     otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1739     if (otherset == NULL)
1740         return NULL;
1741     rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1742     if (rv == NULL) {
1743         Py_DECREF(otherset);
1744         return NULL;
1745     }
1746     Py_DECREF(rv);
1747     return (PyObject *)otherset;
1748 }
1749 
1750 PyDoc_STRVAR(symmetric_difference_doc,
1751 "Return the symmetric difference of two sets as a new set.\n\
1752 \n\
1753 (i.e. all elements that are in exactly one of the sets.)");
1754 
1755 static PyObject *
set_xor(PySetObject * so,PyObject * other)1756 set_xor(PySetObject *so, PyObject *other)
1757 {
1758     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1759         Py_RETURN_NOTIMPLEMENTED;
1760     return set_symmetric_difference(so, other);
1761 }
1762 
1763 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1764 set_ixor(PySetObject *so, PyObject *other)
1765 {
1766     PyObject *result;
1767 
1768     if (!PyAnySet_Check(other))
1769         Py_RETURN_NOTIMPLEMENTED;
1770     result = set_symmetric_difference_update(so, other);
1771     if (result == NULL)
1772         return NULL;
1773     Py_DECREF(result);
1774     Py_INCREF(so);
1775     return (PyObject *)so;
1776 }
1777 
1778 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1779 set_issubset(PySetObject *so, PyObject *other)
1780 {
1781     setentry *entry;
1782     Py_ssize_t pos = 0;
1783     int rv;
1784 
1785     if (!PyAnySet_Check(other)) {
1786         PyObject *tmp, *result;
1787         tmp = make_new_set(&PySet_Type, other);
1788         if (tmp == NULL)
1789             return NULL;
1790         result = set_issubset(so, tmp);
1791         Py_DECREF(tmp);
1792         return result;
1793     }
1794     if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1795         Py_RETURN_FALSE;
1796 
1797     while (set_next(so, &pos, &entry)) {
1798         rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1799         if (rv < 0)
1800             return NULL;
1801         if (!rv)
1802             Py_RETURN_FALSE;
1803     }
1804     Py_RETURN_TRUE;
1805 }
1806 
1807 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1808 
1809 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1810 set_issuperset(PySetObject *so, PyObject *other)
1811 {
1812     PyObject *tmp, *result;
1813 
1814     if (!PyAnySet_Check(other)) {
1815         tmp = make_new_set(&PySet_Type, other);
1816         if (tmp == NULL)
1817             return NULL;
1818         result = set_issuperset(so, tmp);
1819         Py_DECREF(tmp);
1820         return result;
1821     }
1822     return set_issubset((PySetObject *)other, (PyObject *)so);
1823 }
1824 
1825 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1826 
1827 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1828 set_richcompare(PySetObject *v, PyObject *w, int op)
1829 {
1830     PyObject *r1;
1831     int r2;
1832 
1833     if(!PyAnySet_Check(w))
1834         Py_RETURN_NOTIMPLEMENTED;
1835 
1836     switch (op) {
1837     case Py_EQ:
1838         if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1839             Py_RETURN_FALSE;
1840         if (v->hash != -1  &&
1841             ((PySetObject *)w)->hash != -1 &&
1842             v->hash != ((PySetObject *)w)->hash)
1843             Py_RETURN_FALSE;
1844         return set_issubset(v, w);
1845     case Py_NE:
1846         r1 = set_richcompare(v, w, Py_EQ);
1847         if (r1 == NULL)
1848             return NULL;
1849         r2 = PyObject_IsTrue(r1);
1850         Py_DECREF(r1);
1851         if (r2 < 0)
1852             return NULL;
1853         return PyBool_FromLong(!r2);
1854     case Py_LE:
1855         return set_issubset(v, w);
1856     case Py_GE:
1857         return set_issuperset(v, w);
1858     case Py_LT:
1859         if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1860             Py_RETURN_FALSE;
1861         return set_issubset(v, w);
1862     case Py_GT:
1863         if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1864             Py_RETURN_FALSE;
1865         return set_issuperset(v, w);
1866     }
1867     Py_RETURN_NOTIMPLEMENTED;
1868 }
1869 
1870 static PyObject *
set_add(PySetObject * so,PyObject * key)1871 set_add(PySetObject *so, PyObject *key)
1872 {
1873     if (set_add_key(so, key))
1874         return NULL;
1875     Py_RETURN_NONE;
1876 }
1877 
1878 PyDoc_STRVAR(add_doc,
1879 "Add an element to a set.\n\
1880 \n\
1881 This has no effect if the element is already present.");
1882 
1883 static int
set_contains(PySetObject * so,PyObject * key)1884 set_contains(PySetObject *so, PyObject *key)
1885 {
1886     PyObject *tmpkey;
1887     int rv;
1888 
1889     rv = set_contains_key(so, key);
1890     if (rv < 0) {
1891         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1892             return -1;
1893         PyErr_Clear();
1894         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1895         if (tmpkey == NULL)
1896             return -1;
1897         rv = set_contains_key(so, tmpkey);
1898         Py_DECREF(tmpkey);
1899     }
1900     return rv;
1901 }
1902 
1903 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1904 set_direct_contains(PySetObject *so, PyObject *key)
1905 {
1906     long result;
1907 
1908     result = set_contains(so, key);
1909     if (result < 0)
1910         return NULL;
1911     return PyBool_FromLong(result);
1912 }
1913 
1914 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1915 
1916 static PyObject *
set_remove(PySetObject * so,PyObject * key)1917 set_remove(PySetObject *so, PyObject *key)
1918 {
1919     PyObject *tmpkey;
1920     int rv;
1921 
1922     rv = set_discard_key(so, key);
1923     if (rv < 0) {
1924         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1925             return NULL;
1926         PyErr_Clear();
1927         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1928         if (tmpkey == NULL)
1929             return NULL;
1930         rv = set_discard_key(so, tmpkey);
1931         Py_DECREF(tmpkey);
1932         if (rv < 0)
1933             return NULL;
1934     }
1935 
1936     if (rv == DISCARD_NOTFOUND) {
1937         _PyErr_SetKeyError(key);
1938         return NULL;
1939     }
1940     Py_RETURN_NONE;
1941 }
1942 
1943 PyDoc_STRVAR(remove_doc,
1944 "Remove an element from a set; it must be a member.\n\
1945 \n\
1946 If the element is not a member, raise a KeyError.");
1947 
1948 static PyObject *
set_discard(PySetObject * so,PyObject * key)1949 set_discard(PySetObject *so, PyObject *key)
1950 {
1951     PyObject *tmpkey;
1952     int rv;
1953 
1954     rv = set_discard_key(so, key);
1955     if (rv < 0) {
1956         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1957             return NULL;
1958         PyErr_Clear();
1959         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1960         if (tmpkey == NULL)
1961             return NULL;
1962         rv = set_discard_key(so, tmpkey);
1963         Py_DECREF(tmpkey);
1964         if (rv < 0)
1965             return NULL;
1966     }
1967     Py_RETURN_NONE;
1968 }
1969 
1970 PyDoc_STRVAR(discard_doc,
1971 "Remove an element from a set if it is a member.\n\
1972 \n\
1973 If the element is not a member, do nothing.");
1974 
1975 static PyObject *
set_reduce(PySetObject * so)1976 set_reduce(PySetObject *so)
1977 {
1978     PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1979     _Py_IDENTIFIER(__dict__);
1980 
1981     keys = PySequence_List((PyObject *)so);
1982     if (keys == NULL)
1983         goto done;
1984     args = PyTuple_Pack(1, keys);
1985     if (args == NULL)
1986         goto done;
1987     dict = _PyObject_GetAttrId((PyObject *)so, &PyId___dict__);
1988     if (dict == NULL) {
1989         PyErr_Clear();
1990         dict = Py_None;
1991         Py_INCREF(dict);
1992     }
1993     result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1994 done:
1995     Py_XDECREF(args);
1996     Py_XDECREF(keys);
1997     Py_XDECREF(dict);
1998     return result;
1999 }
2000 
2001 static PyObject *
set_sizeof(PySetObject * so)2002 set_sizeof(PySetObject *so)
2003 {
2004     Py_ssize_t res;
2005 
2006     res = _PyObject_SIZE(Py_TYPE(so));
2007     if (so->table != so->smalltable)
2008         res = res + (so->mask + 1) * sizeof(setentry);
2009     return PyLong_FromSsize_t(res);
2010 }
2011 
2012 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
2013 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)2014 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
2015 {
2016     PyObject *iterable = NULL;
2017 
2018     if (!_PyArg_NoKeywords("set", kwds))
2019         return -1;
2020     if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2021         return -1;
2022     if (self->fill)
2023         set_clear_internal(self);
2024     self->hash = -1;
2025     if (iterable == NULL)
2026         return 0;
2027     return set_update_internal(self, iterable);
2028 }
2029 
2030 static PySequenceMethods set_as_sequence = {
2031     set_len,                            /* sq_length */
2032     0,                                  /* sq_concat */
2033     0,                                  /* sq_repeat */
2034     0,                                  /* sq_item */
2035     0,                                  /* sq_slice */
2036     0,                                  /* sq_ass_item */
2037     0,                                  /* sq_ass_slice */
2038     (objobjproc)set_contains,           /* sq_contains */
2039 };
2040 
2041 /* set object ********************************************************/
2042 
2043 #ifdef Py_DEBUG
2044 static PyObject *test_c_api(PySetObject *so);
2045 
2046 PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
2047 All is well if assertions don't fail.");
2048 #endif
2049 
2050 static PyMethodDef set_methods[] = {
2051     {"add",             (PyCFunction)set_add,           METH_O,
2052      add_doc},
2053     {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2054      clear_doc},
2055     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2056      contains_doc},
2057     {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2058      copy_doc},
2059     {"discard",         (PyCFunction)set_discard,       METH_O,
2060      discard_doc},
2061     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2062      difference_doc},
2063     {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2064      difference_update_doc},
2065     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2066      intersection_doc},
2067     {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2068      intersection_update_doc},
2069     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2070      isdisjoint_doc},
2071     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2072      issubset_doc},
2073     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2074      issuperset_doc},
2075     {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2076      pop_doc},
2077     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2078      reduce_doc},
2079     {"remove",          (PyCFunction)set_remove,        METH_O,
2080      remove_doc},
2081     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2082      sizeof_doc},
2083     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2084      symmetric_difference_doc},
2085     {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2086      symmetric_difference_update_doc},
2087 #ifdef Py_DEBUG
2088     {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2089      test_c_api_doc},
2090 #endif
2091     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2092      union_doc},
2093     {"update",          (PyCFunction)set_update,        METH_VARARGS,
2094      update_doc},
2095     {NULL,              NULL}   /* sentinel */
2096 };
2097 
2098 static PyNumberMethods set_as_number = {
2099     0,                                  /*nb_add*/
2100     (binaryfunc)set_sub,                /*nb_subtract*/
2101     0,                                  /*nb_multiply*/
2102     0,                                  /*nb_remainder*/
2103     0,                                  /*nb_divmod*/
2104     0,                                  /*nb_power*/
2105     0,                                  /*nb_negative*/
2106     0,                                  /*nb_positive*/
2107     0,                                  /*nb_absolute*/
2108     0,                                  /*nb_bool*/
2109     0,                                  /*nb_invert*/
2110     0,                                  /*nb_lshift*/
2111     0,                                  /*nb_rshift*/
2112     (binaryfunc)set_and,                /*nb_and*/
2113     (binaryfunc)set_xor,                /*nb_xor*/
2114     (binaryfunc)set_or,                 /*nb_or*/
2115     0,                                  /*nb_int*/
2116     0,                                  /*nb_reserved*/
2117     0,                                  /*nb_float*/
2118     0,                                  /*nb_inplace_add*/
2119     (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2120     0,                                  /*nb_inplace_multiply*/
2121     0,                                  /*nb_inplace_remainder*/
2122     0,                                  /*nb_inplace_power*/
2123     0,                                  /*nb_inplace_lshift*/
2124     0,                                  /*nb_inplace_rshift*/
2125     (binaryfunc)set_iand,               /*nb_inplace_and*/
2126     (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2127     (binaryfunc)set_ior,                /*nb_inplace_or*/
2128 };
2129 
2130 PyDoc_STRVAR(set_doc,
2131 "set() -> new empty set object\n\
2132 set(iterable) -> new set object\n\
2133 \n\
2134 Build an unordered collection of unique elements.");
2135 
2136 PyTypeObject PySet_Type = {
2137     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2138     "set",                              /* tp_name */
2139     sizeof(PySetObject),                /* tp_basicsize */
2140     0,                                  /* tp_itemsize */
2141     /* methods */
2142     (destructor)set_dealloc,            /* tp_dealloc */
2143     0,                                  /* tp_print */
2144     0,                                  /* tp_getattr */
2145     0,                                  /* tp_setattr */
2146     0,                                  /* tp_reserved */
2147     (reprfunc)set_repr,                 /* tp_repr */
2148     &set_as_number,                     /* tp_as_number */
2149     &set_as_sequence,                   /* tp_as_sequence */
2150     0,                                  /* tp_as_mapping */
2151     PyObject_HashNotImplemented,        /* tp_hash */
2152     0,                                  /* tp_call */
2153     0,                                  /* tp_str */
2154     PyObject_GenericGetAttr,            /* tp_getattro */
2155     0,                                  /* tp_setattro */
2156     0,                                  /* tp_as_buffer */
2157     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2158         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2159     set_doc,                            /* tp_doc */
2160     (traverseproc)set_traverse,         /* tp_traverse */
2161     (inquiry)set_clear_internal,        /* tp_clear */
2162     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2163     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2164     (getiterfunc)set_iter,              /* tp_iter */
2165     0,                                  /* tp_iternext */
2166     set_methods,                        /* tp_methods */
2167     0,                                  /* tp_members */
2168     0,                                  /* tp_getset */
2169     0,                                  /* tp_base */
2170     0,                                  /* tp_dict */
2171     0,                                  /* tp_descr_get */
2172     0,                                  /* tp_descr_set */
2173     0,                                  /* tp_dictoffset */
2174     (initproc)set_init,                 /* tp_init */
2175     PyType_GenericAlloc,                /* tp_alloc */
2176     set_new,                            /* tp_new */
2177     PyObject_GC_Del,                    /* tp_free */
2178 };
2179 
2180 /* frozenset object ********************************************************/
2181 
2182 
2183 static PyMethodDef frozenset_methods[] = {
2184     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2185      contains_doc},
2186     {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2187      copy_doc},
2188     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2189      difference_doc},
2190     {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,
2191      intersection_doc},
2192     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2193      isdisjoint_doc},
2194     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2195      issubset_doc},
2196     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2197      issuperset_doc},
2198     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2199      reduce_doc},
2200     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2201      sizeof_doc},
2202     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2203      symmetric_difference_doc},
2204     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2205      union_doc},
2206     {NULL,              NULL}   /* sentinel */
2207 };
2208 
2209 static PyNumberMethods frozenset_as_number = {
2210     0,                                  /*nb_add*/
2211     (binaryfunc)set_sub,                /*nb_subtract*/
2212     0,                                  /*nb_multiply*/
2213     0,                                  /*nb_remainder*/
2214     0,                                  /*nb_divmod*/
2215     0,                                  /*nb_power*/
2216     0,                                  /*nb_negative*/
2217     0,                                  /*nb_positive*/
2218     0,                                  /*nb_absolute*/
2219     0,                                  /*nb_bool*/
2220     0,                                  /*nb_invert*/
2221     0,                                  /*nb_lshift*/
2222     0,                                  /*nb_rshift*/
2223     (binaryfunc)set_and,                /*nb_and*/
2224     (binaryfunc)set_xor,                /*nb_xor*/
2225     (binaryfunc)set_or,                 /*nb_or*/
2226 };
2227 
2228 PyDoc_STRVAR(frozenset_doc,
2229 "frozenset() -> empty frozenset object\n\
2230 frozenset(iterable) -> frozenset object\n\
2231 \n\
2232 Build an immutable unordered collection of unique elements.");
2233 
2234 PyTypeObject PyFrozenSet_Type = {
2235     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2236     "frozenset",                        /* tp_name */
2237     sizeof(PySetObject),                /* tp_basicsize */
2238     0,                                  /* tp_itemsize */
2239     /* methods */
2240     (destructor)set_dealloc,            /* tp_dealloc */
2241     0,                                  /* tp_print */
2242     0,                                  /* tp_getattr */
2243     0,                                  /* tp_setattr */
2244     0,                                  /* tp_reserved */
2245     (reprfunc)set_repr,                 /* tp_repr */
2246     &frozenset_as_number,               /* tp_as_number */
2247     &set_as_sequence,                   /* tp_as_sequence */
2248     0,                                  /* tp_as_mapping */
2249     frozenset_hash,                     /* tp_hash */
2250     0,                                  /* tp_call */
2251     0,                                  /* tp_str */
2252     PyObject_GenericGetAttr,            /* tp_getattro */
2253     0,                                  /* tp_setattro */
2254     0,                                  /* tp_as_buffer */
2255     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2256         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2257     frozenset_doc,                      /* tp_doc */
2258     (traverseproc)set_traverse,         /* tp_traverse */
2259     (inquiry)set_clear_internal,        /* tp_clear */
2260     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2261     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2262     (getiterfunc)set_iter,              /* tp_iter */
2263     0,                                  /* tp_iternext */
2264     frozenset_methods,                  /* tp_methods */
2265     0,                                  /* tp_members */
2266     0,                                  /* tp_getset */
2267     0,                                  /* tp_base */
2268     0,                                  /* tp_dict */
2269     0,                                  /* tp_descr_get */
2270     0,                                  /* tp_descr_set */
2271     0,                                  /* tp_dictoffset */
2272     0,                                  /* tp_init */
2273     PyType_GenericAlloc,                /* tp_alloc */
2274     frozenset_new,                      /* tp_new */
2275     PyObject_GC_Del,                    /* tp_free */
2276 };
2277 
2278 
2279 /***** C API functions *************************************************/
2280 
2281 PyObject *
PySet_New(PyObject * iterable)2282 PySet_New(PyObject *iterable)
2283 {
2284     return make_new_set(&PySet_Type, iterable);
2285 }
2286 
2287 PyObject *
PyFrozenSet_New(PyObject * iterable)2288 PyFrozenSet_New(PyObject *iterable)
2289 {
2290     return make_new_set(&PyFrozenSet_Type, iterable);
2291 }
2292 
2293 Py_ssize_t
PySet_Size(PyObject * anyset)2294 PySet_Size(PyObject *anyset)
2295 {
2296     if (!PyAnySet_Check(anyset)) {
2297         PyErr_BadInternalCall();
2298         return -1;
2299     }
2300     return PySet_GET_SIZE(anyset);
2301 }
2302 
2303 int
PySet_Clear(PyObject * set)2304 PySet_Clear(PyObject *set)
2305 {
2306     if (!PySet_Check(set)) {
2307         PyErr_BadInternalCall();
2308         return -1;
2309     }
2310     return set_clear_internal((PySetObject *)set);
2311 }
2312 
2313 int
PySet_Contains(PyObject * anyset,PyObject * key)2314 PySet_Contains(PyObject *anyset, PyObject *key)
2315 {
2316     if (!PyAnySet_Check(anyset)) {
2317         PyErr_BadInternalCall();
2318         return -1;
2319     }
2320     return set_contains_key((PySetObject *)anyset, key);
2321 }
2322 
2323 int
PySet_Discard(PyObject * set,PyObject * key)2324 PySet_Discard(PyObject *set, PyObject *key)
2325 {
2326     if (!PySet_Check(set)) {
2327         PyErr_BadInternalCall();
2328         return -1;
2329     }
2330     return set_discard_key((PySetObject *)set, key);
2331 }
2332 
2333 int
PySet_Add(PyObject * anyset,PyObject * key)2334 PySet_Add(PyObject *anyset, PyObject *key)
2335 {
2336     if (!PySet_Check(anyset) &&
2337         (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2338         PyErr_BadInternalCall();
2339         return -1;
2340     }
2341     return set_add_key((PySetObject *)anyset, key);
2342 }
2343 
2344 int
PySet_ClearFreeList(void)2345 PySet_ClearFreeList(void)
2346 {
2347     return 0;
2348 }
2349 
2350 void
PySet_Fini(void)2351 PySet_Fini(void)
2352 {
2353     Py_CLEAR(emptyfrozenset);
2354 }
2355 
2356 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2357 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2358 {
2359     setentry *entry;
2360 
2361     if (!PyAnySet_Check(set)) {
2362         PyErr_BadInternalCall();
2363         return -1;
2364     }
2365     if (set_next((PySetObject *)set, pos, &entry) == 0)
2366         return 0;
2367     *key = entry->key;
2368     *hash = entry->hash;
2369     return 1;
2370 }
2371 
2372 PyObject *
PySet_Pop(PyObject * set)2373 PySet_Pop(PyObject *set)
2374 {
2375     if (!PySet_Check(set)) {
2376         PyErr_BadInternalCall();
2377         return NULL;
2378     }
2379     return set_pop((PySetObject *)set);
2380 }
2381 
2382 int
_PySet_Update(PyObject * set,PyObject * iterable)2383 _PySet_Update(PyObject *set, PyObject *iterable)
2384 {
2385     if (!PySet_Check(set)) {
2386         PyErr_BadInternalCall();
2387         return -1;
2388     }
2389     return set_update_internal((PySetObject *)set, iterable);
2390 }
2391 
2392 /* Exported for the gdb plugin's benefit. */
2393 PyObject *_PySet_Dummy = dummy;
2394 
2395 #ifdef Py_DEBUG
2396 
2397 /* Test code to be called with any three element set.
2398    Returns True and original set is restored. */
2399 
2400 #define assertRaises(call_return_value, exception)              \
2401     do {                                                        \
2402         assert(call_return_value);                              \
2403         assert(PyErr_ExceptionMatches(exception));              \
2404         PyErr_Clear();                                          \
2405     } while(0)
2406 
2407 static PyObject *
test_c_api(PySetObject * so)2408 test_c_api(PySetObject *so)
2409 {
2410     Py_ssize_t count;
2411     const char *s;
2412     Py_ssize_t i;
2413     PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2414     PyObject *ob = (PyObject *)so;
2415     Py_hash_t hash;
2416     PyObject *str;
2417 
2418     /* Verify preconditions */
2419     assert(PyAnySet_Check(ob));
2420     assert(PyAnySet_CheckExact(ob));
2421     assert(!PyFrozenSet_CheckExact(ob));
2422 
2423     /* so.clear(); so |= set("abc"); */
2424     str = PyUnicode_FromString("abc");
2425     if (str == NULL)
2426         return NULL;
2427     set_clear_internal(so);
2428     if (set_update_internal(so, str)) {
2429         Py_DECREF(str);
2430         return NULL;
2431     }
2432     Py_DECREF(str);
2433 
2434     /* Exercise type/size checks */
2435     assert(PySet_Size(ob) == 3);
2436     assert(PySet_GET_SIZE(ob) == 3);
2437 
2438     /* Raise TypeError for non-iterable constructor arguments */
2439     assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2440     assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2441 
2442     /* Raise TypeError for unhashable key */
2443     dup = PySet_New(ob);
2444     assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2445     assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2446     assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2447 
2448     /* Exercise successful pop, contains, add, and discard */
2449     elem = PySet_Pop(ob);
2450     assert(PySet_Contains(ob, elem) == 0);
2451     assert(PySet_GET_SIZE(ob) == 2);
2452     assert(PySet_Add(ob, elem) == 0);
2453     assert(PySet_Contains(ob, elem) == 1);
2454     assert(PySet_GET_SIZE(ob) == 3);
2455     assert(PySet_Discard(ob, elem) == 1);
2456     assert(PySet_GET_SIZE(ob) == 2);
2457     assert(PySet_Discard(ob, elem) == 0);
2458     assert(PySet_GET_SIZE(ob) == 2);
2459 
2460     /* Exercise clear */
2461     dup2 = PySet_New(dup);
2462     assert(PySet_Clear(dup2) == 0);
2463     assert(PySet_Size(dup2) == 0);
2464     Py_DECREF(dup2);
2465 
2466     /* Raise SystemError on clear or update of frozen set */
2467     f = PyFrozenSet_New(dup);
2468     assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2469     assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2470     assert(PySet_Add(f, elem) == 0);
2471     Py_INCREF(f);
2472     assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2473     Py_DECREF(f);
2474     Py_DECREF(f);
2475 
2476     /* Exercise direct iteration */
2477     i = 0, count = 0;
2478     while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2479         s = PyUnicode_AsUTF8(x);
2480         assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2481         count++;
2482     }
2483     assert(count == 3);
2484 
2485     /* Exercise updates */
2486     dup2 = PySet_New(NULL);
2487     assert(_PySet_Update(dup2, dup) == 0);
2488     assert(PySet_Size(dup2) == 3);
2489     assert(_PySet_Update(dup2, dup) == 0);
2490     assert(PySet_Size(dup2) == 3);
2491     Py_DECREF(dup2);
2492 
2493     /* Raise SystemError when self argument is not a set or frozenset. */
2494     t = PyTuple_New(0);
2495     assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2496     assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2497     Py_DECREF(t);
2498 
2499     /* Raise SystemError when self argument is not a set. */
2500     f = PyFrozenSet_New(dup);
2501     assert(PySet_Size(f) == 3);
2502     assert(PyFrozenSet_CheckExact(f));
2503     assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2504     assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2505     Py_DECREF(f);
2506 
2507     /* Raise KeyError when popping from an empty set */
2508     assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2509     Py_DECREF(ob);
2510     assert(PySet_GET_SIZE(ob) == 0);
2511     assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2512 
2513     /* Restore the set from the copy using the PyNumber API */
2514     assert(PyNumber_InPlaceOr(ob, dup) == ob);
2515     Py_DECREF(ob);
2516 
2517     /* Verify constructors accept NULL arguments */
2518     f = PySet_New(NULL);
2519     assert(f != NULL);
2520     assert(PySet_GET_SIZE(f) == 0);
2521     Py_DECREF(f);
2522     f = PyFrozenSet_New(NULL);
2523     assert(f != NULL);
2524     assert(PyFrozenSet_CheckExact(f));
2525     assert(PySet_GET_SIZE(f) == 0);
2526     Py_DECREF(f);
2527 
2528     Py_DECREF(elem);
2529     Py_DECREF(dup);
2530     Py_RETURN_TRUE;
2531 }
2532 
2533 #undef assertRaises
2534 
2535 #endif
2536 
2537 /***** Dummy Struct  *************************************************/
2538 
2539 static PyObject *
dummy_repr(PyObject * op)2540 dummy_repr(PyObject *op)
2541 {
2542     return PyUnicode_FromString("<dummy key>");
2543 }
2544 
2545 static void
dummy_dealloc(PyObject * ignore)2546 dummy_dealloc(PyObject* ignore)
2547 {
2548     Py_FatalError("deallocating <dummy key>");
2549 }
2550 
2551 static PyTypeObject _PySetDummy_Type = {
2552     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2553     "<dummy key> type",
2554     0,
2555     0,
2556     dummy_dealloc,      /*tp_dealloc*/ /*never called*/
2557     0,                  /*tp_print*/
2558     0,                  /*tp_getattr*/
2559     0,                  /*tp_setattr*/
2560     0,                  /*tp_reserved*/
2561     dummy_repr,         /*tp_repr*/
2562     0,                  /*tp_as_number*/
2563     0,                  /*tp_as_sequence*/
2564     0,                  /*tp_as_mapping*/
2565     0,                  /*tp_hash */
2566     0,                  /*tp_call */
2567     0,                  /*tp_str */
2568     0,                  /*tp_getattro */
2569     0,                  /*tp_setattro */
2570     0,                  /*tp_as_buffer */
2571     Py_TPFLAGS_DEFAULT, /*tp_flags */
2572 };
2573 
2574 static PyObject _dummy_struct = {
2575   _PyObject_EXTRA_INIT
2576   2, &_PySetDummy_Type
2577 };
2578 
2579