• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* set object implementation
3 
4    Written and maintained by Raymond D. Hettinger <python@rcn.com>
5    Derived from Lib/sets.py and Objects/dictobject.c.
6 
7    The basic lookup function used by all operations.
8    This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9 
10    The initial probe index is computed as hash mod the table size.
11    Subsequent probe indices are computed as explained in Objects/dictobject.c.
12 
13    To improve cache locality, each probe inspects a series of consecutive
14    nearby entries before moving on to probes elsewhere in memory.  This leaves
15    us with a hybrid of linear probing and open addressing.  The linear probing
16    reduces the cost of hash collisions because consecutive memory accesses
17    tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
18    we then use open addressing with the upper bits from the hash value.  This
19    helps break-up long chains of collisions.
20 
21    All arithmetic on hash should ignore overflow.
22 
23    Unlike the dictionary implementation, the lookkey function can return
24    NULL if the rich comparison returns an error.
25 */
26 
27 #include "Python.h"
28 #include "structmember.h"
29 
30 /* Object used as dummy key to fill deleted entries */
31 static PyObject _dummy_struct;
32 
33 #define dummy (&_dummy_struct)
34 
35 
36 /* ======================================================================== */
37 /* ======= Begin logic for probing the hash table ========================= */
38 
39 /* Set this to zero to turn-off linear probing */
40 #ifndef LINEAR_PROBES
41 #define LINEAR_PROBES 9
42 #endif
43 
44 /* This must be >= 1 */
45 #define PERTURB_SHIFT 5
46 
47 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)48 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
49 {
50     setentry *table;
51     setentry *entry;
52     size_t perturb;
53     size_t mask = so->mask;
54     size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
55     size_t j;
56     int cmp;
57 
58     entry = &so->table[i];
59     if (entry->key == NULL)
60         return entry;
61 
62     perturb = hash;
63 
64     while (1) {
65         if (entry->hash == hash) {
66             PyObject *startkey = entry->key;
67             /* startkey cannot be a dummy because the dummy hash field is -1 */
68             assert(startkey != dummy);
69             if (startkey == key)
70                 return entry;
71             if (PyUnicode_CheckExact(startkey)
72                 && PyUnicode_CheckExact(key)
73                 && _PyUnicode_EQ(startkey, key))
74                 return entry;
75             table = so->table;
76             Py_INCREF(startkey);
77             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
78             Py_DECREF(startkey);
79             if (cmp < 0)                                          /* unlikely */
80                 return NULL;
81             if (table != so->table || entry->key != startkey)     /* unlikely */
82                 return set_lookkey(so, key, hash);
83             if (cmp > 0)                                          /* likely */
84                 return entry;
85             mask = so->mask;                 /* help avoid a register spill */
86         }
87 
88         if (i + LINEAR_PROBES <= mask) {
89             for (j = 0 ; j < LINEAR_PROBES ; j++) {
90                 entry++;
91                 if (entry->hash == 0 && entry->key == NULL)
92                     return entry;
93                 if (entry->hash == hash) {
94                     PyObject *startkey = entry->key;
95                     assert(startkey != dummy);
96                     if (startkey == key)
97                         return entry;
98                     if (PyUnicode_CheckExact(startkey)
99                         && PyUnicode_CheckExact(key)
100                         && _PyUnicode_EQ(startkey, key))
101                         return entry;
102                     table = so->table;
103                     Py_INCREF(startkey);
104                     cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
105                     Py_DECREF(startkey);
106                     if (cmp < 0)
107                         return NULL;
108                     if (table != so->table || entry->key != startkey)
109                         return set_lookkey(so, key, hash);
110                     if (cmp > 0)
111                         return entry;
112                     mask = so->mask;
113                 }
114             }
115         }
116 
117         perturb >>= PERTURB_SHIFT;
118         i = (i * 5 + 1 + perturb) & mask;
119 
120         entry = &so->table[i];
121         if (entry->key == NULL)
122             return entry;
123     }
124 }
125 
126 static int set_table_resize(PySetObject *, Py_ssize_t);
127 
128 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)129 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
130 {
131     setentry *table;
132     setentry *freeslot;
133     setentry *entry;
134     size_t perturb;
135     size_t mask;
136     size_t i;                       /* Unsigned for defined overflow behavior */
137     size_t j;
138     int cmp;
139 
140     /* Pre-increment is necessary to prevent arbitrary code in the rich
141        comparison from deallocating the key just before the insertion. */
142     Py_INCREF(key);
143 
144   restart:
145 
146     mask = so->mask;
147     i = (size_t)hash & mask;
148 
149     entry = &so->table[i];
150     if (entry->key == NULL)
151         goto found_unused;
152 
153     freeslot = NULL;
154     perturb = hash;
155 
156     while (1) {
157         if (entry->hash == hash) {
158             PyObject *startkey = entry->key;
159             /* startkey cannot be a dummy because the dummy hash field is -1 */
160             assert(startkey != dummy);
161             if (startkey == key)
162                 goto found_active;
163             if (PyUnicode_CheckExact(startkey)
164                 && PyUnicode_CheckExact(key)
165                 && _PyUnicode_EQ(startkey, key))
166                 goto found_active;
167             table = so->table;
168             Py_INCREF(startkey);
169             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
170             Py_DECREF(startkey);
171             if (cmp > 0)                                          /* likely */
172                 goto found_active;
173             if (cmp < 0)
174                 goto comparison_error;
175             /* Continuing the search from the current entry only makes
176                sense if the table and entry are unchanged; otherwise,
177                we have to restart from the beginning */
178             if (table != so->table || entry->key != startkey)
179                 goto restart;
180             mask = so->mask;                 /* help avoid a register spill */
181         }
182         else if (entry->hash == -1 && freeslot == NULL)
183             freeslot = entry;
184 
185         if (i + LINEAR_PROBES <= mask) {
186             for (j = 0 ; j < LINEAR_PROBES ; j++) {
187                 entry++;
188                 if (entry->hash == 0 && entry->key == NULL)
189                     goto found_unused_or_dummy;
190                 if (entry->hash == hash) {
191                     PyObject *startkey = entry->key;
192                     assert(startkey != dummy);
193                     if (startkey == key)
194                         goto found_active;
195                     if (PyUnicode_CheckExact(startkey)
196                         && PyUnicode_CheckExact(key)
197                         && _PyUnicode_EQ(startkey, key))
198                         goto found_active;
199                     table = so->table;
200                     Py_INCREF(startkey);
201                     cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
202                     Py_DECREF(startkey);
203                     if (cmp > 0)
204                         goto found_active;
205                     if (cmp < 0)
206                         goto comparison_error;
207                     if (table != so->table || entry->key != startkey)
208                         goto restart;
209                     mask = so->mask;
210                 }
211                 else if (entry->hash == -1 && freeslot == NULL)
212                     freeslot = entry;
213             }
214         }
215 
216         perturb >>= PERTURB_SHIFT;
217         i = (i * 5 + 1 + perturb) & mask;
218 
219         entry = &so->table[i];
220         if (entry->key == NULL)
221             goto found_unused_or_dummy;
222     }
223 
224   found_unused_or_dummy:
225     if (freeslot == NULL)
226         goto found_unused;
227     so->used++;
228     freeslot->key = key;
229     freeslot->hash = hash;
230     return 0;
231 
232   found_unused:
233     so->fill++;
234     so->used++;
235     entry->key = key;
236     entry->hash = hash;
237     if ((size_t)so->fill*3 < mask*2)
238         return 0;
239     return set_table_resize(so, so->used);
240 
241   found_active:
242     Py_DECREF(key);
243     return 0;
244 
245   comparison_error:
246     Py_DECREF(key);
247     return -1;
248 }
249 
250 /*
251 Internal routine used by set_table_resize() to insert an item which is
252 known to be absent from the set.  This routine also assumes that
253 the set contains no deleted entries.  Besides the performance benefit,
254 there is also safety benefit since using set_add_entry() risks making
255 a callback in the middle of a set_table_resize(), see issue 1456209.
256 The caller is responsible for updating the key's reference count and
257 the setobject's fill and used fields.
258 */
259 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)260 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
261 {
262     setentry *entry;
263     size_t perturb = hash;
264     size_t i = (size_t)hash & mask;
265     size_t j;
266 
267     while (1) {
268         entry = &table[i];
269         if (entry->key == NULL)
270             goto found_null;
271         if (i + LINEAR_PROBES <= mask) {
272             for (j = 0; j < LINEAR_PROBES; j++) {
273                 entry++;
274                 if (entry->key == NULL)
275                     goto found_null;
276             }
277         }
278         perturb >>= PERTURB_SHIFT;
279         i = (i * 5 + 1 + perturb) & mask;
280     }
281   found_null:
282     entry->key = key;
283     entry->hash = hash;
284 }
285 
286 /* ======== End logic for probing the hash table ========================== */
287 /* ======================================================================== */
288 
289 /*
290 Restructure the table by allocating a new table and reinserting all
291 keys again.  When entries have been deleted, the new table may
292 actually be smaller than the old one.
293 */
294 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)295 set_table_resize(PySetObject *so, Py_ssize_t minused)
296 {
297     Py_ssize_t newsize;
298     setentry *oldtable, *newtable, *entry;
299     Py_ssize_t oldfill = so->fill;
300     Py_ssize_t oldused = so->used;
301     Py_ssize_t oldmask = so->mask;
302     size_t newmask;
303     int is_oldtable_malloced;
304     setentry small_copy[PySet_MINSIZE];
305 
306     assert(minused >= 0);
307     minused = (minused > 50000) ? minused * 2 : minused * 4;
308 
309     /* Find the smallest table size > minused. */
310     /* XXX speed-up with intrinsics */
311     for (newsize = PySet_MINSIZE;
312          newsize <= minused && newsize > 0;
313          newsize <<= 1)
314         ;
315     if (newsize <= 0) {
316         PyErr_NoMemory();
317         return -1;
318     }
319 
320     /* Get space for a new table. */
321     oldtable = so->table;
322     assert(oldtable != NULL);
323     is_oldtable_malloced = oldtable != so->smalltable;
324 
325     if (newsize == PySet_MINSIZE) {
326         /* A large table is shrinking, or we can't get any smaller. */
327         newtable = so->smalltable;
328         if (newtable == oldtable) {
329             if (so->fill == so->used) {
330                 /* No dummies, so no point doing anything. */
331                 return 0;
332             }
333             /* We're not going to resize it, but rebuild the
334                table anyway to purge old dummy entries.
335                Subtle:  This is *necessary* if fill==size,
336                as set_lookkey needs at least one virgin slot to
337                terminate failing searches.  If fill < size, it's
338                merely desirable, as dummies slow searches. */
339             assert(so->fill > so->used);
340             memcpy(small_copy, oldtable, sizeof(small_copy));
341             oldtable = small_copy;
342         }
343     }
344     else {
345         newtable = PyMem_NEW(setentry, newsize);
346         if (newtable == NULL) {
347             PyErr_NoMemory();
348             return -1;
349         }
350     }
351 
352     /* Make the set empty, using the new table. */
353     assert(newtable != oldtable);
354     memset(newtable, 0, sizeof(setentry) * newsize);
355     so->fill = oldused;
356     so->used = oldused;
357     so->mask = newsize - 1;
358     so->table = newtable;
359 
360     /* Copy the data over; this is refcount-neutral for active entries;
361        dummy entries aren't copied over, of course */
362     newmask = (size_t)so->mask;
363     if (oldfill == oldused) {
364         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
365             if (entry->key != NULL) {
366                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
367             }
368         }
369     } else {
370         for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
371             if (entry->key != NULL && entry->key != dummy) {
372                 set_insert_clean(newtable, newmask, entry->key, entry->hash);
373             }
374         }
375     }
376 
377     if (is_oldtable_malloced)
378         PyMem_DEL(oldtable);
379     return 0;
380 }
381 
382 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)383 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
384 {
385     setentry *entry;
386 
387     entry = set_lookkey(so, key, hash);
388     if (entry != NULL)
389         return entry->key != NULL;
390     return -1;
391 }
392 
393 #define DISCARD_NOTFOUND 0
394 #define DISCARD_FOUND 1
395 
396 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)397 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
398 {
399     setentry *entry;
400     PyObject *old_key;
401 
402     entry = set_lookkey(so, key, hash);
403     if (entry == NULL)
404         return -1;
405     if (entry->key == NULL)
406         return DISCARD_NOTFOUND;
407     old_key = entry->key;
408     entry->key = dummy;
409     entry->hash = -1;
410     so->used--;
411     Py_DECREF(old_key);
412     return DISCARD_FOUND;
413 }
414 
415 static int
set_add_key(PySetObject * so,PyObject * key)416 set_add_key(PySetObject *so, PyObject *key)
417 {
418     Py_hash_t hash;
419 
420     if (!PyUnicode_CheckExact(key) ||
421         (hash = ((PyASCIIObject *) key)->hash) == -1) {
422         hash = PyObject_Hash(key);
423         if (hash == -1)
424             return -1;
425     }
426     return set_add_entry(so, key, hash);
427 }
428 
429 static int
set_contains_key(PySetObject * so,PyObject * key)430 set_contains_key(PySetObject *so, PyObject *key)
431 {
432     Py_hash_t hash;
433 
434     if (!PyUnicode_CheckExact(key) ||
435         (hash = ((PyASCIIObject *) key)->hash) == -1) {
436         hash = PyObject_Hash(key);
437         if (hash == -1)
438             return -1;
439     }
440     return set_contains_entry(so, key, hash);
441 }
442 
443 static int
set_discard_key(PySetObject * so,PyObject * key)444 set_discard_key(PySetObject *so, PyObject *key)
445 {
446     Py_hash_t hash;
447 
448     if (!PyUnicode_CheckExact(key) ||
449         (hash = ((PyASCIIObject *) key)->hash) == -1) {
450         hash = PyObject_Hash(key);
451         if (hash == -1)
452             return -1;
453     }
454     return set_discard_entry(so, key, hash);
455 }
456 
457 static void
set_empty_to_minsize(PySetObject * so)458 set_empty_to_minsize(PySetObject *so)
459 {
460     memset(so->smalltable, 0, sizeof(so->smalltable));
461     so->fill = 0;
462     so->used = 0;
463     so->mask = PySet_MINSIZE - 1;
464     so->table = so->smalltable;
465     so->hash = -1;
466 }
467 
468 static int
set_clear_internal(PySetObject * so)469 set_clear_internal(PySetObject *so)
470 {
471     setentry *entry;
472     setentry *table = so->table;
473     Py_ssize_t fill = so->fill;
474     Py_ssize_t used = so->used;
475     int table_is_malloced = table != so->smalltable;
476     setentry small_copy[PySet_MINSIZE];
477 
478     assert (PyAnySet_Check(so));
479     assert(table != NULL);
480 
481     /* This is delicate.  During the process of clearing the set,
482      * decrefs can cause the set to mutate.  To avoid fatal confusion
483      * (voice of experience), we have to make the set empty before
484      * clearing the slots, and never refer to anything via so->ref while
485      * clearing.
486      */
487     if (table_is_malloced)
488         set_empty_to_minsize(so);
489 
490     else if (fill > 0) {
491         /* It's a small table with something that needs to be cleared.
492          * Afraid the only safe way is to copy the set entries into
493          * another small table first.
494          */
495         memcpy(small_copy, table, sizeof(small_copy));
496         table = small_copy;
497         set_empty_to_minsize(so);
498     }
499     /* else it's a small table that's already empty */
500 
501     /* Now we can finally clear things.  If C had refcounts, we could
502      * assert that the refcount on table is 1 now, i.e. that this function
503      * has unique access to it, so decref side-effects can't alter it.
504      */
505     for (entry = table; used > 0; entry++) {
506         if (entry->key && entry->key != dummy) {
507             used--;
508             Py_DECREF(entry->key);
509         }
510     }
511 
512     if (table_is_malloced)
513         PyMem_DEL(table);
514     return 0;
515 }
516 
517 /*
518  * Iterate over a set table.  Use like so:
519  *
520  *     Py_ssize_t pos;
521  *     setentry *entry;
522  *     pos = 0;   # important!  pos should not otherwise be changed by you
523  *     while (set_next(yourset, &pos, &entry)) {
524  *              Refer to borrowed reference in entry->key.
525  *     }
526  *
527  * CAUTION:  In general, it isn't safe to use set_next in a loop that
528  * mutates the table.
529  */
530 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)531 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
532 {
533     Py_ssize_t i;
534     Py_ssize_t mask;
535     setentry *entry;
536 
537     assert (PyAnySet_Check(so));
538     i = *pos_ptr;
539     assert(i >= 0);
540     mask = so->mask;
541     entry = &so->table[i];
542     while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
543         i++;
544         entry++;
545     }
546     *pos_ptr = i+1;
547     if (i > mask)
548         return 0;
549     assert(entry != NULL);
550     *entry_ptr = entry;
551     return 1;
552 }
553 
554 static void
set_dealloc(PySetObject * so)555 set_dealloc(PySetObject *so)
556 {
557     setentry *entry;
558     Py_ssize_t used = so->used;
559 
560     PyObject_GC_UnTrack(so);
561     Py_TRASHCAN_SAFE_BEGIN(so)
562     if (so->weakreflist != NULL)
563         PyObject_ClearWeakRefs((PyObject *) so);
564 
565     for (entry = so->table; used > 0; entry++) {
566         if (entry->key && entry->key != dummy) {
567                 used--;
568                 Py_DECREF(entry->key);
569         }
570     }
571     if (so->table != so->smalltable)
572         PyMem_DEL(so->table);
573     Py_TYPE(so)->tp_free(so);
574     Py_TRASHCAN_SAFE_END(so)
575 }
576 
577 static PyObject *
set_repr(PySetObject * so)578 set_repr(PySetObject *so)
579 {
580     PyObject *result=NULL, *keys, *listrepr, *tmp;
581     int status = Py_ReprEnter((PyObject*)so);
582 
583     if (status != 0) {
584         if (status < 0)
585             return NULL;
586         return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
587     }
588 
589     /* shortcut for the empty set */
590     if (!so->used) {
591         Py_ReprLeave((PyObject*)so);
592         return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
593     }
594 
595     keys = PySequence_List((PyObject *)so);
596     if (keys == NULL)
597         goto done;
598 
599     /* repr(keys)[1:-1] */
600     listrepr = PyObject_Repr(keys);
601     Py_DECREF(keys);
602     if (listrepr == NULL)
603         goto done;
604     tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
605     Py_DECREF(listrepr);
606     if (tmp == NULL)
607         goto done;
608     listrepr = tmp;
609 
610     if (Py_TYPE(so) != &PySet_Type)
611         result = PyUnicode_FromFormat("%s({%U})",
612                                       Py_TYPE(so)->tp_name,
613                                       listrepr);
614     else
615         result = PyUnicode_FromFormat("{%U}", listrepr);
616     Py_DECREF(listrepr);
617 done:
618     Py_ReprLeave((PyObject*)so);
619     return result;
620 }
621 
622 static Py_ssize_t
set_len(PyObject * so)623 set_len(PyObject *so)
624 {
625     return ((PySetObject *)so)->used;
626 }
627 
628 static int
set_merge(PySetObject * so,PyObject * otherset)629 set_merge(PySetObject *so, PyObject *otherset)
630 {
631     PySetObject *other;
632     PyObject *key;
633     Py_ssize_t i;
634     setentry *so_entry;
635     setentry *other_entry;
636 
637     assert (PyAnySet_Check(so));
638     assert (PyAnySet_Check(otherset));
639 
640     other = (PySetObject*)otherset;
641     if (other == so || other->used == 0)
642         /* a.update(a) or a.update(set()); nothing to do */
643         return 0;
644     /* Do one big resize at the start, rather than
645      * incrementally resizing as we insert new keys.  Expect
646      * that there will be no (or few) overlapping keys.
647      */
648     if ((so->fill + other->used)*3 >= so->mask*2) {
649        if (set_table_resize(so, so->used + other->used) != 0)
650            return -1;
651     }
652     so_entry = so->table;
653     other_entry = other->table;
654 
655     /* If our table is empty, and both tables have the same size, and
656        there are no dummies to eliminate, then just copy the pointers. */
657     if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
658         for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
659             key = other_entry->key;
660             if (key != NULL) {
661                 assert(so_entry->key == NULL);
662                 Py_INCREF(key);
663                 so_entry->key = key;
664                 so_entry->hash = other_entry->hash;
665             }
666         }
667         so->fill = other->fill;
668         so->used = other->used;
669         return 0;
670     }
671 
672     /* If our table is empty, we can use set_insert_clean() */
673     if (so->fill == 0) {
674         setentry *newtable = so->table;
675         size_t newmask = (size_t)so->mask;
676         so->fill = other->used;
677         so->used = other->used;
678         for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
679             key = other_entry->key;
680             if (key != NULL && key != dummy) {
681                 Py_INCREF(key);
682                 set_insert_clean(newtable, newmask, key, other_entry->hash);
683             }
684         }
685         return 0;
686     }
687 
688     /* We can't assure there are no duplicates, so do normal insertions */
689     for (i = 0; i <= other->mask; i++) {
690         other_entry = &other->table[i];
691         key = other_entry->key;
692         if (key != NULL && key != dummy) {
693             if (set_add_entry(so, key, other_entry->hash))
694                 return -1;
695         }
696     }
697     return 0;
698 }
699 
700 static PyObject *
set_pop(PySetObject * so)701 set_pop(PySetObject *so)
702 {
703     /* Make sure the search finger is in bounds */
704     Py_ssize_t i = so->finger & so->mask;
705     setentry *entry;
706     PyObject *key;
707 
708     assert (PyAnySet_Check(so));
709     if (so->used == 0) {
710         PyErr_SetString(PyExc_KeyError, "pop from an empty set");
711         return NULL;
712     }
713 
714     while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
715         i++;
716         if (i > so->mask)
717             i = 0;
718     }
719     key = entry->key;
720     entry->key = dummy;
721     entry->hash = -1;
722     so->used--;
723     so->finger = i + 1;         /* next place to start */
724     return key;
725 }
726 
727 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
728 Raises KeyError if the set is empty.");
729 
730 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)731 set_traverse(PySetObject *so, visitproc visit, void *arg)
732 {
733     Py_ssize_t pos = 0;
734     setentry *entry;
735 
736     while (set_next(so, &pos, &entry))
737         Py_VISIT(entry->key);
738     return 0;
739 }
740 
741 /* Work to increase the bit dispersion for closely spaced hash values.
742    This is important because some use cases have many combinations of a
743    small number of elements with nearby hashes so that many distinct
744    combinations collapse to only a handful of distinct hash values. */
745 
746 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)747 _shuffle_bits(Py_uhash_t h)
748 {
749     return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
750 }
751 
752 /* Most of the constants in this hash algorithm are randomly chosen
753    large primes with "interesting bit patterns" and that passed tests
754    for good collision statistics on a variety of problematic datasets
755    including powersets and graph structures (such as David Eppstein's
756    graph recipes in Lib/test/test_set.py) */
757 
758 static Py_hash_t
frozenset_hash(PyObject * self)759 frozenset_hash(PyObject *self)
760 {
761     PySetObject *so = (PySetObject *)self;
762     Py_uhash_t hash = 0;
763     setentry *entry;
764 
765     if (so->hash != -1)
766         return so->hash;
767 
768     /* Xor-in shuffled bits from every entry's hash field because xor is
769        commutative and a frozenset hash should be independent of order.
770 
771        For speed, include null entries and dummy entries and then
772        subtract out their effect afterwards so that the final hash
773        depends only on active entries.  This allows the code to be
774        vectorized by the compiler and it saves the unpredictable
775        branches that would arise when trying to exclude null and dummy
776        entries on every iteration. */
777 
778     for (entry = so->table; entry <= &so->table[so->mask]; entry++)
779         hash ^= _shuffle_bits(entry->hash);
780 
781     /* Remove the effect of an odd number of NULL entries */
782     if ((so->mask + 1 - so->fill) & 1)
783         hash ^= _shuffle_bits(0);
784 
785     /* Remove the effect of an odd number of dummy entries */
786     if ((so->fill - so->used) & 1)
787         hash ^= _shuffle_bits(-1);
788 
789     /* Factor in the number of active entries */
790     hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
791 
792     /* Disperse patterns arising in nested frozensets */
793     hash = hash * 69069U + 907133923UL;
794 
795     /* -1 is reserved as an error code */
796     if (hash == (Py_uhash_t)-1)
797         hash = 590923713UL;
798 
799     so->hash = hash;
800     return hash;
801 }
802 
803 /***** Set iterator type ***********************************************/
804 
805 typedef struct {
806     PyObject_HEAD
807     PySetObject *si_set; /* Set to NULL when iterator is exhausted */
808     Py_ssize_t si_used;
809     Py_ssize_t si_pos;
810     Py_ssize_t len;
811 } setiterobject;
812 
813 static void
setiter_dealloc(setiterobject * si)814 setiter_dealloc(setiterobject *si)
815 {
816     Py_XDECREF(si->si_set);
817     PyObject_GC_Del(si);
818 }
819 
820 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)821 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
822 {
823     Py_VISIT(si->si_set);
824     return 0;
825 }
826 
827 static PyObject *
setiter_len(setiterobject * si)828 setiter_len(setiterobject *si)
829 {
830     Py_ssize_t len = 0;
831     if (si->si_set != NULL && si->si_used == si->si_set->used)
832         len = si->len;
833     return PyLong_FromSsize_t(len);
834 }
835 
836 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
837 
838 static PyObject *setiter_iternext(setiterobject *si);
839 
840 static PyObject *
setiter_reduce(setiterobject * si)841 setiter_reduce(setiterobject *si)
842 {
843     PyObject *list;
844     setiterobject tmp;
845 
846     list = PyList_New(0);
847     if (!list)
848         return NULL;
849 
850     /* copy the iterator state */
851     tmp = *si;
852     Py_XINCREF(tmp.si_set);
853 
854     /* iterate the temporary into a list */
855     for(;;) {
856         PyObject *element = setiter_iternext(&tmp);
857         if (element) {
858             if (PyList_Append(list, element)) {
859                 Py_DECREF(element);
860                 Py_DECREF(list);
861                 Py_XDECREF(tmp.si_set);
862                 return NULL;
863             }
864             Py_DECREF(element);
865         } else
866             break;
867     }
868     Py_XDECREF(tmp.si_set);
869     /* check for error */
870     if (tmp.si_set != NULL) {
871         /* we have an error */
872         Py_DECREF(list);
873         return NULL;
874     }
875     return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), list);
876 }
877 
878 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
879 
880 static PyMethodDef setiter_methods[] = {
881     {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
882     {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
883     {NULL,              NULL}           /* sentinel */
884 };
885 
setiter_iternext(setiterobject * si)886 static PyObject *setiter_iternext(setiterobject *si)
887 {
888     PyObject *key;
889     Py_ssize_t i, mask;
890     setentry *entry;
891     PySetObject *so = si->si_set;
892 
893     if (so == NULL)
894         return NULL;
895     assert (PyAnySet_Check(so));
896 
897     if (si->si_used != so->used) {
898         PyErr_SetString(PyExc_RuntimeError,
899                         "Set changed size during iteration");
900         si->si_used = -1; /* Make this state sticky */
901         return NULL;
902     }
903 
904     i = si->si_pos;
905     assert(i>=0);
906     entry = so->table;
907     mask = so->mask;
908     while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
909         i++;
910     si->si_pos = i+1;
911     if (i > mask)
912         goto fail;
913     si->len--;
914     key = entry[i].key;
915     Py_INCREF(key);
916     return key;
917 
918 fail:
919     si->si_set = NULL;
920     Py_DECREF(so);
921     return NULL;
922 }
923 
924 PyTypeObject PySetIter_Type = {
925     PyVarObject_HEAD_INIT(&PyType_Type, 0)
926     "set_iterator",                             /* tp_name */
927     sizeof(setiterobject),                      /* tp_basicsize */
928     0,                                          /* tp_itemsize */
929     /* methods */
930     (destructor)setiter_dealloc,                /* tp_dealloc */
931     0,                                          /* tp_print */
932     0,                                          /* tp_getattr */
933     0,                                          /* tp_setattr */
934     0,                                          /* tp_reserved */
935     0,                                          /* tp_repr */
936     0,                                          /* tp_as_number */
937     0,                                          /* tp_as_sequence */
938     0,                                          /* tp_as_mapping */
939     0,                                          /* tp_hash */
940     0,                                          /* tp_call */
941     0,                                          /* tp_str */
942     PyObject_GenericGetAttr,                    /* tp_getattro */
943     0,                                          /* tp_setattro */
944     0,                                          /* tp_as_buffer */
945     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
946     0,                                          /* tp_doc */
947     (traverseproc)setiter_traverse,             /* tp_traverse */
948     0,                                          /* tp_clear */
949     0,                                          /* tp_richcompare */
950     0,                                          /* tp_weaklistoffset */
951     PyObject_SelfIter,                          /* tp_iter */
952     (iternextfunc)setiter_iternext,             /* tp_iternext */
953     setiter_methods,                            /* tp_methods */
954     0,
955 };
956 
957 static PyObject *
set_iter(PySetObject * so)958 set_iter(PySetObject *so)
959 {
960     setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
961     if (si == NULL)
962         return NULL;
963     Py_INCREF(so);
964     si->si_set = so;
965     si->si_used = so->used;
966     si->si_pos = 0;
967     si->len = so->used;
968     _PyObject_GC_TRACK(si);
969     return (PyObject *)si;
970 }
971 
972 static int
set_update_internal(PySetObject * so,PyObject * other)973 set_update_internal(PySetObject *so, PyObject *other)
974 {
975     PyObject *key, *it;
976 
977     if (PyAnySet_Check(other))
978         return set_merge(so, other);
979 
980     if (PyDict_CheckExact(other)) {
981         PyObject *value;
982         Py_ssize_t pos = 0;
983         Py_hash_t hash;
984         Py_ssize_t dictsize = PyDict_Size(other);
985 
986         /* Do one big resize at the start, rather than
987         * incrementally resizing as we insert new keys.  Expect
988         * that there will be no (or few) overlapping keys.
989         */
990         if (dictsize < 0)
991             return -1;
992         if ((so->fill + dictsize)*3 >= so->mask*2) {
993             if (set_table_resize(so, so->used + dictsize) != 0)
994                 return -1;
995         }
996         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
997             if (set_add_entry(so, key, hash))
998                 return -1;
999         }
1000         return 0;
1001     }
1002 
1003     it = PyObject_GetIter(other);
1004     if (it == NULL)
1005         return -1;
1006 
1007     while ((key = PyIter_Next(it)) != NULL) {
1008         if (set_add_key(so, key)) {
1009             Py_DECREF(it);
1010             Py_DECREF(key);
1011             return -1;
1012         }
1013         Py_DECREF(key);
1014     }
1015     Py_DECREF(it);
1016     if (PyErr_Occurred())
1017         return -1;
1018     return 0;
1019 }
1020 
1021 static PyObject *
set_update(PySetObject * so,PyObject * args)1022 set_update(PySetObject *so, PyObject *args)
1023 {
1024     Py_ssize_t i;
1025 
1026     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1027         PyObject *other = PyTuple_GET_ITEM(args, i);
1028         if (set_update_internal(so, other))
1029             return NULL;
1030     }
1031     Py_RETURN_NONE;
1032 }
1033 
1034 PyDoc_STRVAR(update_doc,
1035 "Update a set with the union of itself and others.");
1036 
1037 /* XXX Todo:
1038    If aligned memory allocations become available, make the
1039    set object 64 byte aligned so that most of the fields
1040    can be retrieved or updated in a single cache line.
1041 */
1042 
1043 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1044 make_new_set(PyTypeObject *type, PyObject *iterable)
1045 {
1046     PySetObject *so;
1047 
1048     so = (PySetObject *)type->tp_alloc(type, 0);
1049     if (so == NULL)
1050         return NULL;
1051 
1052     so->fill = 0;
1053     so->used = 0;
1054     so->mask = PySet_MINSIZE - 1;
1055     so->table = so->smalltable;
1056     so->hash = -1;
1057     so->finger = 0;
1058     so->weakreflist = NULL;
1059 
1060     if (iterable != NULL) {
1061         if (set_update_internal(so, iterable)) {
1062             Py_DECREF(so);
1063             return NULL;
1064         }
1065     }
1066 
1067     return (PyObject *)so;
1068 }
1069 
1070 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)1071 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
1072 {
1073     if (type != &PySet_Type && type != &PyFrozenSet_Type) {
1074         if (PyType_IsSubtype(type, &PySet_Type))
1075             type = &PySet_Type;
1076         else
1077             type = &PyFrozenSet_Type;
1078     }
1079     return make_new_set(type, iterable);
1080 }
1081 
1082 /* The empty frozenset is a singleton */
1083 static PyObject *emptyfrozenset = NULL;
1084 
1085 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1086 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1087 {
1088     PyObject *iterable = NULL, *result;
1089 
1090     if (kwds != NULL && type == &PyFrozenSet_Type
1091         && !_PyArg_NoKeywords("frozenset()", kwds))
1092         return NULL;
1093 
1094     if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1095         return NULL;
1096 
1097     if (type != &PyFrozenSet_Type)
1098         return make_new_set(type, iterable);
1099 
1100     if (iterable != NULL) {
1101         /* frozenset(f) is idempotent */
1102         if (PyFrozenSet_CheckExact(iterable)) {
1103             Py_INCREF(iterable);
1104             return iterable;
1105         }
1106         result = make_new_set(type, iterable);
1107         if (result == NULL || PySet_GET_SIZE(result))
1108             return result;
1109         Py_DECREF(result);
1110     }
1111     /* The empty frozenset is a singleton */
1112     if (emptyfrozenset == NULL)
1113         emptyfrozenset = make_new_set(type, NULL);
1114     Py_XINCREF(emptyfrozenset);
1115     return emptyfrozenset;
1116 }
1117 
1118 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1119 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1120 {
1121     return make_new_set(type, NULL);
1122 }
1123 
1124 /* set_swap_bodies() switches the contents of any two sets by moving their
1125    internal data pointers and, if needed, copying the internal smalltables.
1126    Semantically equivalent to:
1127 
1128      t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1129 
1130    The function always succeeds and it leaves both objects in a stable state.
1131    Useful for operations that update in-place (by allowing an intermediate
1132    result to be swapped into one of the original inputs).
1133 */
1134 
1135 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1136 set_swap_bodies(PySetObject *a, PySetObject *b)
1137 {
1138     Py_ssize_t t;
1139     setentry *u;
1140     setentry tab[PySet_MINSIZE];
1141     Py_hash_t h;
1142 
1143     t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1144     t = a->used;     a->used   = b->used;        b->used  = t;
1145     t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1146 
1147     u = a->table;
1148     if (a->table == a->smalltable)
1149         u = b->smalltable;
1150     a->table  = b->table;
1151     if (b->table == b->smalltable)
1152         a->table = a->smalltable;
1153     b->table = u;
1154 
1155     if (a->table == a->smalltable || b->table == b->smalltable) {
1156         memcpy(tab, a->smalltable, sizeof(tab));
1157         memcpy(a->smalltable, b->smalltable, sizeof(tab));
1158         memcpy(b->smalltable, tab, sizeof(tab));
1159     }
1160 
1161     if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1162         PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1163         h = a->hash;     a->hash = b->hash;  b->hash = h;
1164     } else {
1165         a->hash = -1;
1166         b->hash = -1;
1167     }
1168 }
1169 
1170 static PyObject *
set_copy(PySetObject * so)1171 set_copy(PySetObject *so)
1172 {
1173     return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1174 }
1175 
1176 static PyObject *
frozenset_copy(PySetObject * so)1177 frozenset_copy(PySetObject *so)
1178 {
1179     if (PyFrozenSet_CheckExact(so)) {
1180         Py_INCREF(so);
1181         return (PyObject *)so;
1182     }
1183     return set_copy(so);
1184 }
1185 
1186 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1187 
1188 static PyObject *
set_clear(PySetObject * so)1189 set_clear(PySetObject *so)
1190 {
1191     set_clear_internal(so);
1192     Py_RETURN_NONE;
1193 }
1194 
1195 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1196 
1197 static PyObject *
set_union(PySetObject * so,PyObject * args)1198 set_union(PySetObject *so, PyObject *args)
1199 {
1200     PySetObject *result;
1201     PyObject *other;
1202     Py_ssize_t i;
1203 
1204     result = (PySetObject *)set_copy(so);
1205     if (result == NULL)
1206         return NULL;
1207 
1208     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1209         other = PyTuple_GET_ITEM(args, i);
1210         if ((PyObject *)so == other)
1211             continue;
1212         if (set_update_internal(result, other)) {
1213             Py_DECREF(result);
1214             return NULL;
1215         }
1216     }
1217     return (PyObject *)result;
1218 }
1219 
1220 PyDoc_STRVAR(union_doc,
1221  "Return the union of sets as a new set.\n\
1222 \n\
1223 (i.e. all elements that are in either set.)");
1224 
1225 static PyObject *
set_or(PySetObject * so,PyObject * other)1226 set_or(PySetObject *so, PyObject *other)
1227 {
1228     PySetObject *result;
1229 
1230     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1231         Py_RETURN_NOTIMPLEMENTED;
1232 
1233     result = (PySetObject *)set_copy(so);
1234     if (result == NULL)
1235         return NULL;
1236     if ((PyObject *)so == other)
1237         return (PyObject *)result;
1238     if (set_update_internal(result, other)) {
1239         Py_DECREF(result);
1240         return NULL;
1241     }
1242     return (PyObject *)result;
1243 }
1244 
1245 static PyObject *
set_ior(PySetObject * so,PyObject * other)1246 set_ior(PySetObject *so, PyObject *other)
1247 {
1248     if (!PyAnySet_Check(other))
1249         Py_RETURN_NOTIMPLEMENTED;
1250 
1251     if (set_update_internal(so, other))
1252         return NULL;
1253     Py_INCREF(so);
1254     return (PyObject *)so;
1255 }
1256 
1257 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1258 set_intersection(PySetObject *so, PyObject *other)
1259 {
1260     PySetObject *result;
1261     PyObject *key, *it, *tmp;
1262     Py_hash_t hash;
1263     int rv;
1264 
1265     if ((PyObject *)so == other)
1266         return set_copy(so);
1267 
1268     result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1269     if (result == NULL)
1270         return NULL;
1271 
1272     if (PyAnySet_Check(other)) {
1273         Py_ssize_t pos = 0;
1274         setentry *entry;
1275 
1276         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1277             tmp = (PyObject *)so;
1278             so = (PySetObject *)other;
1279             other = tmp;
1280         }
1281 
1282         while (set_next((PySetObject *)other, &pos, &entry)) {
1283             key = entry->key;
1284             hash = entry->hash;
1285             rv = set_contains_entry(so, key, hash);
1286             if (rv < 0) {
1287                 Py_DECREF(result);
1288                 return NULL;
1289             }
1290             if (rv) {
1291                 if (set_add_entry(result, key, hash)) {
1292                     Py_DECREF(result);
1293                     return NULL;
1294                 }
1295             }
1296         }
1297         return (PyObject *)result;
1298     }
1299 
1300     it = PyObject_GetIter(other);
1301     if (it == NULL) {
1302         Py_DECREF(result);
1303         return NULL;
1304     }
1305 
1306     while ((key = PyIter_Next(it)) != NULL) {
1307         hash = PyObject_Hash(key);
1308         if (hash == -1)
1309             goto error;
1310         rv = set_contains_entry(so, key, hash);
1311         if (rv < 0)
1312             goto error;
1313         if (rv) {
1314             if (set_add_entry(result, key, hash))
1315                 goto error;
1316         }
1317         Py_DECREF(key);
1318     }
1319     Py_DECREF(it);
1320     if (PyErr_Occurred()) {
1321         Py_DECREF(result);
1322         return NULL;
1323     }
1324     return (PyObject *)result;
1325   error:
1326     Py_DECREF(it);
1327     Py_DECREF(result);
1328     Py_DECREF(key);
1329     return NULL;
1330 }
1331 
1332 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1333 set_intersection_multi(PySetObject *so, PyObject *args)
1334 {
1335     Py_ssize_t i;
1336     PyObject *result = (PyObject *)so;
1337 
1338     if (PyTuple_GET_SIZE(args) == 0)
1339         return set_copy(so);
1340 
1341     Py_INCREF(so);
1342     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1343         PyObject *other = PyTuple_GET_ITEM(args, i);
1344         PyObject *newresult = set_intersection((PySetObject *)result, other);
1345         if (newresult == NULL) {
1346             Py_DECREF(result);
1347             return NULL;
1348         }
1349         Py_DECREF(result);
1350         result = newresult;
1351     }
1352     return result;
1353 }
1354 
1355 PyDoc_STRVAR(intersection_doc,
1356 "Return the intersection of two sets as a new set.\n\
1357 \n\
1358 (i.e. all elements that are in both sets.)");
1359 
1360 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1361 set_intersection_update(PySetObject *so, PyObject *other)
1362 {
1363     PyObject *tmp;
1364 
1365     tmp = set_intersection(so, other);
1366     if (tmp == NULL)
1367         return NULL;
1368     set_swap_bodies(so, (PySetObject *)tmp);
1369     Py_DECREF(tmp);
1370     Py_RETURN_NONE;
1371 }
1372 
1373 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1374 set_intersection_update_multi(PySetObject *so, PyObject *args)
1375 {
1376     PyObject *tmp;
1377 
1378     tmp = set_intersection_multi(so, args);
1379     if (tmp == NULL)
1380         return NULL;
1381     set_swap_bodies(so, (PySetObject *)tmp);
1382     Py_DECREF(tmp);
1383     Py_RETURN_NONE;
1384 }
1385 
1386 PyDoc_STRVAR(intersection_update_doc,
1387 "Update a set with the intersection of itself and another.");
1388 
1389 static PyObject *
set_and(PySetObject * so,PyObject * other)1390 set_and(PySetObject *so, PyObject *other)
1391 {
1392     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1393         Py_RETURN_NOTIMPLEMENTED;
1394     return set_intersection(so, other);
1395 }
1396 
1397 static PyObject *
set_iand(PySetObject * so,PyObject * other)1398 set_iand(PySetObject *so, PyObject *other)
1399 {
1400     PyObject *result;
1401 
1402     if (!PyAnySet_Check(other))
1403         Py_RETURN_NOTIMPLEMENTED;
1404     result = set_intersection_update(so, other);
1405     if (result == NULL)
1406         return NULL;
1407     Py_DECREF(result);
1408     Py_INCREF(so);
1409     return (PyObject *)so;
1410 }
1411 
1412 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1413 set_isdisjoint(PySetObject *so, PyObject *other)
1414 {
1415     PyObject *key, *it, *tmp;
1416     int rv;
1417 
1418     if ((PyObject *)so == other) {
1419         if (PySet_GET_SIZE(so) == 0)
1420             Py_RETURN_TRUE;
1421         else
1422             Py_RETURN_FALSE;
1423     }
1424 
1425     if (PyAnySet_CheckExact(other)) {
1426         Py_ssize_t pos = 0;
1427         setentry *entry;
1428 
1429         if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1430             tmp = (PyObject *)so;
1431             so = (PySetObject *)other;
1432             other = tmp;
1433         }
1434         while (set_next((PySetObject *)other, &pos, &entry)) {
1435             rv = set_contains_entry(so, entry->key, entry->hash);
1436             if (rv < 0)
1437                 return NULL;
1438             if (rv)
1439                 Py_RETURN_FALSE;
1440         }
1441         Py_RETURN_TRUE;
1442     }
1443 
1444     it = PyObject_GetIter(other);
1445     if (it == NULL)
1446         return NULL;
1447 
1448     while ((key = PyIter_Next(it)) != NULL) {
1449         Py_hash_t hash = PyObject_Hash(key);
1450 
1451         if (hash == -1) {
1452             Py_DECREF(key);
1453             Py_DECREF(it);
1454             return NULL;
1455         }
1456         rv = set_contains_entry(so, key, hash);
1457         Py_DECREF(key);
1458         if (rv < 0) {
1459             Py_DECREF(it);
1460             return NULL;
1461         }
1462         if (rv) {
1463             Py_DECREF(it);
1464             Py_RETURN_FALSE;
1465         }
1466     }
1467     Py_DECREF(it);
1468     if (PyErr_Occurred())
1469         return NULL;
1470     Py_RETURN_TRUE;
1471 }
1472 
1473 PyDoc_STRVAR(isdisjoint_doc,
1474 "Return True if two sets have a null intersection.");
1475 
1476 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1477 set_difference_update_internal(PySetObject *so, PyObject *other)
1478 {
1479     if (PySet_GET_SIZE(so) == 0) {
1480         return 0;
1481     }
1482 
1483     if ((PyObject *)so == other)
1484         return set_clear_internal(so);
1485 
1486     if (PyAnySet_Check(other)) {
1487         setentry *entry;
1488         Py_ssize_t pos = 0;
1489 
1490         while (set_next((PySetObject *)other, &pos, &entry))
1491             if (set_discard_entry(so, entry->key, entry->hash) < 0)
1492                 return -1;
1493     } else {
1494         PyObject *key, *it;
1495         it = PyObject_GetIter(other);
1496         if (it == NULL)
1497             return -1;
1498 
1499         while ((key = PyIter_Next(it)) != NULL) {
1500             if (set_discard_key(so, key) < 0) {
1501                 Py_DECREF(it);
1502                 Py_DECREF(key);
1503                 return -1;
1504             }
1505             Py_DECREF(key);
1506         }
1507         Py_DECREF(it);
1508         if (PyErr_Occurred())
1509             return -1;
1510     }
1511     /* If more than 1/4th are dummies, then resize them away. */
1512     if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1513         return 0;
1514     return set_table_resize(so, so->used);
1515 }
1516 
1517 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1518 set_difference_update(PySetObject *so, PyObject *args)
1519 {
1520     Py_ssize_t i;
1521 
1522     for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1523         PyObject *other = PyTuple_GET_ITEM(args, i);
1524         if (set_difference_update_internal(so, other))
1525             return NULL;
1526     }
1527     Py_RETURN_NONE;
1528 }
1529 
1530 PyDoc_STRVAR(difference_update_doc,
1531 "Remove all elements of another set from this set.");
1532 
1533 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1534 set_copy_and_difference(PySetObject *so, PyObject *other)
1535 {
1536     PyObject *result;
1537 
1538     result = set_copy(so);
1539     if (result == NULL)
1540         return NULL;
1541     if (set_difference_update_internal((PySetObject *) result, other) == 0)
1542         return result;
1543     Py_DECREF(result);
1544     return NULL;
1545 }
1546 
1547 static PyObject *
set_difference(PySetObject * so,PyObject * other)1548 set_difference(PySetObject *so, PyObject *other)
1549 {
1550     PyObject *result;
1551     PyObject *key;
1552     Py_hash_t hash;
1553     setentry *entry;
1554     Py_ssize_t pos = 0;
1555     int rv;
1556 
1557     if (PySet_GET_SIZE(so) == 0) {
1558         return set_copy(so);
1559     }
1560 
1561     if (!PyAnySet_Check(other)  && !PyDict_CheckExact(other)) {
1562         return set_copy_and_difference(so, other);
1563     }
1564 
1565     /* If len(so) much more than len(other), it's more efficient to simply copy
1566      * so and then iterate other looking for common elements. */
1567     if ((PySet_GET_SIZE(so) >> 2) > PyObject_Size(other)) {
1568         return set_copy_and_difference(so, other);
1569     }
1570 
1571     result = make_new_set_basetype(Py_TYPE(so), NULL);
1572     if (result == NULL)
1573         return NULL;
1574 
1575     if (PyDict_CheckExact(other)) {
1576         while (set_next(so, &pos, &entry)) {
1577             key = entry->key;
1578             hash = entry->hash;
1579             rv = _PyDict_Contains(other, key, hash);
1580             if (rv < 0) {
1581                 Py_DECREF(result);
1582                 return NULL;
1583             }
1584             if (!rv) {
1585                 if (set_add_entry((PySetObject *)result, key, hash)) {
1586                     Py_DECREF(result);
1587                     return NULL;
1588                 }
1589             }
1590         }
1591         return result;
1592     }
1593 
1594     /* Iterate over so, checking for common elements in other. */
1595     while (set_next(so, &pos, &entry)) {
1596         key = entry->key;
1597         hash = entry->hash;
1598         rv = set_contains_entry((PySetObject *)other, key, hash);
1599         if (rv < 0) {
1600             Py_DECREF(result);
1601             return NULL;
1602         }
1603         if (!rv) {
1604             if (set_add_entry((PySetObject *)result, key, hash)) {
1605                 Py_DECREF(result);
1606                 return NULL;
1607             }
1608         }
1609     }
1610     return result;
1611 }
1612 
1613 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1614 set_difference_multi(PySetObject *so, PyObject *args)
1615 {
1616     Py_ssize_t i;
1617     PyObject *result, *other;
1618 
1619     if (PyTuple_GET_SIZE(args) == 0)
1620         return set_copy(so);
1621 
1622     other = PyTuple_GET_ITEM(args, 0);
1623     result = set_difference(so, other);
1624     if (result == NULL)
1625         return NULL;
1626 
1627     for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1628         other = PyTuple_GET_ITEM(args, i);
1629         if (set_difference_update_internal((PySetObject *)result, other)) {
1630             Py_DECREF(result);
1631             return NULL;
1632         }
1633     }
1634     return result;
1635 }
1636 
1637 PyDoc_STRVAR(difference_doc,
1638 "Return the difference of two or more sets as a new set.\n\
1639 \n\
1640 (i.e. all elements that are in this set but not the others.)");
1641 static PyObject *
set_sub(PySetObject * so,PyObject * other)1642 set_sub(PySetObject *so, PyObject *other)
1643 {
1644     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1645         Py_RETURN_NOTIMPLEMENTED;
1646     return set_difference(so, other);
1647 }
1648 
1649 static PyObject *
set_isub(PySetObject * so,PyObject * other)1650 set_isub(PySetObject *so, PyObject *other)
1651 {
1652     if (!PyAnySet_Check(other))
1653         Py_RETURN_NOTIMPLEMENTED;
1654     if (set_difference_update_internal(so, other))
1655         return NULL;
1656     Py_INCREF(so);
1657     return (PyObject *)so;
1658 }
1659 
1660 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1661 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1662 {
1663     PySetObject *otherset;
1664     PyObject *key;
1665     Py_ssize_t pos = 0;
1666     Py_hash_t hash;
1667     setentry *entry;
1668     int rv;
1669 
1670     if ((PyObject *)so == other)
1671         return set_clear(so);
1672 
1673     if (PyDict_CheckExact(other)) {
1674         PyObject *value;
1675         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1676             Py_INCREF(key);
1677             rv = set_discard_entry(so, key, hash);
1678             if (rv < 0) {
1679                 Py_DECREF(key);
1680                 return NULL;
1681             }
1682             if (rv == DISCARD_NOTFOUND) {
1683                 if (set_add_entry(so, key, hash)) {
1684                     Py_DECREF(key);
1685                     return NULL;
1686                 }
1687             }
1688             Py_DECREF(key);
1689         }
1690         Py_RETURN_NONE;
1691     }
1692 
1693     if (PyAnySet_Check(other)) {
1694         Py_INCREF(other);
1695         otherset = (PySetObject *)other;
1696     } else {
1697         otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1698         if (otherset == NULL)
1699             return NULL;
1700     }
1701 
1702     while (set_next(otherset, &pos, &entry)) {
1703         key = entry->key;
1704         hash = entry->hash;
1705         rv = set_discard_entry(so, key, hash);
1706         if (rv < 0) {
1707             Py_DECREF(otherset);
1708             return NULL;
1709         }
1710         if (rv == DISCARD_NOTFOUND) {
1711             if (set_add_entry(so, key, hash)) {
1712                 Py_DECREF(otherset);
1713                 return NULL;
1714             }
1715         }
1716     }
1717     Py_DECREF(otherset);
1718     Py_RETURN_NONE;
1719 }
1720 
1721 PyDoc_STRVAR(symmetric_difference_update_doc,
1722 "Update a set with the symmetric difference of itself and another.");
1723 
1724 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1725 set_symmetric_difference(PySetObject *so, PyObject *other)
1726 {
1727     PyObject *rv;
1728     PySetObject *otherset;
1729 
1730     otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1731     if (otherset == NULL)
1732         return NULL;
1733     rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1734     if (rv == NULL)
1735         return NULL;
1736     Py_DECREF(rv);
1737     return (PyObject *)otherset;
1738 }
1739 
1740 PyDoc_STRVAR(symmetric_difference_doc,
1741 "Return the symmetric difference of two sets as a new set.\n\
1742 \n\
1743 (i.e. all elements that are in exactly one of the sets.)");
1744 
1745 static PyObject *
set_xor(PySetObject * so,PyObject * other)1746 set_xor(PySetObject *so, PyObject *other)
1747 {
1748     if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1749         Py_RETURN_NOTIMPLEMENTED;
1750     return set_symmetric_difference(so, other);
1751 }
1752 
1753 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1754 set_ixor(PySetObject *so, PyObject *other)
1755 {
1756     PyObject *result;
1757 
1758     if (!PyAnySet_Check(other))
1759         Py_RETURN_NOTIMPLEMENTED;
1760     result = set_symmetric_difference_update(so, other);
1761     if (result == NULL)
1762         return NULL;
1763     Py_DECREF(result);
1764     Py_INCREF(so);
1765     return (PyObject *)so;
1766 }
1767 
1768 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1769 set_issubset(PySetObject *so, PyObject *other)
1770 {
1771     setentry *entry;
1772     Py_ssize_t pos = 0;
1773     int rv;
1774 
1775     if (!PyAnySet_Check(other)) {
1776         PyObject *tmp, *result;
1777         tmp = make_new_set(&PySet_Type, other);
1778         if (tmp == NULL)
1779             return NULL;
1780         result = set_issubset(so, tmp);
1781         Py_DECREF(tmp);
1782         return result;
1783     }
1784     if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1785         Py_RETURN_FALSE;
1786 
1787     while (set_next(so, &pos, &entry)) {
1788         rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1789         if (rv < 0)
1790             return NULL;
1791         if (!rv)
1792             Py_RETURN_FALSE;
1793     }
1794     Py_RETURN_TRUE;
1795 }
1796 
1797 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1798 
1799 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1800 set_issuperset(PySetObject *so, PyObject *other)
1801 {
1802     PyObject *tmp, *result;
1803 
1804     if (!PyAnySet_Check(other)) {
1805         tmp = make_new_set(&PySet_Type, other);
1806         if (tmp == NULL)
1807             return NULL;
1808         result = set_issuperset(so, tmp);
1809         Py_DECREF(tmp);
1810         return result;
1811     }
1812     return set_issubset((PySetObject *)other, (PyObject *)so);
1813 }
1814 
1815 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1816 
1817 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1818 set_richcompare(PySetObject *v, PyObject *w, int op)
1819 {
1820     PyObject *r1;
1821     int r2;
1822 
1823     if(!PyAnySet_Check(w))
1824         Py_RETURN_NOTIMPLEMENTED;
1825 
1826     switch (op) {
1827     case Py_EQ:
1828         if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1829             Py_RETURN_FALSE;
1830         if (v->hash != -1  &&
1831             ((PySetObject *)w)->hash != -1 &&
1832             v->hash != ((PySetObject *)w)->hash)
1833             Py_RETURN_FALSE;
1834         return set_issubset(v, w);
1835     case Py_NE:
1836         r1 = set_richcompare(v, w, Py_EQ);
1837         if (r1 == NULL)
1838             return NULL;
1839         r2 = PyObject_IsTrue(r1);
1840         Py_DECREF(r1);
1841         if (r2 < 0)
1842             return NULL;
1843         return PyBool_FromLong(!r2);
1844     case Py_LE:
1845         return set_issubset(v, w);
1846     case Py_GE:
1847         return set_issuperset(v, w);
1848     case Py_LT:
1849         if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1850             Py_RETURN_FALSE;
1851         return set_issubset(v, w);
1852     case Py_GT:
1853         if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1854             Py_RETURN_FALSE;
1855         return set_issuperset(v, w);
1856     }
1857     Py_RETURN_NOTIMPLEMENTED;
1858 }
1859 
1860 static PyObject *
set_add(PySetObject * so,PyObject * key)1861 set_add(PySetObject *so, PyObject *key)
1862 {
1863     if (set_add_key(so, key))
1864         return NULL;
1865     Py_RETURN_NONE;
1866 }
1867 
1868 PyDoc_STRVAR(add_doc,
1869 "Add an element to a set.\n\
1870 \n\
1871 This has no effect if the element is already present.");
1872 
1873 static int
set_contains(PySetObject * so,PyObject * key)1874 set_contains(PySetObject *so, PyObject *key)
1875 {
1876     PyObject *tmpkey;
1877     int rv;
1878 
1879     rv = set_contains_key(so, key);
1880     if (rv < 0) {
1881         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1882             return -1;
1883         PyErr_Clear();
1884         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1885         if (tmpkey == NULL)
1886             return -1;
1887         rv = set_contains_key(so, tmpkey);
1888         Py_DECREF(tmpkey);
1889     }
1890     return rv;
1891 }
1892 
1893 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1894 set_direct_contains(PySetObject *so, PyObject *key)
1895 {
1896     long result;
1897 
1898     result = set_contains(so, key);
1899     if (result < 0)
1900         return NULL;
1901     return PyBool_FromLong(result);
1902 }
1903 
1904 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1905 
1906 static PyObject *
set_remove(PySetObject * so,PyObject * key)1907 set_remove(PySetObject *so, PyObject *key)
1908 {
1909     PyObject *tmpkey;
1910     int rv;
1911 
1912     rv = set_discard_key(so, key);
1913     if (rv < 0) {
1914         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1915             return NULL;
1916         PyErr_Clear();
1917         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1918         if (tmpkey == NULL)
1919             return NULL;
1920         rv = set_discard_key(so, tmpkey);
1921         Py_DECREF(tmpkey);
1922         if (rv < 0)
1923             return NULL;
1924     }
1925 
1926     if (rv == DISCARD_NOTFOUND) {
1927         _PyErr_SetKeyError(key);
1928         return NULL;
1929     }
1930     Py_RETURN_NONE;
1931 }
1932 
1933 PyDoc_STRVAR(remove_doc,
1934 "Remove an element from a set; it must be a member.\n\
1935 \n\
1936 If the element is not a member, raise a KeyError.");
1937 
1938 static PyObject *
set_discard(PySetObject * so,PyObject * key)1939 set_discard(PySetObject *so, PyObject *key)
1940 {
1941     PyObject *tmpkey;
1942     int rv;
1943 
1944     rv = set_discard_key(so, key);
1945     if (rv < 0) {
1946         if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1947             return NULL;
1948         PyErr_Clear();
1949         tmpkey = make_new_set(&PyFrozenSet_Type, key);
1950         if (tmpkey == NULL)
1951             return NULL;
1952         rv = set_discard_key(so, tmpkey);
1953         Py_DECREF(tmpkey);
1954         if (rv < 0)
1955             return NULL;
1956     }
1957     Py_RETURN_NONE;
1958 }
1959 
1960 PyDoc_STRVAR(discard_doc,
1961 "Remove an element from a set if it is a member.\n\
1962 \n\
1963 If the element is not a member, do nothing.");
1964 
1965 static PyObject *
set_reduce(PySetObject * so)1966 set_reduce(PySetObject *so)
1967 {
1968     PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1969     _Py_IDENTIFIER(__dict__);
1970 
1971     keys = PySequence_List((PyObject *)so);
1972     if (keys == NULL)
1973         goto done;
1974     args = PyTuple_Pack(1, keys);
1975     if (args == NULL)
1976         goto done;
1977     dict = _PyObject_GetAttrId((PyObject *)so, &PyId___dict__);
1978     if (dict == NULL) {
1979         PyErr_Clear();
1980         dict = Py_None;
1981         Py_INCREF(dict);
1982     }
1983     result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1984 done:
1985     Py_XDECREF(args);
1986     Py_XDECREF(keys);
1987     Py_XDECREF(dict);
1988     return result;
1989 }
1990 
1991 static PyObject *
set_sizeof(PySetObject * so)1992 set_sizeof(PySetObject *so)
1993 {
1994     Py_ssize_t res;
1995 
1996     res = _PyObject_SIZE(Py_TYPE(so));
1997     if (so->table != so->smalltable)
1998         res = res + (so->mask + 1) * sizeof(setentry);
1999     return PyLong_FromSsize_t(res);
2000 }
2001 
2002 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
2003 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)2004 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
2005 {
2006     PyObject *iterable = NULL;
2007 
2008     if (kwds != NULL && !_PyArg_NoKeywords("set()", kwds))
2009         return -1;
2010     if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2011         return -1;
2012     if (self->fill)
2013         set_clear_internal(self);
2014     self->hash = -1;
2015     if (iterable == NULL)
2016         return 0;
2017     return set_update_internal(self, iterable);
2018 }
2019 
2020 static PySequenceMethods set_as_sequence = {
2021     set_len,                            /* sq_length */
2022     0,                                  /* sq_concat */
2023     0,                                  /* sq_repeat */
2024     0,                                  /* sq_item */
2025     0,                                  /* sq_slice */
2026     0,                                  /* sq_ass_item */
2027     0,                                  /* sq_ass_slice */
2028     (objobjproc)set_contains,           /* sq_contains */
2029 };
2030 
2031 /* set object ********************************************************/
2032 
2033 #ifdef Py_DEBUG
2034 static PyObject *test_c_api(PySetObject *so);
2035 
2036 PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
2037 All is well if assertions don't fail.");
2038 #endif
2039 
2040 static PyMethodDef set_methods[] = {
2041     {"add",             (PyCFunction)set_add,           METH_O,
2042      add_doc},
2043     {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2044      clear_doc},
2045     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2046      contains_doc},
2047     {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2048      copy_doc},
2049     {"discard",         (PyCFunction)set_discard,       METH_O,
2050      discard_doc},
2051     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2052      difference_doc},
2053     {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2054      difference_update_doc},
2055     {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2056      intersection_doc},
2057     {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2058      intersection_update_doc},
2059     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2060      isdisjoint_doc},
2061     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2062      issubset_doc},
2063     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2064      issuperset_doc},
2065     {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2066      pop_doc},
2067     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2068      reduce_doc},
2069     {"remove",          (PyCFunction)set_remove,        METH_O,
2070      remove_doc},
2071     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2072      sizeof_doc},
2073     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2074      symmetric_difference_doc},
2075     {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2076      symmetric_difference_update_doc},
2077 #ifdef Py_DEBUG
2078     {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2079      test_c_api_doc},
2080 #endif
2081     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2082      union_doc},
2083     {"update",          (PyCFunction)set_update,        METH_VARARGS,
2084      update_doc},
2085     {NULL,              NULL}   /* sentinel */
2086 };
2087 
2088 static PyNumberMethods set_as_number = {
2089     0,                                  /*nb_add*/
2090     (binaryfunc)set_sub,                /*nb_subtract*/
2091     0,                                  /*nb_multiply*/
2092     0,                                  /*nb_remainder*/
2093     0,                                  /*nb_divmod*/
2094     0,                                  /*nb_power*/
2095     0,                                  /*nb_negative*/
2096     0,                                  /*nb_positive*/
2097     0,                                  /*nb_absolute*/
2098     0,                                  /*nb_bool*/
2099     0,                                  /*nb_invert*/
2100     0,                                  /*nb_lshift*/
2101     0,                                  /*nb_rshift*/
2102     (binaryfunc)set_and,                /*nb_and*/
2103     (binaryfunc)set_xor,                /*nb_xor*/
2104     (binaryfunc)set_or,                 /*nb_or*/
2105     0,                                  /*nb_int*/
2106     0,                                  /*nb_reserved*/
2107     0,                                  /*nb_float*/
2108     0,                                  /*nb_inplace_add*/
2109     (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2110     0,                                  /*nb_inplace_multiply*/
2111     0,                                  /*nb_inplace_remainder*/
2112     0,                                  /*nb_inplace_power*/
2113     0,                                  /*nb_inplace_lshift*/
2114     0,                                  /*nb_inplace_rshift*/
2115     (binaryfunc)set_iand,               /*nb_inplace_and*/
2116     (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2117     (binaryfunc)set_ior,                /*nb_inplace_or*/
2118 };
2119 
2120 PyDoc_STRVAR(set_doc,
2121 "set() -> new empty set object\n\
2122 set(iterable) -> new set object\n\
2123 \n\
2124 Build an unordered collection of unique elements.");
2125 
2126 PyTypeObject PySet_Type = {
2127     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2128     "set",                              /* tp_name */
2129     sizeof(PySetObject),                /* tp_basicsize */
2130     0,                                  /* tp_itemsize */
2131     /* methods */
2132     (destructor)set_dealloc,            /* tp_dealloc */
2133     0,                                  /* tp_print */
2134     0,                                  /* tp_getattr */
2135     0,                                  /* tp_setattr */
2136     0,                                  /* tp_reserved */
2137     (reprfunc)set_repr,                 /* tp_repr */
2138     &set_as_number,                     /* tp_as_number */
2139     &set_as_sequence,                   /* tp_as_sequence */
2140     0,                                  /* tp_as_mapping */
2141     PyObject_HashNotImplemented,        /* tp_hash */
2142     0,                                  /* tp_call */
2143     0,                                  /* tp_str */
2144     PyObject_GenericGetAttr,            /* tp_getattro */
2145     0,                                  /* tp_setattro */
2146     0,                                  /* tp_as_buffer */
2147     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2148         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2149     set_doc,                            /* tp_doc */
2150     (traverseproc)set_traverse,         /* tp_traverse */
2151     (inquiry)set_clear_internal,        /* tp_clear */
2152     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2153     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2154     (getiterfunc)set_iter,              /* tp_iter */
2155     0,                                  /* tp_iternext */
2156     set_methods,                        /* tp_methods */
2157     0,                                  /* tp_members */
2158     0,                                  /* tp_getset */
2159     0,                                  /* tp_base */
2160     0,                                  /* tp_dict */
2161     0,                                  /* tp_descr_get */
2162     0,                                  /* tp_descr_set */
2163     0,                                  /* tp_dictoffset */
2164     (initproc)set_init,                 /* tp_init */
2165     PyType_GenericAlloc,                /* tp_alloc */
2166     set_new,                            /* tp_new */
2167     PyObject_GC_Del,                    /* tp_free */
2168 };
2169 
2170 /* frozenset object ********************************************************/
2171 
2172 
2173 static PyMethodDef frozenset_methods[] = {
2174     {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2175      contains_doc},
2176     {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2177      copy_doc},
2178     {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2179      difference_doc},
2180     {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,
2181      intersection_doc},
2182     {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2183      isdisjoint_doc},
2184     {"issubset",        (PyCFunction)set_issubset,      METH_O,
2185      issubset_doc},
2186     {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2187      issuperset_doc},
2188     {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2189      reduce_doc},
2190     {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2191      sizeof_doc},
2192     {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2193      symmetric_difference_doc},
2194     {"union",           (PyCFunction)set_union,         METH_VARARGS,
2195      union_doc},
2196     {NULL,              NULL}   /* sentinel */
2197 };
2198 
2199 static PyNumberMethods frozenset_as_number = {
2200     0,                                  /*nb_add*/
2201     (binaryfunc)set_sub,                /*nb_subtract*/
2202     0,                                  /*nb_multiply*/
2203     0,                                  /*nb_remainder*/
2204     0,                                  /*nb_divmod*/
2205     0,                                  /*nb_power*/
2206     0,                                  /*nb_negative*/
2207     0,                                  /*nb_positive*/
2208     0,                                  /*nb_absolute*/
2209     0,                                  /*nb_bool*/
2210     0,                                  /*nb_invert*/
2211     0,                                  /*nb_lshift*/
2212     0,                                  /*nb_rshift*/
2213     (binaryfunc)set_and,                /*nb_and*/
2214     (binaryfunc)set_xor,                /*nb_xor*/
2215     (binaryfunc)set_or,                 /*nb_or*/
2216 };
2217 
2218 PyDoc_STRVAR(frozenset_doc,
2219 "frozenset() -> empty frozenset object\n\
2220 frozenset(iterable) -> frozenset object\n\
2221 \n\
2222 Build an immutable unordered collection of unique elements.");
2223 
2224 PyTypeObject PyFrozenSet_Type = {
2225     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2226     "frozenset",                        /* tp_name */
2227     sizeof(PySetObject),                /* tp_basicsize */
2228     0,                                  /* tp_itemsize */
2229     /* methods */
2230     (destructor)set_dealloc,            /* tp_dealloc */
2231     0,                                  /* tp_print */
2232     0,                                  /* tp_getattr */
2233     0,                                  /* tp_setattr */
2234     0,                                  /* tp_reserved */
2235     (reprfunc)set_repr,                 /* tp_repr */
2236     &frozenset_as_number,               /* tp_as_number */
2237     &set_as_sequence,                   /* tp_as_sequence */
2238     0,                                  /* tp_as_mapping */
2239     frozenset_hash,                     /* tp_hash */
2240     0,                                  /* tp_call */
2241     0,                                  /* tp_str */
2242     PyObject_GenericGetAttr,            /* tp_getattro */
2243     0,                                  /* tp_setattro */
2244     0,                                  /* tp_as_buffer */
2245     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2246         Py_TPFLAGS_BASETYPE,            /* tp_flags */
2247     frozenset_doc,                      /* tp_doc */
2248     (traverseproc)set_traverse,         /* tp_traverse */
2249     (inquiry)set_clear_internal,        /* tp_clear */
2250     (richcmpfunc)set_richcompare,       /* tp_richcompare */
2251     offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2252     (getiterfunc)set_iter,              /* tp_iter */
2253     0,                                  /* tp_iternext */
2254     frozenset_methods,                  /* tp_methods */
2255     0,                                  /* tp_members */
2256     0,                                  /* tp_getset */
2257     0,                                  /* tp_base */
2258     0,                                  /* tp_dict */
2259     0,                                  /* tp_descr_get */
2260     0,                                  /* tp_descr_set */
2261     0,                                  /* tp_dictoffset */
2262     0,                                  /* tp_init */
2263     PyType_GenericAlloc,                /* tp_alloc */
2264     frozenset_new,                      /* tp_new */
2265     PyObject_GC_Del,                    /* tp_free */
2266 };
2267 
2268 
2269 /***** C API functions *************************************************/
2270 
2271 PyObject *
PySet_New(PyObject * iterable)2272 PySet_New(PyObject *iterable)
2273 {
2274     return make_new_set(&PySet_Type, iterable);
2275 }
2276 
2277 PyObject *
PyFrozenSet_New(PyObject * iterable)2278 PyFrozenSet_New(PyObject *iterable)
2279 {
2280     return make_new_set(&PyFrozenSet_Type, iterable);
2281 }
2282 
2283 Py_ssize_t
PySet_Size(PyObject * anyset)2284 PySet_Size(PyObject *anyset)
2285 {
2286     if (!PyAnySet_Check(anyset)) {
2287         PyErr_BadInternalCall();
2288         return -1;
2289     }
2290     return PySet_GET_SIZE(anyset);
2291 }
2292 
2293 int
PySet_Clear(PyObject * set)2294 PySet_Clear(PyObject *set)
2295 {
2296     if (!PySet_Check(set)) {
2297         PyErr_BadInternalCall();
2298         return -1;
2299     }
2300     return set_clear_internal((PySetObject *)set);
2301 }
2302 
2303 int
PySet_Contains(PyObject * anyset,PyObject * key)2304 PySet_Contains(PyObject *anyset, PyObject *key)
2305 {
2306     if (!PyAnySet_Check(anyset)) {
2307         PyErr_BadInternalCall();
2308         return -1;
2309     }
2310     return set_contains_key((PySetObject *)anyset, key);
2311 }
2312 
2313 int
PySet_Discard(PyObject * set,PyObject * key)2314 PySet_Discard(PyObject *set, PyObject *key)
2315 {
2316     if (!PySet_Check(set)) {
2317         PyErr_BadInternalCall();
2318         return -1;
2319     }
2320     return set_discard_key((PySetObject *)set, key);
2321 }
2322 
2323 int
PySet_Add(PyObject * anyset,PyObject * key)2324 PySet_Add(PyObject *anyset, PyObject *key)
2325 {
2326     if (!PySet_Check(anyset) &&
2327         (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2328         PyErr_BadInternalCall();
2329         return -1;
2330     }
2331     return set_add_key((PySetObject *)anyset, key);
2332 }
2333 
2334 int
PySet_ClearFreeList(void)2335 PySet_ClearFreeList(void)
2336 {
2337     return 0;
2338 }
2339 
2340 void
PySet_Fini(void)2341 PySet_Fini(void)
2342 {
2343     Py_CLEAR(emptyfrozenset);
2344 }
2345 
2346 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2347 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2348 {
2349     setentry *entry;
2350 
2351     if (!PyAnySet_Check(set)) {
2352         PyErr_BadInternalCall();
2353         return -1;
2354     }
2355     if (set_next((PySetObject *)set, pos, &entry) == 0)
2356         return 0;
2357     *key = entry->key;
2358     *hash = entry->hash;
2359     return 1;
2360 }
2361 
2362 PyObject *
PySet_Pop(PyObject * set)2363 PySet_Pop(PyObject *set)
2364 {
2365     if (!PySet_Check(set)) {
2366         PyErr_BadInternalCall();
2367         return NULL;
2368     }
2369     return set_pop((PySetObject *)set);
2370 }
2371 
2372 int
_PySet_Update(PyObject * set,PyObject * iterable)2373 _PySet_Update(PyObject *set, PyObject *iterable)
2374 {
2375     if (!PySet_Check(set)) {
2376         PyErr_BadInternalCall();
2377         return -1;
2378     }
2379     return set_update_internal((PySetObject *)set, iterable);
2380 }
2381 
2382 /* Exported for the gdb plugin's benefit. */
2383 PyObject *_PySet_Dummy = dummy;
2384 
2385 #ifdef Py_DEBUG
2386 
2387 /* Test code to be called with any three element set.
2388    Returns True and original set is restored. */
2389 
2390 #define assertRaises(call_return_value, exception)              \
2391     do {                                                        \
2392         assert(call_return_value);                              \
2393         assert(PyErr_ExceptionMatches(exception));              \
2394         PyErr_Clear();                                          \
2395     } while(0)
2396 
2397 static PyObject *
test_c_api(PySetObject * so)2398 test_c_api(PySetObject *so)
2399 {
2400     Py_ssize_t count;
2401     char *s;
2402     Py_ssize_t i;
2403     PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2404     PyObject *ob = (PyObject *)so;
2405     Py_hash_t hash;
2406     PyObject *str;
2407 
2408     /* Verify preconditions */
2409     assert(PyAnySet_Check(ob));
2410     assert(PyAnySet_CheckExact(ob));
2411     assert(!PyFrozenSet_CheckExact(ob));
2412 
2413     /* so.clear(); so |= set("abc"); */
2414     str = PyUnicode_FromString("abc");
2415     if (str == NULL)
2416         return NULL;
2417     set_clear_internal(so);
2418     if (set_update_internal(so, str)) {
2419         Py_DECREF(str);
2420         return NULL;
2421     }
2422     Py_DECREF(str);
2423 
2424     /* Exercise type/size checks */
2425     assert(PySet_Size(ob) == 3);
2426     assert(PySet_GET_SIZE(ob) == 3);
2427 
2428     /* Raise TypeError for non-iterable constructor arguments */
2429     assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2430     assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2431 
2432     /* Raise TypeError for unhashable key */
2433     dup = PySet_New(ob);
2434     assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2435     assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2436     assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2437 
2438     /* Exercise successful pop, contains, add, and discard */
2439     elem = PySet_Pop(ob);
2440     assert(PySet_Contains(ob, elem) == 0);
2441     assert(PySet_GET_SIZE(ob) == 2);
2442     assert(PySet_Add(ob, elem) == 0);
2443     assert(PySet_Contains(ob, elem) == 1);
2444     assert(PySet_GET_SIZE(ob) == 3);
2445     assert(PySet_Discard(ob, elem) == 1);
2446     assert(PySet_GET_SIZE(ob) == 2);
2447     assert(PySet_Discard(ob, elem) == 0);
2448     assert(PySet_GET_SIZE(ob) == 2);
2449 
2450     /* Exercise clear */
2451     dup2 = PySet_New(dup);
2452     assert(PySet_Clear(dup2) == 0);
2453     assert(PySet_Size(dup2) == 0);
2454     Py_DECREF(dup2);
2455 
2456     /* Raise SystemError on clear or update of frozen set */
2457     f = PyFrozenSet_New(dup);
2458     assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2459     assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2460     assert(PySet_Add(f, elem) == 0);
2461     Py_INCREF(f);
2462     assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2463     Py_DECREF(f);
2464     Py_DECREF(f);
2465 
2466     /* Exercise direct iteration */
2467     i = 0, count = 0;
2468     while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2469         s = PyUnicode_AsUTF8(x);
2470         assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2471         count++;
2472     }
2473     assert(count == 3);
2474 
2475     /* Exercise updates */
2476     dup2 = PySet_New(NULL);
2477     assert(_PySet_Update(dup2, dup) == 0);
2478     assert(PySet_Size(dup2) == 3);
2479     assert(_PySet_Update(dup2, dup) == 0);
2480     assert(PySet_Size(dup2) == 3);
2481     Py_DECREF(dup2);
2482 
2483     /* Raise SystemError when self argument is not a set or frozenset. */
2484     t = PyTuple_New(0);
2485     assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2486     assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2487     Py_DECREF(t);
2488 
2489     /* Raise SystemError when self argument is not a set. */
2490     f = PyFrozenSet_New(dup);
2491     assert(PySet_Size(f) == 3);
2492     assert(PyFrozenSet_CheckExact(f));
2493     assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2494     assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2495     Py_DECREF(f);
2496 
2497     /* Raise KeyError when popping from an empty set */
2498     assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2499     Py_DECREF(ob);
2500     assert(PySet_GET_SIZE(ob) == 0);
2501     assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2502 
2503     /* Restore the set from the copy using the PyNumber API */
2504     assert(PyNumber_InPlaceOr(ob, dup) == ob);
2505     Py_DECREF(ob);
2506 
2507     /* Verify constructors accept NULL arguments */
2508     f = PySet_New(NULL);
2509     assert(f != NULL);
2510     assert(PySet_GET_SIZE(f) == 0);
2511     Py_DECREF(f);
2512     f = PyFrozenSet_New(NULL);
2513     assert(f != NULL);
2514     assert(PyFrozenSet_CheckExact(f));
2515     assert(PySet_GET_SIZE(f) == 0);
2516     Py_DECREF(f);
2517 
2518     Py_DECREF(elem);
2519     Py_DECREF(dup);
2520     Py_RETURN_TRUE;
2521 }
2522 
2523 #undef assertRaises
2524 
2525 #endif
2526 
2527 /***** Dummy Struct  *************************************************/
2528 
2529 static PyObject *
dummy_repr(PyObject * op)2530 dummy_repr(PyObject *op)
2531 {
2532     return PyUnicode_FromString("<dummy key>");
2533 }
2534 
2535 static void
dummy_dealloc(PyObject * ignore)2536 dummy_dealloc(PyObject* ignore)
2537 {
2538     Py_FatalError("deallocating <dummy key>");
2539 }
2540 
2541 static PyTypeObject _PySetDummy_Type = {
2542     PyVarObject_HEAD_INIT(&PyType_Type, 0)
2543     "<dummy key> type",
2544     0,
2545     0,
2546     dummy_dealloc,      /*tp_dealloc*/ /*never called*/
2547     0,                  /*tp_print*/
2548     0,                  /*tp_getattr*/
2549     0,                  /*tp_setattr*/
2550     0,                  /*tp_reserved*/
2551     dummy_repr,         /*tp_repr*/
2552     0,                  /*tp_as_number*/
2553     0,                  /*tp_as_sequence*/
2554     0,                  /*tp_as_mapping*/
2555     0,                  /*tp_hash */
2556     0,                  /*tp_call */
2557     0,                  /*tp_str */
2558     0,                  /*tp_getattro */
2559     0,                  /*tp_setattro */
2560     0,                  /*tp_as_buffer */
2561     Py_TPFLAGS_DEFAULT, /*tp_flags */
2562 };
2563 
2564 static PyObject _dummy_struct = {
2565   _PyObject_EXTRA_INIT
2566   2, &_PySetDummy_Type
2567 };
2568 
2569