• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  
2  /* set object implementation
3  
4     Written and maintained by Raymond D. Hettinger <python@rcn.com>
5     Derived from Lib/sets.py and Objects/dictobject.c.
6  
7     The basic lookup function used by all operations.
8     This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9  
10     The initial probe index is computed as hash mod the table size.
11     Subsequent probe indices are computed as explained in Objects/dictobject.c.
12  
13     To improve cache locality, each probe inspects a series of consecutive
14     nearby entries before moving on to probes elsewhere in memory.  This leaves
15     us with a hybrid of linear probing and randomized probing.  The linear probing
16     reduces the cost of hash collisions because consecutive memory accesses
17     tend to be much cheaper than scattered probes.  After LINEAR_PROBES steps,
18     we then use more of the upper bits from the hash value and apply a simple
19     linear congruential random number genearator.  This helps break-up long
20     chains of collisions.
21  
22     All arithmetic on hash should ignore overflow.
23  
24     Unlike the dictionary implementation, the lookkey function can return
25     NULL if the rich comparison returns an error.
26  
27     Use cases for sets differ considerably from dictionaries where looked-up
28     keys are more likely to be present.  In contrast, sets are primarily
29     about membership testing where the presence of an element is not known in
30     advance.  Accordingly, the set implementation needs to optimize for both
31     the found and not-found case.
32  */
33  
34  #include "Python.h"
35  #include "pycore_object.h"        // _PyObject_GC_UNTRACK()
36  #include <stddef.h>               // offsetof()
37  
38  /* Object used as dummy key to fill deleted entries */
39  static PyObject _dummy_struct;
40  
41  #define dummy (&_dummy_struct)
42  
43  
44  /* ======================================================================== */
45  /* ======= Begin logic for probing the hash table ========================= */
46  
47  /* Set this to zero to turn-off linear probing */
48  #ifndef LINEAR_PROBES
49  #define LINEAR_PROBES 9
50  #endif
51  
52  /* This must be >= 1 */
53  #define PERTURB_SHIFT 5
54  
55  static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56  set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57  {
58      setentry *table;
59      setentry *entry;
60      size_t perturb = hash;
61      size_t mask = so->mask;
62      size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63      int probes;
64      int cmp;
65  
66      while (1) {
67          entry = &so->table[i];
68          probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69          do {
70              if (entry->hash == 0 && entry->key == NULL)
71                  return entry;
72              if (entry->hash == hash) {
73                  PyObject *startkey = entry->key;
74                  assert(startkey != dummy);
75                  if (startkey == key)
76                      return entry;
77                  if (PyUnicode_CheckExact(startkey)
78                      && PyUnicode_CheckExact(key)
79                      && _PyUnicode_EQ(startkey, key))
80                      return entry;
81                  table = so->table;
82                  Py_INCREF(startkey);
83                  cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84                  Py_DECREF(startkey);
85                  if (cmp < 0)
86                      return NULL;
87                  if (table != so->table || entry->key != startkey)
88                      return set_lookkey(so, key, hash);
89                  if (cmp > 0)
90                      return entry;
91                  mask = so->mask;
92              }
93              entry++;
94          } while (probes--);
95          perturb >>= PERTURB_SHIFT;
96          i = (i * 5 + 1 + perturb) & mask;
97      }
98  }
99  
100  static int set_table_resize(PySetObject *, Py_ssize_t);
101  
102  static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103  set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104  {
105      setentry *table;
106      setentry *entry;
107      size_t perturb;
108      size_t mask;
109      size_t i;                       /* Unsigned for defined overflow behavior */
110      int probes;
111      int cmp;
112  
113      /* Pre-increment is necessary to prevent arbitrary code in the rich
114         comparison from deallocating the key just before the insertion. */
115      Py_INCREF(key);
116  
117    restart:
118  
119      mask = so->mask;
120      i = (size_t)hash & mask;
121      perturb = hash;
122  
123      while (1) {
124          entry = &so->table[i];
125          probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
126          do {
127              if (entry->hash == 0 && entry->key == NULL)
128                  goto found_unused;
129              if (entry->hash == hash) {
130                  PyObject *startkey = entry->key;
131                  assert(startkey != dummy);
132                  if (startkey == key)
133                      goto found_active;
134                  if (PyUnicode_CheckExact(startkey)
135                      && PyUnicode_CheckExact(key)
136                      && _PyUnicode_EQ(startkey, key))
137                      goto found_active;
138                  table = so->table;
139                  Py_INCREF(startkey);
140                  cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
141                  Py_DECREF(startkey);
142                  if (cmp > 0)
143                      goto found_active;
144                  if (cmp < 0)
145                      goto comparison_error;
146                  if (table != so->table || entry->key != startkey)
147                      goto restart;
148                  mask = so->mask;
149              }
150              entry++;
151          } while (probes--);
152          perturb >>= PERTURB_SHIFT;
153          i = (i * 5 + 1 + perturb) & mask;
154      }
155  
156    found_unused:
157      so->fill++;
158      so->used++;
159      entry->key = key;
160      entry->hash = hash;
161      if ((size_t)so->fill*5 < mask*3)
162          return 0;
163      return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
164  
165    found_active:
166      Py_DECREF(key);
167      return 0;
168  
169    comparison_error:
170      Py_DECREF(key);
171      return -1;
172  }
173  
174  /*
175  Internal routine used by set_table_resize() to insert an item which is
176  known to be absent from the set.  Besides the performance benefit,
177  there is also safety benefit since using set_add_entry() risks making
178  a callback in the middle of a set_table_resize(), see issue 1456209.
179  The caller is responsible for updating the key's reference count and
180  the setobject's fill and used fields.
181  */
182  static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)183  set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
184  {
185      setentry *entry;
186      size_t perturb = hash;
187      size_t i = (size_t)hash & mask;
188      size_t j;
189  
190      while (1) {
191          entry = &table[i];
192          if (entry->key == NULL)
193              goto found_null;
194          if (i + LINEAR_PROBES <= mask) {
195              for (j = 0; j < LINEAR_PROBES; j++) {
196                  entry++;
197                  if (entry->key == NULL)
198                      goto found_null;
199              }
200          }
201          perturb >>= PERTURB_SHIFT;
202          i = (i * 5 + 1 + perturb) & mask;
203      }
204    found_null:
205      entry->key = key;
206      entry->hash = hash;
207  }
208  
209  /* ======== End logic for probing the hash table ========================== */
210  /* ======================================================================== */
211  
212  /*
213  Restructure the table by allocating a new table and reinserting all
214  keys again.  When entries have been deleted, the new table may
215  actually be smaller than the old one.
216  */
217  static int
set_table_resize(PySetObject * so,Py_ssize_t minused)218  set_table_resize(PySetObject *so, Py_ssize_t minused)
219  {
220      setentry *oldtable, *newtable, *entry;
221      Py_ssize_t oldmask = so->mask;
222      size_t newmask;
223      int is_oldtable_malloced;
224      setentry small_copy[PySet_MINSIZE];
225  
226      assert(minused >= 0);
227  
228      /* Find the smallest table size > minused. */
229      /* XXX speed-up with intrinsics */
230      size_t newsize = PySet_MINSIZE;
231      while (newsize <= (size_t)minused) {
232          newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
233      }
234  
235      /* Get space for a new table. */
236      oldtable = so->table;
237      assert(oldtable != NULL);
238      is_oldtable_malloced = oldtable != so->smalltable;
239  
240      if (newsize == PySet_MINSIZE) {
241          /* A large table is shrinking, or we can't get any smaller. */
242          newtable = so->smalltable;
243          if (newtable == oldtable) {
244              if (so->fill == so->used) {
245                  /* No dummies, so no point doing anything. */
246                  return 0;
247              }
248              /* We're not going to resize it, but rebuild the
249                 table anyway to purge old dummy entries.
250                 Subtle:  This is *necessary* if fill==size,
251                 as set_lookkey needs at least one virgin slot to
252                 terminate failing searches.  If fill < size, it's
253                 merely desirable, as dummies slow searches. */
254              assert(so->fill > so->used);
255              memcpy(small_copy, oldtable, sizeof(small_copy));
256              oldtable = small_copy;
257          }
258      }
259      else {
260          newtable = PyMem_NEW(setentry, newsize);
261          if (newtable == NULL) {
262              PyErr_NoMemory();
263              return -1;
264          }
265      }
266  
267      /* Make the set empty, using the new table. */
268      assert(newtable != oldtable);
269      memset(newtable, 0, sizeof(setentry) * newsize);
270      so->mask = newsize - 1;
271      so->table = newtable;
272  
273      /* Copy the data over; this is refcount-neutral for active entries;
274         dummy entries aren't copied over, of course */
275      newmask = (size_t)so->mask;
276      if (so->fill == so->used) {
277          for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
278              if (entry->key != NULL) {
279                  set_insert_clean(newtable, newmask, entry->key, entry->hash);
280              }
281          }
282      } else {
283          so->fill = so->used;
284          for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
285              if (entry->key != NULL && entry->key != dummy) {
286                  set_insert_clean(newtable, newmask, entry->key, entry->hash);
287              }
288          }
289      }
290  
291      if (is_oldtable_malloced)
292          PyMem_DEL(oldtable);
293      return 0;
294  }
295  
296  static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)297  set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
298  {
299      setentry *entry;
300  
301      entry = set_lookkey(so, key, hash);
302      if (entry != NULL)
303          return entry->key != NULL;
304      return -1;
305  }
306  
307  #define DISCARD_NOTFOUND 0
308  #define DISCARD_FOUND 1
309  
310  static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311  set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312  {
313      setentry *entry;
314      PyObject *old_key;
315  
316      entry = set_lookkey(so, key, hash);
317      if (entry == NULL)
318          return -1;
319      if (entry->key == NULL)
320          return DISCARD_NOTFOUND;
321      old_key = entry->key;
322      entry->key = dummy;
323      entry->hash = -1;
324      so->used--;
325      Py_DECREF(old_key);
326      return DISCARD_FOUND;
327  }
328  
329  static int
set_add_key(PySetObject * so,PyObject * key)330  set_add_key(PySetObject *so, PyObject *key)
331  {
332      Py_hash_t hash;
333  
334      if (!PyUnicode_CheckExact(key) ||
335          (hash = ((PyASCIIObject *) key)->hash) == -1) {
336          hash = PyObject_Hash(key);
337          if (hash == -1)
338              return -1;
339      }
340      return set_add_entry(so, key, hash);
341  }
342  
343  static int
set_contains_key(PySetObject * so,PyObject * key)344  set_contains_key(PySetObject *so, PyObject *key)
345  {
346      Py_hash_t hash;
347  
348      if (!PyUnicode_CheckExact(key) ||
349          (hash = ((PyASCIIObject *) key)->hash) == -1) {
350          hash = PyObject_Hash(key);
351          if (hash == -1)
352              return -1;
353      }
354      return set_contains_entry(so, key, hash);
355  }
356  
357  static int
set_discard_key(PySetObject * so,PyObject * key)358  set_discard_key(PySetObject *so, PyObject *key)
359  {
360      Py_hash_t hash;
361  
362      if (!PyUnicode_CheckExact(key) ||
363          (hash = ((PyASCIIObject *) key)->hash) == -1) {
364          hash = PyObject_Hash(key);
365          if (hash == -1)
366              return -1;
367      }
368      return set_discard_entry(so, key, hash);
369  }
370  
371  static void
set_empty_to_minsize(PySetObject * so)372  set_empty_to_minsize(PySetObject *so)
373  {
374      memset(so->smalltable, 0, sizeof(so->smalltable));
375      so->fill = 0;
376      so->used = 0;
377      so->mask = PySet_MINSIZE - 1;
378      so->table = so->smalltable;
379      so->hash = -1;
380  }
381  
382  static int
set_clear_internal(PySetObject * so)383  set_clear_internal(PySetObject *so)
384  {
385      setentry *entry;
386      setentry *table = so->table;
387      Py_ssize_t fill = so->fill;
388      Py_ssize_t used = so->used;
389      int table_is_malloced = table != so->smalltable;
390      setentry small_copy[PySet_MINSIZE];
391  
392      assert (PyAnySet_Check(so));
393      assert(table != NULL);
394  
395      /* This is delicate.  During the process of clearing the set,
396       * decrefs can cause the set to mutate.  To avoid fatal confusion
397       * (voice of experience), we have to make the set empty before
398       * clearing the slots, and never refer to anything via so->ref while
399       * clearing.
400       */
401      if (table_is_malloced)
402          set_empty_to_minsize(so);
403  
404      else if (fill > 0) {
405          /* It's a small table with something that needs to be cleared.
406           * Afraid the only safe way is to copy the set entries into
407           * another small table first.
408           */
409          memcpy(small_copy, table, sizeof(small_copy));
410          table = small_copy;
411          set_empty_to_minsize(so);
412      }
413      /* else it's a small table that's already empty */
414  
415      /* Now we can finally clear things.  If C had refcounts, we could
416       * assert that the refcount on table is 1 now, i.e. that this function
417       * has unique access to it, so decref side-effects can't alter it.
418       */
419      for (entry = table; used > 0; entry++) {
420          if (entry->key && entry->key != dummy) {
421              used--;
422              Py_DECREF(entry->key);
423          }
424      }
425  
426      if (table_is_malloced)
427          PyMem_DEL(table);
428      return 0;
429  }
430  
431  /*
432   * Iterate over a set table.  Use like so:
433   *
434   *     Py_ssize_t pos;
435   *     setentry *entry;
436   *     pos = 0;   # important!  pos should not otherwise be changed by you
437   *     while (set_next(yourset, &pos, &entry)) {
438   *              Refer to borrowed reference in entry->key.
439   *     }
440   *
441   * CAUTION:  In general, it isn't safe to use set_next in a loop that
442   * mutates the table.
443   */
444  static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)445  set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
446  {
447      Py_ssize_t i;
448      Py_ssize_t mask;
449      setentry *entry;
450  
451      assert (PyAnySet_Check(so));
452      i = *pos_ptr;
453      assert(i >= 0);
454      mask = so->mask;
455      entry = &so->table[i];
456      while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
457          i++;
458          entry++;
459      }
460      *pos_ptr = i+1;
461      if (i > mask)
462          return 0;
463      assert(entry != NULL);
464      *entry_ptr = entry;
465      return 1;
466  }
467  
468  static void
set_dealloc(PySetObject * so)469  set_dealloc(PySetObject *so)
470  {
471      setentry *entry;
472      Py_ssize_t used = so->used;
473  
474      /* bpo-31095: UnTrack is needed before calling any callbacks */
475      PyObject_GC_UnTrack(so);
476      Py_TRASHCAN_BEGIN(so, set_dealloc)
477      if (so->weakreflist != NULL)
478          PyObject_ClearWeakRefs((PyObject *) so);
479  
480      for (entry = so->table; used > 0; entry++) {
481          if (entry->key && entry->key != dummy) {
482                  used--;
483                  Py_DECREF(entry->key);
484          }
485      }
486      if (so->table != so->smalltable)
487          PyMem_DEL(so->table);
488      Py_TYPE(so)->tp_free(so);
489      Py_TRASHCAN_END
490  }
491  
492  static PyObject *
set_repr(PySetObject * so)493  set_repr(PySetObject *so)
494  {
495      PyObject *result=NULL, *keys, *listrepr, *tmp;
496      int status = Py_ReprEnter((PyObject*)so);
497  
498      if (status != 0) {
499          if (status < 0)
500              return NULL;
501          return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
502      }
503  
504      /* shortcut for the empty set */
505      if (!so->used) {
506          Py_ReprLeave((PyObject*)so);
507          return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
508      }
509  
510      keys = PySequence_List((PyObject *)so);
511      if (keys == NULL)
512          goto done;
513  
514      /* repr(keys)[1:-1] */
515      listrepr = PyObject_Repr(keys);
516      Py_DECREF(keys);
517      if (listrepr == NULL)
518          goto done;
519      tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
520      Py_DECREF(listrepr);
521      if (tmp == NULL)
522          goto done;
523      listrepr = tmp;
524  
525      if (!Py_IS_TYPE(so, &PySet_Type))
526          result = PyUnicode_FromFormat("%s({%U})",
527                                        Py_TYPE(so)->tp_name,
528                                        listrepr);
529      else
530          result = PyUnicode_FromFormat("{%U}", listrepr);
531      Py_DECREF(listrepr);
532  done:
533      Py_ReprLeave((PyObject*)so);
534      return result;
535  }
536  
537  static Py_ssize_t
set_len(PyObject * so)538  set_len(PyObject *so)
539  {
540      return ((PySetObject *)so)->used;
541  }
542  
543  static int
set_merge(PySetObject * so,PyObject * otherset)544  set_merge(PySetObject *so, PyObject *otherset)
545  {
546      PySetObject *other;
547      PyObject *key;
548      Py_ssize_t i;
549      setentry *so_entry;
550      setentry *other_entry;
551  
552      assert (PyAnySet_Check(so));
553      assert (PyAnySet_Check(otherset));
554  
555      other = (PySetObject*)otherset;
556      if (other == so || other->used == 0)
557          /* a.update(a) or a.update(set()); nothing to do */
558          return 0;
559      /* Do one big resize at the start, rather than
560       * incrementally resizing as we insert new keys.  Expect
561       * that there will be no (or few) overlapping keys.
562       */
563      if ((so->fill + other->used)*5 >= so->mask*3) {
564          if (set_table_resize(so, (so->used + other->used)*2) != 0)
565              return -1;
566      }
567      so_entry = so->table;
568      other_entry = other->table;
569  
570      /* If our table is empty, and both tables have the same size, and
571         there are no dummies to eliminate, then just copy the pointers. */
572      if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
573          for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
574              key = other_entry->key;
575              if (key != NULL) {
576                  assert(so_entry->key == NULL);
577                  Py_INCREF(key);
578                  so_entry->key = key;
579                  so_entry->hash = other_entry->hash;
580              }
581          }
582          so->fill = other->fill;
583          so->used = other->used;
584          return 0;
585      }
586  
587      /* If our table is empty, we can use set_insert_clean() */
588      if (so->fill == 0) {
589          setentry *newtable = so->table;
590          size_t newmask = (size_t)so->mask;
591          so->fill = other->used;
592          so->used = other->used;
593          for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
594              key = other_entry->key;
595              if (key != NULL && key != dummy) {
596                  Py_INCREF(key);
597                  set_insert_clean(newtable, newmask, key, other_entry->hash);
598              }
599          }
600          return 0;
601      }
602  
603      /* We can't assure there are no duplicates, so do normal insertions */
604      for (i = 0; i <= other->mask; i++) {
605          other_entry = &other->table[i];
606          key = other_entry->key;
607          if (key != NULL && key != dummy) {
608              if (set_add_entry(so, key, other_entry->hash))
609                  return -1;
610          }
611      }
612      return 0;
613  }
614  
615  static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))616  set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
617  {
618      /* Make sure the search finger is in bounds */
619      setentry *entry = so->table + (so->finger & so->mask);
620      setentry *limit = so->table + so->mask;
621      PyObject *key;
622  
623      if (so->used == 0) {
624          PyErr_SetString(PyExc_KeyError, "pop from an empty set");
625          return NULL;
626      }
627      while (entry->key == NULL || entry->key==dummy) {
628          entry++;
629          if (entry > limit)
630              entry = so->table;
631      }
632      key = entry->key;
633      entry->key = dummy;
634      entry->hash = -1;
635      so->used--;
636      so->finger = entry - so->table + 1;   /* next place to start */
637      return key;
638  }
639  
640  PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
641  Raises KeyError if the set is empty.");
642  
643  static int
set_traverse(PySetObject * so,visitproc visit,void * arg)644  set_traverse(PySetObject *so, visitproc visit, void *arg)
645  {
646      Py_ssize_t pos = 0;
647      setentry *entry;
648  
649      while (set_next(so, &pos, &entry))
650          Py_VISIT(entry->key);
651      return 0;
652  }
653  
654  /* Work to increase the bit dispersion for closely spaced hash values.
655     This is important because some use cases have many combinations of a
656     small number of elements with nearby hashes so that many distinct
657     combinations collapse to only a handful of distinct hash values. */
658  
659  static Py_uhash_t
_shuffle_bits(Py_uhash_t h)660  _shuffle_bits(Py_uhash_t h)
661  {
662      return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
663  }
664  
665  /* Most of the constants in this hash algorithm are randomly chosen
666     large primes with "interesting bit patterns" and that passed tests
667     for good collision statistics on a variety of problematic datasets
668     including powersets and graph structures (such as David Eppstein's
669     graph recipes in Lib/test/test_set.py) */
670  
671  static Py_hash_t
frozenset_hash(PyObject * self)672  frozenset_hash(PyObject *self)
673  {
674      PySetObject *so = (PySetObject *)self;
675      Py_uhash_t hash = 0;
676      setentry *entry;
677  
678      if (so->hash != -1)
679          return so->hash;
680  
681      /* Xor-in shuffled bits from every entry's hash field because xor is
682         commutative and a frozenset hash should be independent of order.
683  
684         For speed, include null entries and dummy entries and then
685         subtract out their effect afterwards so that the final hash
686         depends only on active entries.  This allows the code to be
687         vectorized by the compiler and it saves the unpredictable
688         branches that would arise when trying to exclude null and dummy
689         entries on every iteration. */
690  
691      for (entry = so->table; entry <= &so->table[so->mask]; entry++)
692          hash ^= _shuffle_bits(entry->hash);
693  
694      /* Remove the effect of an odd number of NULL entries */
695      if ((so->mask + 1 - so->fill) & 1)
696          hash ^= _shuffle_bits(0);
697  
698      /* Remove the effect of an odd number of dummy entries */
699      if ((so->fill - so->used) & 1)
700          hash ^= _shuffle_bits(-1);
701  
702      /* Factor in the number of active entries */
703      hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
704  
705      /* Disperse patterns arising in nested frozensets */
706      hash ^= (hash >> 11) ^ (hash >> 25);
707      hash = hash * 69069U + 907133923UL;
708  
709      /* -1 is reserved as an error code */
710      if (hash == (Py_uhash_t)-1)
711          hash = 590923713UL;
712  
713      so->hash = hash;
714      return hash;
715  }
716  
717  /***** Set iterator type ***********************************************/
718  
719  typedef struct {
720      PyObject_HEAD
721      PySetObject *si_set; /* Set to NULL when iterator is exhausted */
722      Py_ssize_t si_used;
723      Py_ssize_t si_pos;
724      Py_ssize_t len;
725  } setiterobject;
726  
727  static void
setiter_dealloc(setiterobject * si)728  setiter_dealloc(setiterobject *si)
729  {
730      /* bpo-31095: UnTrack is needed before calling any callbacks */
731      _PyObject_GC_UNTRACK(si);
732      Py_XDECREF(si->si_set);
733      PyObject_GC_Del(si);
734  }
735  
736  static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)737  setiter_traverse(setiterobject *si, visitproc visit, void *arg)
738  {
739      Py_VISIT(si->si_set);
740      return 0;
741  }
742  
743  static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))744  setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
745  {
746      Py_ssize_t len = 0;
747      if (si->si_set != NULL && si->si_used == si->si_set->used)
748          len = si->len;
749      return PyLong_FromSsize_t(len);
750  }
751  
752  PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
753  
754  static PyObject *setiter_iternext(setiterobject *si);
755  
756  static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))757  setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
758  {
759      _Py_IDENTIFIER(iter);
760      /* copy the iterator state */
761      setiterobject tmp = *si;
762      Py_XINCREF(tmp.si_set);
763  
764      /* iterate the temporary into a list */
765      PyObject *list = PySequence_List((PyObject*)&tmp);
766      Py_XDECREF(tmp.si_set);
767      if (list == NULL) {
768          return NULL;
769      }
770      return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
771  }
772  
773  PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
774  
775  static PyMethodDef setiter_methods[] = {
776      {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
777      {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
778      {NULL,              NULL}           /* sentinel */
779  };
780  
setiter_iternext(setiterobject * si)781  static PyObject *setiter_iternext(setiterobject *si)
782  {
783      PyObject *key;
784      Py_ssize_t i, mask;
785      setentry *entry;
786      PySetObject *so = si->si_set;
787  
788      if (so == NULL)
789          return NULL;
790      assert (PyAnySet_Check(so));
791  
792      if (si->si_used != so->used) {
793          PyErr_SetString(PyExc_RuntimeError,
794                          "Set changed size during iteration");
795          si->si_used = -1; /* Make this state sticky */
796          return NULL;
797      }
798  
799      i = si->si_pos;
800      assert(i>=0);
801      entry = so->table;
802      mask = so->mask;
803      while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
804          i++;
805      si->si_pos = i+1;
806      if (i > mask)
807          goto fail;
808      si->len--;
809      key = entry[i].key;
810      Py_INCREF(key);
811      return key;
812  
813  fail:
814      si->si_set = NULL;
815      Py_DECREF(so);
816      return NULL;
817  }
818  
819  PyTypeObject PySetIter_Type = {
820      PyVarObject_HEAD_INIT(&PyType_Type, 0)
821      "set_iterator",                             /* tp_name */
822      sizeof(setiterobject),                      /* tp_basicsize */
823      0,                                          /* tp_itemsize */
824      /* methods */
825      (destructor)setiter_dealloc,                /* tp_dealloc */
826      0,                                          /* tp_vectorcall_offset */
827      0,                                          /* tp_getattr */
828      0,                                          /* tp_setattr */
829      0,                                          /* tp_as_async */
830      0,                                          /* tp_repr */
831      0,                                          /* tp_as_number */
832      0,                                          /* tp_as_sequence */
833      0,                                          /* tp_as_mapping */
834      0,                                          /* tp_hash */
835      0,                                          /* tp_call */
836      0,                                          /* tp_str */
837      PyObject_GenericGetAttr,                    /* tp_getattro */
838      0,                                          /* tp_setattro */
839      0,                                          /* tp_as_buffer */
840      Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
841      0,                                          /* tp_doc */
842      (traverseproc)setiter_traverse,             /* tp_traverse */
843      0,                                          /* tp_clear */
844      0,                                          /* tp_richcompare */
845      0,                                          /* tp_weaklistoffset */
846      PyObject_SelfIter,                          /* tp_iter */
847      (iternextfunc)setiter_iternext,             /* tp_iternext */
848      setiter_methods,                            /* tp_methods */
849      0,
850  };
851  
852  static PyObject *
set_iter(PySetObject * so)853  set_iter(PySetObject *so)
854  {
855      setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
856      if (si == NULL)
857          return NULL;
858      Py_INCREF(so);
859      si->si_set = so;
860      si->si_used = so->used;
861      si->si_pos = 0;
862      si->len = so->used;
863      _PyObject_GC_TRACK(si);
864      return (PyObject *)si;
865  }
866  
867  static int
set_update_internal(PySetObject * so,PyObject * other)868  set_update_internal(PySetObject *so, PyObject *other)
869  {
870      PyObject *key, *it;
871  
872      if (PyAnySet_Check(other))
873          return set_merge(so, other);
874  
875      if (PyDict_CheckExact(other)) {
876          PyObject *value;
877          Py_ssize_t pos = 0;
878          Py_hash_t hash;
879          Py_ssize_t dictsize = PyDict_GET_SIZE(other);
880  
881          /* Do one big resize at the start, rather than
882          * incrementally resizing as we insert new keys.  Expect
883          * that there will be no (or few) overlapping keys.
884          */
885          if (dictsize < 0)
886              return -1;
887          if ((so->fill + dictsize)*5 >= so->mask*3) {
888              if (set_table_resize(so, (so->used + dictsize)*2) != 0)
889                  return -1;
890          }
891          while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
892              if (set_add_entry(so, key, hash))
893                  return -1;
894          }
895          return 0;
896      }
897  
898      it = PyObject_GetIter(other);
899      if (it == NULL)
900          return -1;
901  
902      while ((key = PyIter_Next(it)) != NULL) {
903          if (set_add_key(so, key)) {
904              Py_DECREF(it);
905              Py_DECREF(key);
906              return -1;
907          }
908          Py_DECREF(key);
909      }
910      Py_DECREF(it);
911      if (PyErr_Occurred())
912          return -1;
913      return 0;
914  }
915  
916  static PyObject *
set_update(PySetObject * so,PyObject * args)917  set_update(PySetObject *so, PyObject *args)
918  {
919      Py_ssize_t i;
920  
921      for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
922          PyObject *other = PyTuple_GET_ITEM(args, i);
923          if (set_update_internal(so, other))
924              return NULL;
925      }
926      Py_RETURN_NONE;
927  }
928  
929  PyDoc_STRVAR(update_doc,
930  "Update a set with the union of itself and others.");
931  
932  /* XXX Todo:
933     If aligned memory allocations become available, make the
934     set object 64 byte aligned so that most of the fields
935     can be retrieved or updated in a single cache line.
936  */
937  
938  static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)939  make_new_set(PyTypeObject *type, PyObject *iterable)
940  {
941      assert(PyType_Check(type));
942      PySetObject *so;
943  
944      so = (PySetObject *)type->tp_alloc(type, 0);
945      if (so == NULL)
946          return NULL;
947  
948      so->fill = 0;
949      so->used = 0;
950      so->mask = PySet_MINSIZE - 1;
951      so->table = so->smalltable;
952      so->hash = -1;
953      so->finger = 0;
954      so->weakreflist = NULL;
955  
956      if (iterable != NULL) {
957          if (set_update_internal(so, iterable)) {
958              Py_DECREF(so);
959              return NULL;
960          }
961      }
962  
963      return (PyObject *)so;
964  }
965  
966  static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)967  make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
968  {
969      if (type != &PySet_Type && type != &PyFrozenSet_Type) {
970          if (PyType_IsSubtype(type, &PySet_Type))
971              type = &PySet_Type;
972          else
973              type = &PyFrozenSet_Type;
974      }
975      return make_new_set(type, iterable);
976  }
977  
978  /* The empty frozenset is a singleton */
979  static PyObject *emptyfrozenset = NULL;
980  
981  static PyObject *
make_new_frozenset(PyTypeObject * type,PyObject * iterable)982  make_new_frozenset(PyTypeObject *type, PyObject *iterable)
983  {
984      if (type != &PyFrozenSet_Type) {
985          return make_new_set(type, iterable);
986      }
987  
988      if (iterable != NULL) {
989          if (PyFrozenSet_CheckExact(iterable)) {
990              /* frozenset(f) is idempotent */
991              Py_INCREF(iterable);
992              return iterable;
993          }
994          PyObject *res = make_new_set((PyTypeObject *)type, iterable);
995          if (res == NULL || PySet_GET_SIZE(res) != 0) {
996              return res;
997          }
998          /* If the created frozenset is empty, return the empty frozenset singleton instead */
999          Py_DECREF(res);
1000      }
1001  
1002      // The empty frozenset is a singleton
1003      if (emptyfrozenset == NULL) {
1004          emptyfrozenset = make_new_set((PyTypeObject *)type, NULL);
1005      }
1006      Py_XINCREF(emptyfrozenset);
1007      return emptyfrozenset;
1008  }
1009  
1010  static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1011  frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1012  {
1013      PyObject *iterable = NULL;
1014  
1015      if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds)) {
1016          return NULL;
1017      }
1018  
1019      if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1020          return NULL;
1021      }
1022  
1023      return make_new_frozenset(type, iterable);
1024  }
1025  
1026  static PyObject *
frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1027  frozenset_vectorcall(PyObject *type, PyObject * const*args,
1028                       size_t nargsf, PyObject *kwnames)
1029  {
1030      if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1031          return NULL;
1032      }
1033  
1034      Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1035      if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1036          return NULL;
1037      }
1038  
1039      PyObject *iterable = (nargs ? args[0] : NULL);
1040      return make_new_frozenset((PyTypeObject *)type, iterable);
1041  }
1042  
1043  static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1044  set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1045  {
1046      return make_new_set(type, NULL);
1047  }
1048  
1049  /* set_swap_bodies() switches the contents of any two sets by moving their
1050     internal data pointers and, if needed, copying the internal smalltables.
1051     Semantically equivalent to:
1052  
1053       t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1054  
1055     The function always succeeds and it leaves both objects in a stable state.
1056     Useful for operations that update in-place (by allowing an intermediate
1057     result to be swapped into one of the original inputs).
1058  */
1059  
1060  static void
set_swap_bodies(PySetObject * a,PySetObject * b)1061  set_swap_bodies(PySetObject *a, PySetObject *b)
1062  {
1063      Py_ssize_t t;
1064      setentry *u;
1065      setentry tab[PySet_MINSIZE];
1066      Py_hash_t h;
1067  
1068      t = a->fill;     a->fill   = b->fill;        b->fill  = t;
1069      t = a->used;     a->used   = b->used;        b->used  = t;
1070      t = a->mask;     a->mask   = b->mask;        b->mask  = t;
1071  
1072      u = a->table;
1073      if (a->table == a->smalltable)
1074          u = b->smalltable;
1075      a->table  = b->table;
1076      if (b->table == b->smalltable)
1077          a->table = a->smalltable;
1078      b->table = u;
1079  
1080      if (a->table == a->smalltable || b->table == b->smalltable) {
1081          memcpy(tab, a->smalltable, sizeof(tab));
1082          memcpy(a->smalltable, b->smalltable, sizeof(tab));
1083          memcpy(b->smalltable, tab, sizeof(tab));
1084      }
1085  
1086      if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type)  &&
1087          PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1088          h = a->hash;     a->hash = b->hash;  b->hash = h;
1089      } else {
1090          a->hash = -1;
1091          b->hash = -1;
1092      }
1093  }
1094  
1095  static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1096  set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1097  {
1098      return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1099  }
1100  
1101  static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1102  frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1103  {
1104      if (PyFrozenSet_CheckExact(so)) {
1105          Py_INCREF(so);
1106          return (PyObject *)so;
1107      }
1108      return set_copy(so, NULL);
1109  }
1110  
1111  PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1112  
1113  static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1114  set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1115  {
1116      set_clear_internal(so);
1117      Py_RETURN_NONE;
1118  }
1119  
1120  PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1121  
1122  static PyObject *
set_union(PySetObject * so,PyObject * args)1123  set_union(PySetObject *so, PyObject *args)
1124  {
1125      PySetObject *result;
1126      PyObject *other;
1127      Py_ssize_t i;
1128  
1129      result = (PySetObject *)set_copy(so, NULL);
1130      if (result == NULL)
1131          return NULL;
1132  
1133      for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1134          other = PyTuple_GET_ITEM(args, i);
1135          if ((PyObject *)so == other)
1136              continue;
1137          if (set_update_internal(result, other)) {
1138              Py_DECREF(result);
1139              return NULL;
1140          }
1141      }
1142      return (PyObject *)result;
1143  }
1144  
1145  PyDoc_STRVAR(union_doc,
1146   "Return the union of sets as a new set.\n\
1147  \n\
1148  (i.e. all elements that are in either set.)");
1149  
1150  static PyObject *
set_or(PySetObject * so,PyObject * other)1151  set_or(PySetObject *so, PyObject *other)
1152  {
1153      PySetObject *result;
1154  
1155      if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1156          Py_RETURN_NOTIMPLEMENTED;
1157  
1158      result = (PySetObject *)set_copy(so, NULL);
1159      if (result == NULL)
1160          return NULL;
1161      if ((PyObject *)so == other)
1162          return (PyObject *)result;
1163      if (set_update_internal(result, other)) {
1164          Py_DECREF(result);
1165          return NULL;
1166      }
1167      return (PyObject *)result;
1168  }
1169  
1170  static PyObject *
set_ior(PySetObject * so,PyObject * other)1171  set_ior(PySetObject *so, PyObject *other)
1172  {
1173      if (!PyAnySet_Check(other))
1174          Py_RETURN_NOTIMPLEMENTED;
1175  
1176      if (set_update_internal(so, other))
1177          return NULL;
1178      Py_INCREF(so);
1179      return (PyObject *)so;
1180  }
1181  
1182  static PyObject *
set_intersection(PySetObject * so,PyObject * other)1183  set_intersection(PySetObject *so, PyObject *other)
1184  {
1185      PySetObject *result;
1186      PyObject *key, *it, *tmp;
1187      Py_hash_t hash;
1188      int rv;
1189  
1190      if ((PyObject *)so == other)
1191          return set_copy(so, NULL);
1192  
1193      result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1194      if (result == NULL)
1195          return NULL;
1196  
1197      if (PyAnySet_Check(other)) {
1198          Py_ssize_t pos = 0;
1199          setentry *entry;
1200  
1201          if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1202              tmp = (PyObject *)so;
1203              so = (PySetObject *)other;
1204              other = tmp;
1205          }
1206  
1207          while (set_next((PySetObject *)other, &pos, &entry)) {
1208              key = entry->key;
1209              hash = entry->hash;
1210              rv = set_contains_entry(so, key, hash);
1211              if (rv < 0) {
1212                  Py_DECREF(result);
1213                  return NULL;
1214              }
1215              if (rv) {
1216                  if (set_add_entry(result, key, hash)) {
1217                      Py_DECREF(result);
1218                      return NULL;
1219                  }
1220              }
1221          }
1222          return (PyObject *)result;
1223      }
1224  
1225      it = PyObject_GetIter(other);
1226      if (it == NULL) {
1227          Py_DECREF(result);
1228          return NULL;
1229      }
1230  
1231      while ((key = PyIter_Next(it)) != NULL) {
1232          hash = PyObject_Hash(key);
1233          if (hash == -1)
1234              goto error;
1235          rv = set_contains_entry(so, key, hash);
1236          if (rv < 0)
1237              goto error;
1238          if (rv) {
1239              if (set_add_entry(result, key, hash))
1240                  goto error;
1241          }
1242          Py_DECREF(key);
1243      }
1244      Py_DECREF(it);
1245      if (PyErr_Occurred()) {
1246          Py_DECREF(result);
1247          return NULL;
1248      }
1249      return (PyObject *)result;
1250    error:
1251      Py_DECREF(it);
1252      Py_DECREF(result);
1253      Py_DECREF(key);
1254      return NULL;
1255  }
1256  
1257  static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1258  set_intersection_multi(PySetObject *so, PyObject *args)
1259  {
1260      Py_ssize_t i;
1261      PyObject *result = (PyObject *)so;
1262  
1263      if (PyTuple_GET_SIZE(args) == 0)
1264          return set_copy(so, NULL);
1265  
1266      Py_INCREF(so);
1267      for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1268          PyObject *other = PyTuple_GET_ITEM(args, i);
1269          PyObject *newresult = set_intersection((PySetObject *)result, other);
1270          if (newresult == NULL) {
1271              Py_DECREF(result);
1272              return NULL;
1273          }
1274          Py_DECREF(result);
1275          result = newresult;
1276      }
1277      return result;
1278  }
1279  
1280  PyDoc_STRVAR(intersection_doc,
1281  "Return the intersection of two sets as a new set.\n\
1282  \n\
1283  (i.e. all elements that are in both sets.)");
1284  
1285  static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1286  set_intersection_update(PySetObject *so, PyObject *other)
1287  {
1288      PyObject *tmp;
1289  
1290      tmp = set_intersection(so, other);
1291      if (tmp == NULL)
1292          return NULL;
1293      set_swap_bodies(so, (PySetObject *)tmp);
1294      Py_DECREF(tmp);
1295      Py_RETURN_NONE;
1296  }
1297  
1298  static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1299  set_intersection_update_multi(PySetObject *so, PyObject *args)
1300  {
1301      PyObject *tmp;
1302  
1303      tmp = set_intersection_multi(so, args);
1304      if (tmp == NULL)
1305          return NULL;
1306      set_swap_bodies(so, (PySetObject *)tmp);
1307      Py_DECREF(tmp);
1308      Py_RETURN_NONE;
1309  }
1310  
1311  PyDoc_STRVAR(intersection_update_doc,
1312  "Update a set with the intersection of itself and another.");
1313  
1314  static PyObject *
set_and(PySetObject * so,PyObject * other)1315  set_and(PySetObject *so, PyObject *other)
1316  {
1317      if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1318          Py_RETURN_NOTIMPLEMENTED;
1319      return set_intersection(so, other);
1320  }
1321  
1322  static PyObject *
set_iand(PySetObject * so,PyObject * other)1323  set_iand(PySetObject *so, PyObject *other)
1324  {
1325      PyObject *result;
1326  
1327      if (!PyAnySet_Check(other))
1328          Py_RETURN_NOTIMPLEMENTED;
1329      result = set_intersection_update(so, other);
1330      if (result == NULL)
1331          return NULL;
1332      Py_DECREF(result);
1333      Py_INCREF(so);
1334      return (PyObject *)so;
1335  }
1336  
1337  static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1338  set_isdisjoint(PySetObject *so, PyObject *other)
1339  {
1340      PyObject *key, *it, *tmp;
1341      int rv;
1342  
1343      if ((PyObject *)so == other) {
1344          if (PySet_GET_SIZE(so) == 0)
1345              Py_RETURN_TRUE;
1346          else
1347              Py_RETURN_FALSE;
1348      }
1349  
1350      if (PyAnySet_CheckExact(other)) {
1351          Py_ssize_t pos = 0;
1352          setentry *entry;
1353  
1354          if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1355              tmp = (PyObject *)so;
1356              so = (PySetObject *)other;
1357              other = tmp;
1358          }
1359          while (set_next((PySetObject *)other, &pos, &entry)) {
1360              rv = set_contains_entry(so, entry->key, entry->hash);
1361              if (rv < 0)
1362                  return NULL;
1363              if (rv)
1364                  Py_RETURN_FALSE;
1365          }
1366          Py_RETURN_TRUE;
1367      }
1368  
1369      it = PyObject_GetIter(other);
1370      if (it == NULL)
1371          return NULL;
1372  
1373      while ((key = PyIter_Next(it)) != NULL) {
1374          Py_hash_t hash = PyObject_Hash(key);
1375  
1376          if (hash == -1) {
1377              Py_DECREF(key);
1378              Py_DECREF(it);
1379              return NULL;
1380          }
1381          rv = set_contains_entry(so, key, hash);
1382          Py_DECREF(key);
1383          if (rv < 0) {
1384              Py_DECREF(it);
1385              return NULL;
1386          }
1387          if (rv) {
1388              Py_DECREF(it);
1389              Py_RETURN_FALSE;
1390          }
1391      }
1392      Py_DECREF(it);
1393      if (PyErr_Occurred())
1394          return NULL;
1395      Py_RETURN_TRUE;
1396  }
1397  
1398  PyDoc_STRVAR(isdisjoint_doc,
1399  "Return True if two sets have a null intersection.");
1400  
1401  static int
set_difference_update_internal(PySetObject * so,PyObject * other)1402  set_difference_update_internal(PySetObject *so, PyObject *other)
1403  {
1404      if ((PyObject *)so == other)
1405          return set_clear_internal(so);
1406  
1407      if (PyAnySet_Check(other)) {
1408          setentry *entry;
1409          Py_ssize_t pos = 0;
1410  
1411          /* Optimization:  When the other set is more than 8 times
1412             larger than the base set, replace the other set with
1413             interesection of the two sets.
1414          */
1415          if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1416              other = set_intersection(so, other);
1417              if (other == NULL)
1418                  return -1;
1419          } else {
1420              Py_INCREF(other);
1421          }
1422  
1423          while (set_next((PySetObject *)other, &pos, &entry))
1424              if (set_discard_entry(so, entry->key, entry->hash) < 0) {
1425                  Py_DECREF(other);
1426                  return -1;
1427              }
1428  
1429          Py_DECREF(other);
1430      } else {
1431          PyObject *key, *it;
1432          it = PyObject_GetIter(other);
1433          if (it == NULL)
1434              return -1;
1435  
1436          while ((key = PyIter_Next(it)) != NULL) {
1437              if (set_discard_key(so, key) < 0) {
1438                  Py_DECREF(it);
1439                  Py_DECREF(key);
1440                  return -1;
1441              }
1442              Py_DECREF(key);
1443          }
1444          Py_DECREF(it);
1445          if (PyErr_Occurred())
1446              return -1;
1447      }
1448      /* If more than 1/4th are dummies, then resize them away. */
1449      if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1450          return 0;
1451      return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1452  }
1453  
1454  static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1455  set_difference_update(PySetObject *so, PyObject *args)
1456  {
1457      Py_ssize_t i;
1458  
1459      for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1460          PyObject *other = PyTuple_GET_ITEM(args, i);
1461          if (set_difference_update_internal(so, other))
1462              return NULL;
1463      }
1464      Py_RETURN_NONE;
1465  }
1466  
1467  PyDoc_STRVAR(difference_update_doc,
1468  "Remove all elements of another set from this set.");
1469  
1470  static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1471  set_copy_and_difference(PySetObject *so, PyObject *other)
1472  {
1473      PyObject *result;
1474  
1475      result = set_copy(so, NULL);
1476      if (result == NULL)
1477          return NULL;
1478      if (set_difference_update_internal((PySetObject *) result, other) == 0)
1479          return result;
1480      Py_DECREF(result);
1481      return NULL;
1482  }
1483  
1484  static PyObject *
set_difference(PySetObject * so,PyObject * other)1485  set_difference(PySetObject *so, PyObject *other)
1486  {
1487      PyObject *result;
1488      PyObject *key;
1489      Py_hash_t hash;
1490      setentry *entry;
1491      Py_ssize_t pos = 0, other_size;
1492      int rv;
1493  
1494      if (PyAnySet_Check(other)) {
1495          other_size = PySet_GET_SIZE(other);
1496      }
1497      else if (PyDict_CheckExact(other)) {
1498          other_size = PyDict_GET_SIZE(other);
1499      }
1500      else {
1501          return set_copy_and_difference(so, other);
1502      }
1503  
1504      /* If len(so) much more than len(other), it's more efficient to simply copy
1505       * so and then iterate other looking for common elements. */
1506      if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1507          return set_copy_and_difference(so, other);
1508      }
1509  
1510      result = make_new_set_basetype(Py_TYPE(so), NULL);
1511      if (result == NULL)
1512          return NULL;
1513  
1514      if (PyDict_CheckExact(other)) {
1515          while (set_next(so, &pos, &entry)) {
1516              key = entry->key;
1517              hash = entry->hash;
1518              rv = _PyDict_Contains(other, key, hash);
1519              if (rv < 0) {
1520                  Py_DECREF(result);
1521                  return NULL;
1522              }
1523              if (!rv) {
1524                  if (set_add_entry((PySetObject *)result, key, hash)) {
1525                      Py_DECREF(result);
1526                      return NULL;
1527                  }
1528              }
1529          }
1530          return result;
1531      }
1532  
1533      /* Iterate over so, checking for common elements in other. */
1534      while (set_next(so, &pos, &entry)) {
1535          key = entry->key;
1536          hash = entry->hash;
1537          rv = set_contains_entry((PySetObject *)other, key, hash);
1538          if (rv < 0) {
1539              Py_DECREF(result);
1540              return NULL;
1541          }
1542          if (!rv) {
1543              if (set_add_entry((PySetObject *)result, key, hash)) {
1544                  Py_DECREF(result);
1545                  return NULL;
1546              }
1547          }
1548      }
1549      return result;
1550  }
1551  
1552  static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1553  set_difference_multi(PySetObject *so, PyObject *args)
1554  {
1555      Py_ssize_t i;
1556      PyObject *result, *other;
1557  
1558      if (PyTuple_GET_SIZE(args) == 0)
1559          return set_copy(so, NULL);
1560  
1561      other = PyTuple_GET_ITEM(args, 0);
1562      result = set_difference(so, other);
1563      if (result == NULL)
1564          return NULL;
1565  
1566      for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1567          other = PyTuple_GET_ITEM(args, i);
1568          if (set_difference_update_internal((PySetObject *)result, other)) {
1569              Py_DECREF(result);
1570              return NULL;
1571          }
1572      }
1573      return result;
1574  }
1575  
1576  PyDoc_STRVAR(difference_doc,
1577  "Return the difference of two or more sets as a new set.\n\
1578  \n\
1579  (i.e. all elements that are in this set but not the others.)");
1580  static PyObject *
set_sub(PySetObject * so,PyObject * other)1581  set_sub(PySetObject *so, PyObject *other)
1582  {
1583      if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1584          Py_RETURN_NOTIMPLEMENTED;
1585      return set_difference(so, other);
1586  }
1587  
1588  static PyObject *
set_isub(PySetObject * so,PyObject * other)1589  set_isub(PySetObject *so, PyObject *other)
1590  {
1591      if (!PyAnySet_Check(other))
1592          Py_RETURN_NOTIMPLEMENTED;
1593      if (set_difference_update_internal(so, other))
1594          return NULL;
1595      Py_INCREF(so);
1596      return (PyObject *)so;
1597  }
1598  
1599  static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1600  set_symmetric_difference_update(PySetObject *so, PyObject *other)
1601  {
1602      PySetObject *otherset;
1603      PyObject *key;
1604      Py_ssize_t pos = 0;
1605      Py_hash_t hash;
1606      setentry *entry;
1607      int rv;
1608  
1609      if ((PyObject *)so == other)
1610          return set_clear(so, NULL);
1611  
1612      if (PyDict_CheckExact(other)) {
1613          PyObject *value;
1614          while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1615              Py_INCREF(key);
1616              rv = set_discard_entry(so, key, hash);
1617              if (rv < 0) {
1618                  Py_DECREF(key);
1619                  return NULL;
1620              }
1621              if (rv == DISCARD_NOTFOUND) {
1622                  if (set_add_entry(so, key, hash)) {
1623                      Py_DECREF(key);
1624                      return NULL;
1625                  }
1626              }
1627              Py_DECREF(key);
1628          }
1629          Py_RETURN_NONE;
1630      }
1631  
1632      if (PyAnySet_Check(other)) {
1633          Py_INCREF(other);
1634          otherset = (PySetObject *)other;
1635      } else {
1636          otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1637          if (otherset == NULL)
1638              return NULL;
1639      }
1640  
1641      while (set_next(otherset, &pos, &entry)) {
1642          key = entry->key;
1643          hash = entry->hash;
1644          rv = set_discard_entry(so, key, hash);
1645          if (rv < 0) {
1646              Py_DECREF(otherset);
1647              return NULL;
1648          }
1649          if (rv == DISCARD_NOTFOUND) {
1650              if (set_add_entry(so, key, hash)) {
1651                  Py_DECREF(otherset);
1652                  return NULL;
1653              }
1654          }
1655      }
1656      Py_DECREF(otherset);
1657      Py_RETURN_NONE;
1658  }
1659  
1660  PyDoc_STRVAR(symmetric_difference_update_doc,
1661  "Update a set with the symmetric difference of itself and another.");
1662  
1663  static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1664  set_symmetric_difference(PySetObject *so, PyObject *other)
1665  {
1666      PyObject *rv;
1667      PySetObject *otherset;
1668  
1669      otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1670      if (otherset == NULL)
1671          return NULL;
1672      rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1673      if (rv == NULL) {
1674          Py_DECREF(otherset);
1675          return NULL;
1676      }
1677      Py_DECREF(rv);
1678      return (PyObject *)otherset;
1679  }
1680  
1681  PyDoc_STRVAR(symmetric_difference_doc,
1682  "Return the symmetric difference of two sets as a new set.\n\
1683  \n\
1684  (i.e. all elements that are in exactly one of the sets.)");
1685  
1686  static PyObject *
set_xor(PySetObject * so,PyObject * other)1687  set_xor(PySetObject *so, PyObject *other)
1688  {
1689      if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1690          Py_RETURN_NOTIMPLEMENTED;
1691      return set_symmetric_difference(so, other);
1692  }
1693  
1694  static PyObject *
set_ixor(PySetObject * so,PyObject * other)1695  set_ixor(PySetObject *so, PyObject *other)
1696  {
1697      PyObject *result;
1698  
1699      if (!PyAnySet_Check(other))
1700          Py_RETURN_NOTIMPLEMENTED;
1701      result = set_symmetric_difference_update(so, other);
1702      if (result == NULL)
1703          return NULL;
1704      Py_DECREF(result);
1705      Py_INCREF(so);
1706      return (PyObject *)so;
1707  }
1708  
1709  static PyObject *
set_issubset(PySetObject * so,PyObject * other)1710  set_issubset(PySetObject *so, PyObject *other)
1711  {
1712      setentry *entry;
1713      Py_ssize_t pos = 0;
1714      int rv;
1715  
1716      if (!PyAnySet_Check(other)) {
1717          PyObject *tmp, *result;
1718          tmp = make_new_set(&PySet_Type, other);
1719          if (tmp == NULL)
1720              return NULL;
1721          result = set_issubset(so, tmp);
1722          Py_DECREF(tmp);
1723          return result;
1724      }
1725      if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1726          Py_RETURN_FALSE;
1727  
1728      while (set_next(so, &pos, &entry)) {
1729          rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1730          if (rv < 0)
1731              return NULL;
1732          if (!rv)
1733              Py_RETURN_FALSE;
1734      }
1735      Py_RETURN_TRUE;
1736  }
1737  
1738  PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1739  
1740  static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1741  set_issuperset(PySetObject *so, PyObject *other)
1742  {
1743      PyObject *tmp, *result;
1744  
1745      if (!PyAnySet_Check(other)) {
1746          tmp = make_new_set(&PySet_Type, other);
1747          if (tmp == NULL)
1748              return NULL;
1749          result = set_issuperset(so, tmp);
1750          Py_DECREF(tmp);
1751          return result;
1752      }
1753      return set_issubset((PySetObject *)other, (PyObject *)so);
1754  }
1755  
1756  PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1757  
1758  static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1759  set_richcompare(PySetObject *v, PyObject *w, int op)
1760  {
1761      PyObject *r1;
1762      int r2;
1763  
1764      if(!PyAnySet_Check(w))
1765          Py_RETURN_NOTIMPLEMENTED;
1766  
1767      switch (op) {
1768      case Py_EQ:
1769          if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1770              Py_RETURN_FALSE;
1771          if (v->hash != -1  &&
1772              ((PySetObject *)w)->hash != -1 &&
1773              v->hash != ((PySetObject *)w)->hash)
1774              Py_RETURN_FALSE;
1775          return set_issubset(v, w);
1776      case Py_NE:
1777          r1 = set_richcompare(v, w, Py_EQ);
1778          if (r1 == NULL)
1779              return NULL;
1780          r2 = PyObject_IsTrue(r1);
1781          Py_DECREF(r1);
1782          if (r2 < 0)
1783              return NULL;
1784          return PyBool_FromLong(!r2);
1785      case Py_LE:
1786          return set_issubset(v, w);
1787      case Py_GE:
1788          return set_issuperset(v, w);
1789      case Py_LT:
1790          if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1791              Py_RETURN_FALSE;
1792          return set_issubset(v, w);
1793      case Py_GT:
1794          if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1795              Py_RETURN_FALSE;
1796          return set_issuperset(v, w);
1797      }
1798      Py_RETURN_NOTIMPLEMENTED;
1799  }
1800  
1801  static PyObject *
set_add(PySetObject * so,PyObject * key)1802  set_add(PySetObject *so, PyObject *key)
1803  {
1804      if (set_add_key(so, key))
1805          return NULL;
1806      Py_RETURN_NONE;
1807  }
1808  
1809  PyDoc_STRVAR(add_doc,
1810  "Add an element to a set.\n\
1811  \n\
1812  This has no effect if the element is already present.");
1813  
1814  static int
set_contains(PySetObject * so,PyObject * key)1815  set_contains(PySetObject *so, PyObject *key)
1816  {
1817      PyObject *tmpkey;
1818      int rv;
1819  
1820      rv = set_contains_key(so, key);
1821      if (rv < 0) {
1822          if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1823              return -1;
1824          PyErr_Clear();
1825          tmpkey = make_new_set(&PyFrozenSet_Type, key);
1826          if (tmpkey == NULL)
1827              return -1;
1828          rv = set_contains_key(so, tmpkey);
1829          Py_DECREF(tmpkey);
1830      }
1831      return rv;
1832  }
1833  
1834  static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1835  set_direct_contains(PySetObject *so, PyObject *key)
1836  {
1837      long result;
1838  
1839      result = set_contains(so, key);
1840      if (result < 0)
1841          return NULL;
1842      return PyBool_FromLong(result);
1843  }
1844  
1845  PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1846  
1847  static PyObject *
set_remove(PySetObject * so,PyObject * key)1848  set_remove(PySetObject *so, PyObject *key)
1849  {
1850      PyObject *tmpkey;
1851      int rv;
1852  
1853      rv = set_discard_key(so, key);
1854      if (rv < 0) {
1855          if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1856              return NULL;
1857          PyErr_Clear();
1858          tmpkey = make_new_set(&PyFrozenSet_Type, key);
1859          if (tmpkey == NULL)
1860              return NULL;
1861          rv = set_discard_key(so, tmpkey);
1862          Py_DECREF(tmpkey);
1863          if (rv < 0)
1864              return NULL;
1865      }
1866  
1867      if (rv == DISCARD_NOTFOUND) {
1868          _PyErr_SetKeyError(key);
1869          return NULL;
1870      }
1871      Py_RETURN_NONE;
1872  }
1873  
1874  PyDoc_STRVAR(remove_doc,
1875  "Remove an element from a set; it must be a member.\n\
1876  \n\
1877  If the element is not a member, raise a KeyError.");
1878  
1879  static PyObject *
set_discard(PySetObject * so,PyObject * key)1880  set_discard(PySetObject *so, PyObject *key)
1881  {
1882      PyObject *tmpkey;
1883      int rv;
1884  
1885      rv = set_discard_key(so, key);
1886      if (rv < 0) {
1887          if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1888              return NULL;
1889          PyErr_Clear();
1890          tmpkey = make_new_set(&PyFrozenSet_Type, key);
1891          if (tmpkey == NULL)
1892              return NULL;
1893          rv = set_discard_key(so, tmpkey);
1894          Py_DECREF(tmpkey);
1895          if (rv < 0)
1896              return NULL;
1897      }
1898      Py_RETURN_NONE;
1899  }
1900  
1901  PyDoc_STRVAR(discard_doc,
1902  "Remove an element from a set if it is a member.\n\
1903  \n\
1904  If the element is not a member, do nothing.");
1905  
1906  static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1907  set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1908  {
1909      PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1910      _Py_IDENTIFIER(__dict__);
1911  
1912      keys = PySequence_List((PyObject *)so);
1913      if (keys == NULL)
1914          goto done;
1915      args = PyTuple_Pack(1, keys);
1916      if (args == NULL)
1917          goto done;
1918      if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) {
1919          goto done;
1920      }
1921      if (dict == NULL) {
1922          dict = Py_None;
1923          Py_INCREF(dict);
1924      }
1925      result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1926  done:
1927      Py_XDECREF(args);
1928      Py_XDECREF(keys);
1929      Py_XDECREF(dict);
1930      return result;
1931  }
1932  
1933  static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1934  set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1935  {
1936      Py_ssize_t res;
1937  
1938      res = _PyObject_SIZE(Py_TYPE(so));
1939      if (so->table != so->smalltable)
1940          res = res + (so->mask + 1) * sizeof(setentry);
1941      return PyLong_FromSsize_t(res);
1942  }
1943  
1944  PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1945  static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1946  set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1947  {
1948      PyObject *iterable = NULL;
1949  
1950      if (!_PyArg_NoKeywords("set", kwds))
1951          return -1;
1952      if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1953          return -1;
1954      if (self->fill)
1955          set_clear_internal(self);
1956      self->hash = -1;
1957      if (iterable == NULL)
1958          return 0;
1959      return set_update_internal(self, iterable);
1960  }
1961  
1962  static PyObject*
set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1963  set_vectorcall(PyObject *type, PyObject * const*args,
1964                 size_t nargsf, PyObject *kwnames)
1965  {
1966      assert(PyType_Check(type));
1967  
1968      if (!_PyArg_NoKwnames("set", kwnames)) {
1969          return NULL;
1970      }
1971  
1972      Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1973      if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
1974          return NULL;
1975      }
1976  
1977      if (nargs) {
1978          return make_new_set((PyTypeObject *)type, args[0]);
1979      }
1980  
1981      return make_new_set((PyTypeObject *)type, NULL);
1982  }
1983  
1984  static PySequenceMethods set_as_sequence = {
1985      set_len,                            /* sq_length */
1986      0,                                  /* sq_concat */
1987      0,                                  /* sq_repeat */
1988      0,                                  /* sq_item */
1989      0,                                  /* sq_slice */
1990      0,                                  /* sq_ass_item */
1991      0,                                  /* sq_ass_slice */
1992      (objobjproc)set_contains,           /* sq_contains */
1993  };
1994  
1995  /* set object ********************************************************/
1996  
1997  #ifdef Py_DEBUG
1998  static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
1999  
2000  PyDoc_STRVAR(test_c_api_doc, "Exercises C API.  Returns True.\n\
2001  All is well if assertions don't fail.");
2002  #endif
2003  
2004  static PyMethodDef set_methods[] = {
2005      {"add",             (PyCFunction)set_add,           METH_O,
2006       add_doc},
2007      {"clear",           (PyCFunction)set_clear,         METH_NOARGS,
2008       clear_doc},
2009      {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2010       contains_doc},
2011      {"copy",            (PyCFunction)set_copy,          METH_NOARGS,
2012       copy_doc},
2013      {"discard",         (PyCFunction)set_discard,       METH_O,
2014       discard_doc},
2015      {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2016       difference_doc},
2017      {"difference_update",       (PyCFunction)set_difference_update,     METH_VARARGS,
2018       difference_update_doc},
2019      {"intersection",(PyCFunction)set_intersection_multi,        METH_VARARGS,
2020       intersection_doc},
2021      {"intersection_update",(PyCFunction)set_intersection_update_multi,          METH_VARARGS,
2022       intersection_update_doc},
2023      {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2024       isdisjoint_doc},
2025      {"issubset",        (PyCFunction)set_issubset,      METH_O,
2026       issubset_doc},
2027      {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2028       issuperset_doc},
2029      {"pop",             (PyCFunction)set_pop,           METH_NOARGS,
2030       pop_doc},
2031      {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2032       reduce_doc},
2033      {"remove",          (PyCFunction)set_remove,        METH_O,
2034       remove_doc},
2035      {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2036       sizeof_doc},
2037      {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2038       symmetric_difference_doc},
2039      {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update,        METH_O,
2040       symmetric_difference_update_doc},
2041  #ifdef Py_DEBUG
2042      {"test_c_api",      (PyCFunction)test_c_api,        METH_NOARGS,
2043       test_c_api_doc},
2044  #endif
2045      {"union",           (PyCFunction)set_union,         METH_VARARGS,
2046       union_doc},
2047      {"update",          (PyCFunction)set_update,        METH_VARARGS,
2048       update_doc},
2049      {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2050      {NULL,              NULL}   /* sentinel */
2051  };
2052  
2053  static PyNumberMethods set_as_number = {
2054      0,                                  /*nb_add*/
2055      (binaryfunc)set_sub,                /*nb_subtract*/
2056      0,                                  /*nb_multiply*/
2057      0,                                  /*nb_remainder*/
2058      0,                                  /*nb_divmod*/
2059      0,                                  /*nb_power*/
2060      0,                                  /*nb_negative*/
2061      0,                                  /*nb_positive*/
2062      0,                                  /*nb_absolute*/
2063      0,                                  /*nb_bool*/
2064      0,                                  /*nb_invert*/
2065      0,                                  /*nb_lshift*/
2066      0,                                  /*nb_rshift*/
2067      (binaryfunc)set_and,                /*nb_and*/
2068      (binaryfunc)set_xor,                /*nb_xor*/
2069      (binaryfunc)set_or,                 /*nb_or*/
2070      0,                                  /*nb_int*/
2071      0,                                  /*nb_reserved*/
2072      0,                                  /*nb_float*/
2073      0,                                  /*nb_inplace_add*/
2074      (binaryfunc)set_isub,               /*nb_inplace_subtract*/
2075      0,                                  /*nb_inplace_multiply*/
2076      0,                                  /*nb_inplace_remainder*/
2077      0,                                  /*nb_inplace_power*/
2078      0,                                  /*nb_inplace_lshift*/
2079      0,                                  /*nb_inplace_rshift*/
2080      (binaryfunc)set_iand,               /*nb_inplace_and*/
2081      (binaryfunc)set_ixor,               /*nb_inplace_xor*/
2082      (binaryfunc)set_ior,                /*nb_inplace_or*/
2083  };
2084  
2085  PyDoc_STRVAR(set_doc,
2086  "set() -> new empty set object\n\
2087  set(iterable) -> new set object\n\
2088  \n\
2089  Build an unordered collection of unique elements.");
2090  
2091  PyTypeObject PySet_Type = {
2092      PyVarObject_HEAD_INIT(&PyType_Type, 0)
2093      "set",                              /* tp_name */
2094      sizeof(PySetObject),                /* tp_basicsize */
2095      0,                                  /* tp_itemsize */
2096      /* methods */
2097      (destructor)set_dealloc,            /* tp_dealloc */
2098      0,                                  /* tp_vectorcall_offset */
2099      0,                                  /* tp_getattr */
2100      0,                                  /* tp_setattr */
2101      0,                                  /* tp_as_async */
2102      (reprfunc)set_repr,                 /* tp_repr */
2103      &set_as_number,                     /* tp_as_number */
2104      &set_as_sequence,                   /* tp_as_sequence */
2105      0,                                  /* tp_as_mapping */
2106      PyObject_HashNotImplemented,        /* tp_hash */
2107      0,                                  /* tp_call */
2108      0,                                  /* tp_str */
2109      PyObject_GenericGetAttr,            /* tp_getattro */
2110      0,                                  /* tp_setattro */
2111      0,                                  /* tp_as_buffer */
2112      Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2113          Py_TPFLAGS_BASETYPE,            /* tp_flags */
2114      set_doc,                            /* tp_doc */
2115      (traverseproc)set_traverse,         /* tp_traverse */
2116      (inquiry)set_clear_internal,        /* tp_clear */
2117      (richcmpfunc)set_richcompare,       /* tp_richcompare */
2118      offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2119      (getiterfunc)set_iter,              /* tp_iter */
2120      0,                                  /* tp_iternext */
2121      set_methods,                        /* tp_methods */
2122      0,                                  /* tp_members */
2123      0,                                  /* tp_getset */
2124      0,                                  /* tp_base */
2125      0,                                  /* tp_dict */
2126      0,                                  /* tp_descr_get */
2127      0,                                  /* tp_descr_set */
2128      0,                                  /* tp_dictoffset */
2129      (initproc)set_init,                 /* tp_init */
2130      PyType_GenericAlloc,                /* tp_alloc */
2131      set_new,                            /* tp_new */
2132      PyObject_GC_Del,                    /* tp_free */
2133      .tp_vectorcall = set_vectorcall,
2134  };
2135  
2136  /* frozenset object ********************************************************/
2137  
2138  
2139  static PyMethodDef frozenset_methods[] = {
2140      {"__contains__",(PyCFunction)set_direct_contains,           METH_O | METH_COEXIST,
2141       contains_doc},
2142      {"copy",            (PyCFunction)frozenset_copy,    METH_NOARGS,
2143       copy_doc},
2144      {"difference",      (PyCFunction)set_difference_multi,      METH_VARARGS,
2145       difference_doc},
2146      {"intersection",    (PyCFunction)set_intersection_multi,    METH_VARARGS,
2147       intersection_doc},
2148      {"isdisjoint",      (PyCFunction)set_isdisjoint,    METH_O,
2149       isdisjoint_doc},
2150      {"issubset",        (PyCFunction)set_issubset,      METH_O,
2151       issubset_doc},
2152      {"issuperset",      (PyCFunction)set_issuperset,    METH_O,
2153       issuperset_doc},
2154      {"__reduce__",      (PyCFunction)set_reduce,        METH_NOARGS,
2155       reduce_doc},
2156      {"__sizeof__",      (PyCFunction)set_sizeof,        METH_NOARGS,
2157       sizeof_doc},
2158      {"symmetric_difference",(PyCFunction)set_symmetric_difference,      METH_O,
2159       symmetric_difference_doc},
2160      {"union",           (PyCFunction)set_union,         METH_VARARGS,
2161       union_doc},
2162      {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2163      {NULL,              NULL}   /* sentinel */
2164  };
2165  
2166  static PyNumberMethods frozenset_as_number = {
2167      0,                                  /*nb_add*/
2168      (binaryfunc)set_sub,                /*nb_subtract*/
2169      0,                                  /*nb_multiply*/
2170      0,                                  /*nb_remainder*/
2171      0,                                  /*nb_divmod*/
2172      0,                                  /*nb_power*/
2173      0,                                  /*nb_negative*/
2174      0,                                  /*nb_positive*/
2175      0,                                  /*nb_absolute*/
2176      0,                                  /*nb_bool*/
2177      0,                                  /*nb_invert*/
2178      0,                                  /*nb_lshift*/
2179      0,                                  /*nb_rshift*/
2180      (binaryfunc)set_and,                /*nb_and*/
2181      (binaryfunc)set_xor,                /*nb_xor*/
2182      (binaryfunc)set_or,                 /*nb_or*/
2183  };
2184  
2185  PyDoc_STRVAR(frozenset_doc,
2186  "frozenset() -> empty frozenset object\n\
2187  frozenset(iterable) -> frozenset object\n\
2188  \n\
2189  Build an immutable unordered collection of unique elements.");
2190  
2191  PyTypeObject PyFrozenSet_Type = {
2192      PyVarObject_HEAD_INIT(&PyType_Type, 0)
2193      "frozenset",                        /* tp_name */
2194      sizeof(PySetObject),                /* tp_basicsize */
2195      0,                                  /* tp_itemsize */
2196      /* methods */
2197      (destructor)set_dealloc,            /* tp_dealloc */
2198      0,                                  /* tp_vectorcall_offset */
2199      0,                                  /* tp_getattr */
2200      0,                                  /* tp_setattr */
2201      0,                                  /* tp_as_async */
2202      (reprfunc)set_repr,                 /* tp_repr */
2203      &frozenset_as_number,               /* tp_as_number */
2204      &set_as_sequence,                   /* tp_as_sequence */
2205      0,                                  /* tp_as_mapping */
2206      frozenset_hash,                     /* tp_hash */
2207      0,                                  /* tp_call */
2208      0,                                  /* tp_str */
2209      PyObject_GenericGetAttr,            /* tp_getattro */
2210      0,                                  /* tp_setattro */
2211      0,                                  /* tp_as_buffer */
2212      Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2213          Py_TPFLAGS_BASETYPE,            /* tp_flags */
2214      frozenset_doc,                      /* tp_doc */
2215      (traverseproc)set_traverse,         /* tp_traverse */
2216      (inquiry)set_clear_internal,        /* tp_clear */
2217      (richcmpfunc)set_richcompare,       /* tp_richcompare */
2218      offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2219      (getiterfunc)set_iter,              /* tp_iter */
2220      0,                                  /* tp_iternext */
2221      frozenset_methods,                  /* tp_methods */
2222      0,                                  /* tp_members */
2223      0,                                  /* tp_getset */
2224      0,                                  /* tp_base */
2225      0,                                  /* tp_dict */
2226      0,                                  /* tp_descr_get */
2227      0,                                  /* tp_descr_set */
2228      0,                                  /* tp_dictoffset */
2229      0,                                  /* tp_init */
2230      PyType_GenericAlloc,                /* tp_alloc */
2231      frozenset_new,                      /* tp_new */
2232      PyObject_GC_Del,                    /* tp_free */
2233      .tp_vectorcall = frozenset_vectorcall,
2234  };
2235  
2236  
2237  /***** C API functions *************************************************/
2238  
2239  PyObject *
PySet_New(PyObject * iterable)2240  PySet_New(PyObject *iterable)
2241  {
2242      return make_new_set(&PySet_Type, iterable);
2243  }
2244  
2245  PyObject *
PyFrozenSet_New(PyObject * iterable)2246  PyFrozenSet_New(PyObject *iterable)
2247  {
2248      return make_new_set(&PyFrozenSet_Type, iterable);
2249  }
2250  
2251  Py_ssize_t
PySet_Size(PyObject * anyset)2252  PySet_Size(PyObject *anyset)
2253  {
2254      if (!PyAnySet_Check(anyset)) {
2255          PyErr_BadInternalCall();
2256          return -1;
2257      }
2258      return PySet_GET_SIZE(anyset);
2259  }
2260  
2261  int
PySet_Clear(PyObject * set)2262  PySet_Clear(PyObject *set)
2263  {
2264      if (!PySet_Check(set)) {
2265          PyErr_BadInternalCall();
2266          return -1;
2267      }
2268      return set_clear_internal((PySetObject *)set);
2269  }
2270  
2271  int
PySet_Contains(PyObject * anyset,PyObject * key)2272  PySet_Contains(PyObject *anyset, PyObject *key)
2273  {
2274      if (!PyAnySet_Check(anyset)) {
2275          PyErr_BadInternalCall();
2276          return -1;
2277      }
2278      return set_contains_key((PySetObject *)anyset, key);
2279  }
2280  
2281  int
PySet_Discard(PyObject * set,PyObject * key)2282  PySet_Discard(PyObject *set, PyObject *key)
2283  {
2284      if (!PySet_Check(set)) {
2285          PyErr_BadInternalCall();
2286          return -1;
2287      }
2288      return set_discard_key((PySetObject *)set, key);
2289  }
2290  
2291  int
PySet_Add(PyObject * anyset,PyObject * key)2292  PySet_Add(PyObject *anyset, PyObject *key)
2293  {
2294      if (!PySet_Check(anyset) &&
2295          (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2296          PyErr_BadInternalCall();
2297          return -1;
2298      }
2299      return set_add_key((PySetObject *)anyset, key);
2300  }
2301  
2302  void
_PySet_Fini(void)2303  _PySet_Fini(void)
2304  {
2305      Py_CLEAR(emptyfrozenset);
2306  }
2307  
2308  int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2309  _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2310  {
2311      setentry *entry;
2312  
2313      if (!PyAnySet_Check(set)) {
2314          PyErr_BadInternalCall();
2315          return -1;
2316      }
2317      if (set_next((PySetObject *)set, pos, &entry) == 0)
2318          return 0;
2319      *key = entry->key;
2320      *hash = entry->hash;
2321      return 1;
2322  }
2323  
2324  PyObject *
PySet_Pop(PyObject * set)2325  PySet_Pop(PyObject *set)
2326  {
2327      if (!PySet_Check(set)) {
2328          PyErr_BadInternalCall();
2329          return NULL;
2330      }
2331      return set_pop((PySetObject *)set, NULL);
2332  }
2333  
2334  int
_PySet_Update(PyObject * set,PyObject * iterable)2335  _PySet_Update(PyObject *set, PyObject *iterable)
2336  {
2337      if (!PySet_Check(set)) {
2338          PyErr_BadInternalCall();
2339          return -1;
2340      }
2341      return set_update_internal((PySetObject *)set, iterable);
2342  }
2343  
2344  /* Exported for the gdb plugin's benefit. */
2345  PyObject *_PySet_Dummy = dummy;
2346  
2347  #ifdef Py_DEBUG
2348  
2349  /* Test code to be called with any three element set.
2350     Returns True and original set is restored. */
2351  
2352  #define assertRaises(call_return_value, exception)              \
2353      do {                                                        \
2354          assert(call_return_value);                              \
2355          assert(PyErr_ExceptionMatches(exception));              \
2356          PyErr_Clear();                                          \
2357      } while(0)
2358  
2359  static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2360  test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2361  {
2362      Py_ssize_t count;
2363      const char *s;
2364      Py_ssize_t i;
2365      PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2366      PyObject *ob = (PyObject *)so;
2367      Py_hash_t hash;
2368      PyObject *str;
2369  
2370      /* Verify preconditions */
2371      assert(PyAnySet_Check(ob));
2372      assert(PyAnySet_CheckExact(ob));
2373      assert(!PyFrozenSet_CheckExact(ob));
2374  
2375      /* so.clear(); so |= set("abc"); */
2376      str = PyUnicode_FromString("abc");
2377      if (str == NULL)
2378          return NULL;
2379      set_clear_internal(so);
2380      if (set_update_internal(so, str)) {
2381          Py_DECREF(str);
2382          return NULL;
2383      }
2384      Py_DECREF(str);
2385  
2386      /* Exercise type/size checks */
2387      assert(PySet_Size(ob) == 3);
2388      assert(PySet_GET_SIZE(ob) == 3);
2389  
2390      /* Raise TypeError for non-iterable constructor arguments */
2391      assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2392      assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2393  
2394      /* Raise TypeError for unhashable key */
2395      dup = PySet_New(ob);
2396      assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2397      assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2398      assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2399  
2400      /* Exercise successful pop, contains, add, and discard */
2401      elem = PySet_Pop(ob);
2402      assert(PySet_Contains(ob, elem) == 0);
2403      assert(PySet_GET_SIZE(ob) == 2);
2404      assert(PySet_Add(ob, elem) == 0);
2405      assert(PySet_Contains(ob, elem) == 1);
2406      assert(PySet_GET_SIZE(ob) == 3);
2407      assert(PySet_Discard(ob, elem) == 1);
2408      assert(PySet_GET_SIZE(ob) == 2);
2409      assert(PySet_Discard(ob, elem) == 0);
2410      assert(PySet_GET_SIZE(ob) == 2);
2411  
2412      /* Exercise clear */
2413      dup2 = PySet_New(dup);
2414      assert(PySet_Clear(dup2) == 0);
2415      assert(PySet_Size(dup2) == 0);
2416      Py_DECREF(dup2);
2417  
2418      /* Raise SystemError on clear or update of frozen set */
2419      f = PyFrozenSet_New(dup);
2420      assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2421      assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2422      assert(PySet_Add(f, elem) == 0);
2423      Py_INCREF(f);
2424      assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2425      Py_DECREF(f);
2426      Py_DECREF(f);
2427  
2428      /* Exercise direct iteration */
2429      i = 0, count = 0;
2430      while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2431          s = PyUnicode_AsUTF8(x);
2432          assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2433          count++;
2434      }
2435      assert(count == 3);
2436  
2437      /* Exercise updates */
2438      dup2 = PySet_New(NULL);
2439      assert(_PySet_Update(dup2, dup) == 0);
2440      assert(PySet_Size(dup2) == 3);
2441      assert(_PySet_Update(dup2, dup) == 0);
2442      assert(PySet_Size(dup2) == 3);
2443      Py_DECREF(dup2);
2444  
2445      /* Raise SystemError when self argument is not a set or frozenset. */
2446      t = PyTuple_New(0);
2447      assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2448      assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2449      Py_DECREF(t);
2450  
2451      /* Raise SystemError when self argument is not a set. */
2452      f = PyFrozenSet_New(dup);
2453      assert(PySet_Size(f) == 3);
2454      assert(PyFrozenSet_CheckExact(f));
2455      assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2456      assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2457      Py_DECREF(f);
2458  
2459      /* Raise KeyError when popping from an empty set */
2460      assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2461      Py_DECREF(ob);
2462      assert(PySet_GET_SIZE(ob) == 0);
2463      assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2464  
2465      /* Restore the set from the copy using the PyNumber API */
2466      assert(PyNumber_InPlaceOr(ob, dup) == ob);
2467      Py_DECREF(ob);
2468  
2469      /* Verify constructors accept NULL arguments */
2470      f = PySet_New(NULL);
2471      assert(f != NULL);
2472      assert(PySet_GET_SIZE(f) == 0);
2473      Py_DECREF(f);
2474      f = PyFrozenSet_New(NULL);
2475      assert(f != NULL);
2476      assert(PyFrozenSet_CheckExact(f));
2477      assert(PySet_GET_SIZE(f) == 0);
2478      Py_DECREF(f);
2479  
2480      Py_DECREF(elem);
2481      Py_DECREF(dup);
2482      Py_RETURN_TRUE;
2483  }
2484  
2485  #undef assertRaises
2486  
2487  #endif
2488  
2489  /***** Dummy Struct  *************************************************/
2490  
2491  static PyObject *
dummy_repr(PyObject * op)2492  dummy_repr(PyObject *op)
2493  {
2494      return PyUnicode_FromString("<dummy key>");
2495  }
2496  
2497  static void _Py_NO_RETURN
dummy_dealloc(PyObject * ignore)2498  dummy_dealloc(PyObject* ignore)
2499  {
2500      Py_FatalError("deallocating <dummy key>");
2501  }
2502  
2503  static PyTypeObject _PySetDummy_Type = {
2504      PyVarObject_HEAD_INIT(&PyType_Type, 0)
2505      "<dummy key> type",
2506      0,
2507      0,
2508      dummy_dealloc,      /*tp_dealloc*/ /*never called*/
2509      0,                  /*tp_vectorcall_offset*/
2510      0,                  /*tp_getattr*/
2511      0,                  /*tp_setattr*/
2512      0,                  /*tp_as_async*/
2513      dummy_repr,         /*tp_repr*/
2514      0,                  /*tp_as_number*/
2515      0,                  /*tp_as_sequence*/
2516      0,                  /*tp_as_mapping*/
2517      0,                  /*tp_hash */
2518      0,                  /*tp_call */
2519      0,                  /*tp_str */
2520      0,                  /*tp_getattro */
2521      0,                  /*tp_setattro */
2522      0,                  /*tp_as_buffer */
2523      Py_TPFLAGS_DEFAULT, /*tp_flags */
2524  };
2525  
2526  static PyObject _dummy_struct = {
2527    _PyObject_EXTRA_INIT
2528    2, &_PySetDummy_Type
2529  };
2530  
2531