1
2 /* set object implementation
3
4 Written and maintained by Raymond D. Hettinger <python@rcn.com>
5 Derived from Lib/sets.py and Objects/dictobject.c.
6
7 The basic lookup function used by all operations.
8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10 The initial probe index is computed as hash mod the table size.
11 Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13 To improve cache locality, each probe inspects a series of consecutive
14 nearby entries before moving on to probes elsewhere in memory. This leaves
15 us with a hybrid of linear probing and randomized probing. The linear probing
16 reduces the cost of hash collisions because consecutive memory accesses
17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18 we then use more of the upper bits from the hash value and apply a simple
19 linear congruential random number generator. This helps break-up long
20 chains of collisions.
21
22 All arithmetic on hash should ignore overflow.
23
24 Unlike the dictionary implementation, the lookkey function can return
25 NULL if the rich comparison returns an error.
26
27 Use cases for sets differ considerably from dictionaries where looked-up
28 keys are more likely to be present. In contrast, sets are primarily
29 about membership testing where the presence of an element is not known in
30 advance. Accordingly, the set implementation needs to optimize for both
31 the found and not-found case.
32 */
33
34 #include "Python.h"
35 #include "pycore_object.h" // _PyObject_GC_UNTRACK()
36 #include <stddef.h> // offsetof()
37
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40
41 #define dummy (&_dummy_struct)
42
43
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58 setentry *table;
59 setentry *entry;
60 size_t perturb = hash;
61 size_t mask = so->mask;
62 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63 int probes;
64 int cmp;
65
66 while (1) {
67 entry = &so->table[i];
68 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69 do {
70 if (entry->hash == 0 && entry->key == NULL)
71 return entry;
72 if (entry->hash == hash) {
73 PyObject *startkey = entry->key;
74 assert(startkey != dummy);
75 if (startkey == key)
76 return entry;
77 if (PyUnicode_CheckExact(startkey)
78 && PyUnicode_CheckExact(key)
79 && _PyUnicode_EQ(startkey, key))
80 return entry;
81 table = so->table;
82 Py_INCREF(startkey);
83 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84 Py_DECREF(startkey);
85 if (cmp < 0)
86 return NULL;
87 if (table != so->table || entry->key != startkey)
88 return set_lookkey(so, key, hash);
89 if (cmp > 0)
90 return entry;
91 mask = so->mask;
92 }
93 entry++;
94 } while (probes--);
95 perturb >>= PERTURB_SHIFT;
96 i = (i * 5 + 1 + perturb) & mask;
97 }
98 }
99
100 static int set_table_resize(PySetObject *, Py_ssize_t);
101
102 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104 {
105 setentry *table;
106 setentry *freeslot;
107 setentry *entry;
108 size_t perturb;
109 size_t mask;
110 size_t i; /* Unsigned for defined overflow behavior */
111 int probes;
112 int cmp;
113
114 /* Pre-increment is necessary to prevent arbitrary code in the rich
115 comparison from deallocating the key just before the insertion. */
116 Py_INCREF(key);
117
118 restart:
119
120 mask = so->mask;
121 i = (size_t)hash & mask;
122 freeslot = NULL;
123 perturb = hash;
124
125 while (1) {
126 entry = &so->table[i];
127 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
128 do {
129 if (entry->hash == 0 && entry->key == NULL)
130 goto found_unused_or_dummy;
131 if (entry->hash == hash) {
132 PyObject *startkey = entry->key;
133 assert(startkey != dummy);
134 if (startkey == key)
135 goto found_active;
136 if (PyUnicode_CheckExact(startkey)
137 && PyUnicode_CheckExact(key)
138 && _PyUnicode_EQ(startkey, key))
139 goto found_active;
140 table = so->table;
141 Py_INCREF(startkey);
142 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
143 Py_DECREF(startkey);
144 if (cmp > 0)
145 goto found_active;
146 if (cmp < 0)
147 goto comparison_error;
148 if (table != so->table || entry->key != startkey)
149 goto restart;
150 mask = so->mask;
151 }
152 else if (entry->hash == -1) {
153 assert (entry->key == dummy);
154 freeslot = entry;
155 }
156 entry++;
157 } while (probes--);
158 perturb >>= PERTURB_SHIFT;
159 i = (i * 5 + 1 + perturb) & mask;
160 }
161
162 found_unused_or_dummy:
163 if (freeslot == NULL)
164 goto found_unused;
165 so->used++;
166 freeslot->key = key;
167 freeslot->hash = hash;
168 return 0;
169
170 found_unused:
171 so->fill++;
172 so->used++;
173 entry->key = key;
174 entry->hash = hash;
175 if ((size_t)so->fill*5 < mask*3)
176 return 0;
177 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
178
179 found_active:
180 Py_DECREF(key);
181 return 0;
182
183 comparison_error:
184 Py_DECREF(key);
185 return -1;
186 }
187
188 /*
189 Internal routine used by set_table_resize() to insert an item which is
190 known to be absent from the set. Besides the performance benefit,
191 there is also safety benefit since using set_add_entry() risks making
192 a callback in the middle of a set_table_resize(), see issue 1456209.
193 The caller is responsible for updating the key's reference count and
194 the setobject's fill and used fields.
195 */
196 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)197 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
198 {
199 setentry *entry;
200 size_t perturb = hash;
201 size_t i = (size_t)hash & mask;
202 size_t j;
203
204 while (1) {
205 entry = &table[i];
206 if (entry->key == NULL)
207 goto found_null;
208 if (i + LINEAR_PROBES <= mask) {
209 for (j = 0; j < LINEAR_PROBES; j++) {
210 entry++;
211 if (entry->key == NULL)
212 goto found_null;
213 }
214 }
215 perturb >>= PERTURB_SHIFT;
216 i = (i * 5 + 1 + perturb) & mask;
217 }
218 found_null:
219 entry->key = key;
220 entry->hash = hash;
221 }
222
223 /* ======== End logic for probing the hash table ========================== */
224 /* ======================================================================== */
225
226 /*
227 Restructure the table by allocating a new table and reinserting all
228 keys again. When entries have been deleted, the new table may
229 actually be smaller than the old one.
230 */
231 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)232 set_table_resize(PySetObject *so, Py_ssize_t minused)
233 {
234 setentry *oldtable, *newtable, *entry;
235 Py_ssize_t oldmask = so->mask;
236 size_t newmask;
237 int is_oldtable_malloced;
238 setentry small_copy[PySet_MINSIZE];
239
240 assert(minused >= 0);
241
242 /* Find the smallest table size > minused. */
243 /* XXX speed-up with intrinsics */
244 size_t newsize = PySet_MINSIZE;
245 while (newsize <= (size_t)minused) {
246 newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
247 }
248
249 /* Get space for a new table. */
250 oldtable = so->table;
251 assert(oldtable != NULL);
252 is_oldtable_malloced = oldtable != so->smalltable;
253
254 if (newsize == PySet_MINSIZE) {
255 /* A large table is shrinking, or we can't get any smaller. */
256 newtable = so->smalltable;
257 if (newtable == oldtable) {
258 if (so->fill == so->used) {
259 /* No dummies, so no point doing anything. */
260 return 0;
261 }
262 /* We're not going to resize it, but rebuild the
263 table anyway to purge old dummy entries.
264 Subtle: This is *necessary* if fill==size,
265 as set_lookkey needs at least one virgin slot to
266 terminate failing searches. If fill < size, it's
267 merely desirable, as dummies slow searches. */
268 assert(so->fill > so->used);
269 memcpy(small_copy, oldtable, sizeof(small_copy));
270 oldtable = small_copy;
271 }
272 }
273 else {
274 newtable = PyMem_NEW(setentry, newsize);
275 if (newtable == NULL) {
276 PyErr_NoMemory();
277 return -1;
278 }
279 }
280
281 /* Make the set empty, using the new table. */
282 assert(newtable != oldtable);
283 memset(newtable, 0, sizeof(setentry) * newsize);
284 so->mask = newsize - 1;
285 so->table = newtable;
286
287 /* Copy the data over; this is refcount-neutral for active entries;
288 dummy entries aren't copied over, of course */
289 newmask = (size_t)so->mask;
290 if (so->fill == so->used) {
291 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
292 if (entry->key != NULL) {
293 set_insert_clean(newtable, newmask, entry->key, entry->hash);
294 }
295 }
296 } else {
297 so->fill = so->used;
298 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
299 if (entry->key != NULL && entry->key != dummy) {
300 set_insert_clean(newtable, newmask, entry->key, entry->hash);
301 }
302 }
303 }
304
305 if (is_oldtable_malloced)
306 PyMem_Free(oldtable);
307 return 0;
308 }
309
310 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312 {
313 setentry *entry;
314
315 entry = set_lookkey(so, key, hash);
316 if (entry != NULL)
317 return entry->key != NULL;
318 return -1;
319 }
320
321 #define DISCARD_NOTFOUND 0
322 #define DISCARD_FOUND 1
323
324 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)325 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
326 {
327 setentry *entry;
328 PyObject *old_key;
329
330 entry = set_lookkey(so, key, hash);
331 if (entry == NULL)
332 return -1;
333 if (entry->key == NULL)
334 return DISCARD_NOTFOUND;
335 old_key = entry->key;
336 entry->key = dummy;
337 entry->hash = -1;
338 so->used--;
339 Py_DECREF(old_key);
340 return DISCARD_FOUND;
341 }
342
343 static int
set_add_key(PySetObject * so,PyObject * key)344 set_add_key(PySetObject *so, PyObject *key)
345 {
346 Py_hash_t hash;
347
348 if (!PyUnicode_CheckExact(key) ||
349 (hash = ((PyASCIIObject *) key)->hash) == -1) {
350 hash = PyObject_Hash(key);
351 if (hash == -1)
352 return -1;
353 }
354 return set_add_entry(so, key, hash);
355 }
356
357 static int
set_contains_key(PySetObject * so,PyObject * key)358 set_contains_key(PySetObject *so, PyObject *key)
359 {
360 Py_hash_t hash;
361
362 if (!PyUnicode_CheckExact(key) ||
363 (hash = ((PyASCIIObject *) key)->hash) == -1) {
364 hash = PyObject_Hash(key);
365 if (hash == -1)
366 return -1;
367 }
368 return set_contains_entry(so, key, hash);
369 }
370
371 static int
set_discard_key(PySetObject * so,PyObject * key)372 set_discard_key(PySetObject *so, PyObject *key)
373 {
374 Py_hash_t hash;
375
376 if (!PyUnicode_CheckExact(key) ||
377 (hash = ((PyASCIIObject *) key)->hash) == -1) {
378 hash = PyObject_Hash(key);
379 if (hash == -1)
380 return -1;
381 }
382 return set_discard_entry(so, key, hash);
383 }
384
385 static void
set_empty_to_minsize(PySetObject * so)386 set_empty_to_minsize(PySetObject *so)
387 {
388 memset(so->smalltable, 0, sizeof(so->smalltable));
389 so->fill = 0;
390 so->used = 0;
391 so->mask = PySet_MINSIZE - 1;
392 so->table = so->smalltable;
393 so->hash = -1;
394 }
395
396 static int
set_clear_internal(PySetObject * so)397 set_clear_internal(PySetObject *so)
398 {
399 setentry *entry;
400 setentry *table = so->table;
401 Py_ssize_t fill = so->fill;
402 Py_ssize_t used = so->used;
403 int table_is_malloced = table != so->smalltable;
404 setentry small_copy[PySet_MINSIZE];
405
406 assert (PyAnySet_Check(so));
407 assert(table != NULL);
408
409 /* This is delicate. During the process of clearing the set,
410 * decrefs can cause the set to mutate. To avoid fatal confusion
411 * (voice of experience), we have to make the set empty before
412 * clearing the slots, and never refer to anything via so->ref while
413 * clearing.
414 */
415 if (table_is_malloced)
416 set_empty_to_minsize(so);
417
418 else if (fill > 0) {
419 /* It's a small table with something that needs to be cleared.
420 * Afraid the only safe way is to copy the set entries into
421 * another small table first.
422 */
423 memcpy(small_copy, table, sizeof(small_copy));
424 table = small_copy;
425 set_empty_to_minsize(so);
426 }
427 /* else it's a small table that's already empty */
428
429 /* Now we can finally clear things. If C had refcounts, we could
430 * assert that the refcount on table is 1 now, i.e. that this function
431 * has unique access to it, so decref side-effects can't alter it.
432 */
433 for (entry = table; used > 0; entry++) {
434 if (entry->key && entry->key != dummy) {
435 used--;
436 Py_DECREF(entry->key);
437 }
438 }
439
440 if (table_is_malloced)
441 PyMem_Free(table);
442 return 0;
443 }
444
445 /*
446 * Iterate over a set table. Use like so:
447 *
448 * Py_ssize_t pos;
449 * setentry *entry;
450 * pos = 0; # important! pos should not otherwise be changed by you
451 * while (set_next(yourset, &pos, &entry)) {
452 * Refer to borrowed reference in entry->key.
453 * }
454 *
455 * CAUTION: In general, it isn't safe to use set_next in a loop that
456 * mutates the table.
457 */
458 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)459 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
460 {
461 Py_ssize_t i;
462 Py_ssize_t mask;
463 setentry *entry;
464
465 assert (PyAnySet_Check(so));
466 i = *pos_ptr;
467 assert(i >= 0);
468 mask = so->mask;
469 entry = &so->table[i];
470 while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
471 i++;
472 entry++;
473 }
474 *pos_ptr = i+1;
475 if (i > mask)
476 return 0;
477 assert(entry != NULL);
478 *entry_ptr = entry;
479 return 1;
480 }
481
482 static void
set_dealloc(PySetObject * so)483 set_dealloc(PySetObject *so)
484 {
485 setentry *entry;
486 Py_ssize_t used = so->used;
487
488 /* bpo-31095: UnTrack is needed before calling any callbacks */
489 PyObject_GC_UnTrack(so);
490 Py_TRASHCAN_BEGIN(so, set_dealloc)
491 if (so->weakreflist != NULL)
492 PyObject_ClearWeakRefs((PyObject *) so);
493
494 for (entry = so->table; used > 0; entry++) {
495 if (entry->key && entry->key != dummy) {
496 used--;
497 Py_DECREF(entry->key);
498 }
499 }
500 if (so->table != so->smalltable)
501 PyMem_Free(so->table);
502 Py_TYPE(so)->tp_free(so);
503 Py_TRASHCAN_END
504 }
505
506 static PyObject *
set_repr(PySetObject * so)507 set_repr(PySetObject *so)
508 {
509 PyObject *result=NULL, *keys, *listrepr, *tmp;
510 int status = Py_ReprEnter((PyObject*)so);
511
512 if (status != 0) {
513 if (status < 0)
514 return NULL;
515 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
516 }
517
518 /* shortcut for the empty set */
519 if (!so->used) {
520 Py_ReprLeave((PyObject*)so);
521 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
522 }
523
524 keys = PySequence_List((PyObject *)so);
525 if (keys == NULL)
526 goto done;
527
528 /* repr(keys)[1:-1] */
529 listrepr = PyObject_Repr(keys);
530 Py_DECREF(keys);
531 if (listrepr == NULL)
532 goto done;
533 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
534 Py_DECREF(listrepr);
535 if (tmp == NULL)
536 goto done;
537 listrepr = tmp;
538
539 if (!PySet_CheckExact(so))
540 result = PyUnicode_FromFormat("%s({%U})",
541 Py_TYPE(so)->tp_name,
542 listrepr);
543 else
544 result = PyUnicode_FromFormat("{%U}", listrepr);
545 Py_DECREF(listrepr);
546 done:
547 Py_ReprLeave((PyObject*)so);
548 return result;
549 }
550
551 static Py_ssize_t
set_len(PyObject * so)552 set_len(PyObject *so)
553 {
554 return ((PySetObject *)so)->used;
555 }
556
557 static int
set_merge(PySetObject * so,PyObject * otherset)558 set_merge(PySetObject *so, PyObject *otherset)
559 {
560 PySetObject *other;
561 PyObject *key;
562 Py_ssize_t i;
563 setentry *so_entry;
564 setentry *other_entry;
565
566 assert (PyAnySet_Check(so));
567 assert (PyAnySet_Check(otherset));
568
569 other = (PySetObject*)otherset;
570 if (other == so || other->used == 0)
571 /* a.update(a) or a.update(set()); nothing to do */
572 return 0;
573 /* Do one big resize at the start, rather than
574 * incrementally resizing as we insert new keys. Expect
575 * that there will be no (or few) overlapping keys.
576 */
577 if ((so->fill + other->used)*5 >= so->mask*3) {
578 if (set_table_resize(so, (so->used + other->used)*2) != 0)
579 return -1;
580 }
581 so_entry = so->table;
582 other_entry = other->table;
583
584 /* If our table is empty, and both tables have the same size, and
585 there are no dummies to eliminate, then just copy the pointers. */
586 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
587 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
588 key = other_entry->key;
589 if (key != NULL) {
590 assert(so_entry->key == NULL);
591 Py_INCREF(key);
592 so_entry->key = key;
593 so_entry->hash = other_entry->hash;
594 }
595 }
596 so->fill = other->fill;
597 so->used = other->used;
598 return 0;
599 }
600
601 /* If our table is empty, we can use set_insert_clean() */
602 if (so->fill == 0) {
603 setentry *newtable = so->table;
604 size_t newmask = (size_t)so->mask;
605 so->fill = other->used;
606 so->used = other->used;
607 for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
608 key = other_entry->key;
609 if (key != NULL && key != dummy) {
610 Py_INCREF(key);
611 set_insert_clean(newtable, newmask, key, other_entry->hash);
612 }
613 }
614 return 0;
615 }
616
617 /* We can't assure there are no duplicates, so do normal insertions */
618 for (i = 0; i <= other->mask; i++) {
619 other_entry = &other->table[i];
620 key = other_entry->key;
621 if (key != NULL && key != dummy) {
622 if (set_add_entry(so, key, other_entry->hash))
623 return -1;
624 }
625 }
626 return 0;
627 }
628
629 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))630 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
631 {
632 /* Make sure the search finger is in bounds */
633 setentry *entry = so->table + (so->finger & so->mask);
634 setentry *limit = so->table + so->mask;
635 PyObject *key;
636
637 if (so->used == 0) {
638 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
639 return NULL;
640 }
641 while (entry->key == NULL || entry->key==dummy) {
642 entry++;
643 if (entry > limit)
644 entry = so->table;
645 }
646 key = entry->key;
647 entry->key = dummy;
648 entry->hash = -1;
649 so->used--;
650 so->finger = entry - so->table + 1; /* next place to start */
651 return key;
652 }
653
654 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
655 Raises KeyError if the set is empty.");
656
657 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)658 set_traverse(PySetObject *so, visitproc visit, void *arg)
659 {
660 Py_ssize_t pos = 0;
661 setentry *entry;
662
663 while (set_next(so, &pos, &entry))
664 Py_VISIT(entry->key);
665 return 0;
666 }
667
668 /* Work to increase the bit dispersion for closely spaced hash values.
669 This is important because some use cases have many combinations of a
670 small number of elements with nearby hashes so that many distinct
671 combinations collapse to only a handful of distinct hash values. */
672
673 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)674 _shuffle_bits(Py_uhash_t h)
675 {
676 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
677 }
678
679 /* Most of the constants in this hash algorithm are randomly chosen
680 large primes with "interesting bit patterns" and that passed tests
681 for good collision statistics on a variety of problematic datasets
682 including powersets and graph structures (such as David Eppstein's
683 graph recipes in Lib/test/test_set.py) */
684
685 static Py_hash_t
frozenset_hash(PyObject * self)686 frozenset_hash(PyObject *self)
687 {
688 PySetObject *so = (PySetObject *)self;
689 Py_uhash_t hash = 0;
690 setentry *entry;
691
692 if (so->hash != -1)
693 return so->hash;
694
695 /* Xor-in shuffled bits from every entry's hash field because xor is
696 commutative and a frozenset hash should be independent of order.
697
698 For speed, include null entries and dummy entries and then
699 subtract out their effect afterwards so that the final hash
700 depends only on active entries. This allows the code to be
701 vectorized by the compiler and it saves the unpredictable
702 branches that would arise when trying to exclude null and dummy
703 entries on every iteration. */
704
705 for (entry = so->table; entry <= &so->table[so->mask]; entry++)
706 hash ^= _shuffle_bits(entry->hash);
707
708 /* Remove the effect of an odd number of NULL entries */
709 if ((so->mask + 1 - so->fill) & 1)
710 hash ^= _shuffle_bits(0);
711
712 /* Remove the effect of an odd number of dummy entries */
713 if ((so->fill - so->used) & 1)
714 hash ^= _shuffle_bits(-1);
715
716 /* Factor in the number of active entries */
717 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
718
719 /* Disperse patterns arising in nested frozensets */
720 hash ^= (hash >> 11) ^ (hash >> 25);
721 hash = hash * 69069U + 907133923UL;
722
723 /* -1 is reserved as an error code */
724 if (hash == (Py_uhash_t)-1)
725 hash = 590923713UL;
726
727 so->hash = hash;
728 return hash;
729 }
730
731 /***** Set iterator type ***********************************************/
732
733 typedef struct {
734 PyObject_HEAD
735 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
736 Py_ssize_t si_used;
737 Py_ssize_t si_pos;
738 Py_ssize_t len;
739 } setiterobject;
740
741 static void
setiter_dealloc(setiterobject * si)742 setiter_dealloc(setiterobject *si)
743 {
744 /* bpo-31095: UnTrack is needed before calling any callbacks */
745 _PyObject_GC_UNTRACK(si);
746 Py_XDECREF(si->si_set);
747 PyObject_GC_Del(si);
748 }
749
750 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)751 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
752 {
753 Py_VISIT(si->si_set);
754 return 0;
755 }
756
757 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))758 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
759 {
760 Py_ssize_t len = 0;
761 if (si->si_set != NULL && si->si_used == si->si_set->used)
762 len = si->len;
763 return PyLong_FromSsize_t(len);
764 }
765
766 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
767
768 static PyObject *setiter_iternext(setiterobject *si);
769
770 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))771 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
772 {
773 _Py_IDENTIFIER(iter);
774 /* copy the iterator state */
775 setiterobject tmp = *si;
776 Py_XINCREF(tmp.si_set);
777
778 /* iterate the temporary into a list */
779 PyObject *list = PySequence_List((PyObject*)&tmp);
780 Py_XDECREF(tmp.si_set);
781 if (list == NULL) {
782 return NULL;
783 }
784 return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
785 }
786
787 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
788
789 static PyMethodDef setiter_methods[] = {
790 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
791 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
792 {NULL, NULL} /* sentinel */
793 };
794
setiter_iternext(setiterobject * si)795 static PyObject *setiter_iternext(setiterobject *si)
796 {
797 PyObject *key;
798 Py_ssize_t i, mask;
799 setentry *entry;
800 PySetObject *so = si->si_set;
801
802 if (so == NULL)
803 return NULL;
804 assert (PyAnySet_Check(so));
805
806 if (si->si_used != so->used) {
807 PyErr_SetString(PyExc_RuntimeError,
808 "Set changed size during iteration");
809 si->si_used = -1; /* Make this state sticky */
810 return NULL;
811 }
812
813 i = si->si_pos;
814 assert(i>=0);
815 entry = so->table;
816 mask = so->mask;
817 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
818 i++;
819 si->si_pos = i+1;
820 if (i > mask)
821 goto fail;
822 si->len--;
823 key = entry[i].key;
824 Py_INCREF(key);
825 return key;
826
827 fail:
828 si->si_set = NULL;
829 Py_DECREF(so);
830 return NULL;
831 }
832
833 PyTypeObject PySetIter_Type = {
834 PyVarObject_HEAD_INIT(&PyType_Type, 0)
835 "set_iterator", /* tp_name */
836 sizeof(setiterobject), /* tp_basicsize */
837 0, /* tp_itemsize */
838 /* methods */
839 (destructor)setiter_dealloc, /* tp_dealloc */
840 0, /* tp_vectorcall_offset */
841 0, /* tp_getattr */
842 0, /* tp_setattr */
843 0, /* tp_as_async */
844 0, /* tp_repr */
845 0, /* tp_as_number */
846 0, /* tp_as_sequence */
847 0, /* tp_as_mapping */
848 0, /* tp_hash */
849 0, /* tp_call */
850 0, /* tp_str */
851 PyObject_GenericGetAttr, /* tp_getattro */
852 0, /* tp_setattro */
853 0, /* tp_as_buffer */
854 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
855 0, /* tp_doc */
856 (traverseproc)setiter_traverse, /* tp_traverse */
857 0, /* tp_clear */
858 0, /* tp_richcompare */
859 0, /* tp_weaklistoffset */
860 PyObject_SelfIter, /* tp_iter */
861 (iternextfunc)setiter_iternext, /* tp_iternext */
862 setiter_methods, /* tp_methods */
863 0,
864 };
865
866 static PyObject *
set_iter(PySetObject * so)867 set_iter(PySetObject *so)
868 {
869 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
870 if (si == NULL)
871 return NULL;
872 Py_INCREF(so);
873 si->si_set = so;
874 si->si_used = so->used;
875 si->si_pos = 0;
876 si->len = so->used;
877 _PyObject_GC_TRACK(si);
878 return (PyObject *)si;
879 }
880
881 static int
set_update_internal(PySetObject * so,PyObject * other)882 set_update_internal(PySetObject *so, PyObject *other)
883 {
884 PyObject *key, *it;
885
886 if (PyAnySet_Check(other))
887 return set_merge(so, other);
888
889 if (PyDict_CheckExact(other)) {
890 PyObject *value;
891 Py_ssize_t pos = 0;
892 Py_hash_t hash;
893 Py_ssize_t dictsize = PyDict_GET_SIZE(other);
894
895 /* Do one big resize at the start, rather than
896 * incrementally resizing as we insert new keys. Expect
897 * that there will be no (or few) overlapping keys.
898 */
899 if (dictsize < 0)
900 return -1;
901 if ((so->fill + dictsize)*5 >= so->mask*3) {
902 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
903 return -1;
904 }
905 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
906 if (set_add_entry(so, key, hash))
907 return -1;
908 }
909 return 0;
910 }
911
912 it = PyObject_GetIter(other);
913 if (it == NULL)
914 return -1;
915
916 while ((key = PyIter_Next(it)) != NULL) {
917 if (set_add_key(so, key)) {
918 Py_DECREF(it);
919 Py_DECREF(key);
920 return -1;
921 }
922 Py_DECREF(key);
923 }
924 Py_DECREF(it);
925 if (PyErr_Occurred())
926 return -1;
927 return 0;
928 }
929
930 static PyObject *
set_update(PySetObject * so,PyObject * args)931 set_update(PySetObject *so, PyObject *args)
932 {
933 Py_ssize_t i;
934
935 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
936 PyObject *other = PyTuple_GET_ITEM(args, i);
937 if (set_update_internal(so, other))
938 return NULL;
939 }
940 Py_RETURN_NONE;
941 }
942
943 PyDoc_STRVAR(update_doc,
944 "Update a set with the union of itself and others.");
945
946 /* XXX Todo:
947 If aligned memory allocations become available, make the
948 set object 64 byte aligned so that most of the fields
949 can be retrieved or updated in a single cache line.
950 */
951
952 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)953 make_new_set(PyTypeObject *type, PyObject *iterable)
954 {
955 assert(PyType_Check(type));
956 PySetObject *so;
957
958 so = (PySetObject *)type->tp_alloc(type, 0);
959 if (so == NULL)
960 return NULL;
961
962 so->fill = 0;
963 so->used = 0;
964 so->mask = PySet_MINSIZE - 1;
965 so->table = so->smalltable;
966 so->hash = -1;
967 so->finger = 0;
968 so->weakreflist = NULL;
969
970 if (iterable != NULL) {
971 if (set_update_internal(so, iterable)) {
972 Py_DECREF(so);
973 return NULL;
974 }
975 }
976
977 return (PyObject *)so;
978 }
979
980 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)981 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
982 {
983 if (type != &PySet_Type && type != &PyFrozenSet_Type) {
984 if (PyType_IsSubtype(type, &PySet_Type))
985 type = &PySet_Type;
986 else
987 type = &PyFrozenSet_Type;
988 }
989 return make_new_set(type, iterable);
990 }
991
992 static PyObject *
make_new_frozenset(PyTypeObject * type,PyObject * iterable)993 make_new_frozenset(PyTypeObject *type, PyObject *iterable)
994 {
995 if (type != &PyFrozenSet_Type) {
996 return make_new_set(type, iterable);
997 }
998
999 if (iterable != NULL && PyFrozenSet_CheckExact(iterable)) {
1000 /* frozenset(f) is idempotent */
1001 Py_INCREF(iterable);
1002 return iterable;
1003 }
1004 return make_new_set((PyTypeObject *)type, iterable);
1005 }
1006
1007 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1008 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1009 {
1010 PyObject *iterable = NULL;
1011
1012 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds)) {
1013 return NULL;
1014 }
1015
1016 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1017 return NULL;
1018 }
1019
1020 return make_new_frozenset(type, iterable);
1021 }
1022
1023 static PyObject *
frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1024 frozenset_vectorcall(PyObject *type, PyObject * const*args,
1025 size_t nargsf, PyObject *kwnames)
1026 {
1027 if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1028 return NULL;
1029 }
1030
1031 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1032 if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1033 return NULL;
1034 }
1035
1036 PyObject *iterable = (nargs ? args[0] : NULL);
1037 return make_new_frozenset((PyTypeObject *)type, iterable);
1038 }
1039
1040 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1041 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1042 {
1043 return make_new_set(type, NULL);
1044 }
1045
1046 /* set_swap_bodies() switches the contents of any two sets by moving their
1047 internal data pointers and, if needed, copying the internal smalltables.
1048 Semantically equivalent to:
1049
1050 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1051
1052 The function always succeeds and it leaves both objects in a stable state.
1053 Useful for operations that update in-place (by allowing an intermediate
1054 result to be swapped into one of the original inputs).
1055 */
1056
1057 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1058 set_swap_bodies(PySetObject *a, PySetObject *b)
1059 {
1060 Py_ssize_t t;
1061 setentry *u;
1062 setentry tab[PySet_MINSIZE];
1063 Py_hash_t h;
1064
1065 t = a->fill; a->fill = b->fill; b->fill = t;
1066 t = a->used; a->used = b->used; b->used = t;
1067 t = a->mask; a->mask = b->mask; b->mask = t;
1068
1069 u = a->table;
1070 if (a->table == a->smalltable)
1071 u = b->smalltable;
1072 a->table = b->table;
1073 if (b->table == b->smalltable)
1074 a->table = a->smalltable;
1075 b->table = u;
1076
1077 if (a->table == a->smalltable || b->table == b->smalltable) {
1078 memcpy(tab, a->smalltable, sizeof(tab));
1079 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1080 memcpy(b->smalltable, tab, sizeof(tab));
1081 }
1082
1083 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1084 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1085 h = a->hash; a->hash = b->hash; b->hash = h;
1086 } else {
1087 a->hash = -1;
1088 b->hash = -1;
1089 }
1090 }
1091
1092 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1093 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1094 {
1095 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1096 }
1097
1098 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1099 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1100 {
1101 if (PyFrozenSet_CheckExact(so)) {
1102 Py_INCREF(so);
1103 return (PyObject *)so;
1104 }
1105 return set_copy(so, NULL);
1106 }
1107
1108 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1109
1110 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1111 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1112 {
1113 set_clear_internal(so);
1114 Py_RETURN_NONE;
1115 }
1116
1117 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1118
1119 static PyObject *
set_union(PySetObject * so,PyObject * args)1120 set_union(PySetObject *so, PyObject *args)
1121 {
1122 PySetObject *result;
1123 PyObject *other;
1124 Py_ssize_t i;
1125
1126 result = (PySetObject *)set_copy(so, NULL);
1127 if (result == NULL)
1128 return NULL;
1129
1130 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1131 other = PyTuple_GET_ITEM(args, i);
1132 if ((PyObject *)so == other)
1133 continue;
1134 if (set_update_internal(result, other)) {
1135 Py_DECREF(result);
1136 return NULL;
1137 }
1138 }
1139 return (PyObject *)result;
1140 }
1141
1142 PyDoc_STRVAR(union_doc,
1143 "Return the union of sets as a new set.\n\
1144 \n\
1145 (i.e. all elements that are in either set.)");
1146
1147 static PyObject *
set_or(PySetObject * so,PyObject * other)1148 set_or(PySetObject *so, PyObject *other)
1149 {
1150 PySetObject *result;
1151
1152 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1153 Py_RETURN_NOTIMPLEMENTED;
1154
1155 result = (PySetObject *)set_copy(so, NULL);
1156 if (result == NULL)
1157 return NULL;
1158 if ((PyObject *)so == other)
1159 return (PyObject *)result;
1160 if (set_update_internal(result, other)) {
1161 Py_DECREF(result);
1162 return NULL;
1163 }
1164 return (PyObject *)result;
1165 }
1166
1167 static PyObject *
set_ior(PySetObject * so,PyObject * other)1168 set_ior(PySetObject *so, PyObject *other)
1169 {
1170 if (!PyAnySet_Check(other))
1171 Py_RETURN_NOTIMPLEMENTED;
1172
1173 if (set_update_internal(so, other))
1174 return NULL;
1175 Py_INCREF(so);
1176 return (PyObject *)so;
1177 }
1178
1179 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1180 set_intersection(PySetObject *so, PyObject *other)
1181 {
1182 PySetObject *result;
1183 PyObject *key, *it, *tmp;
1184 Py_hash_t hash;
1185 int rv;
1186
1187 if ((PyObject *)so == other)
1188 return set_copy(so, NULL);
1189
1190 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1191 if (result == NULL)
1192 return NULL;
1193
1194 if (PyAnySet_Check(other)) {
1195 Py_ssize_t pos = 0;
1196 setentry *entry;
1197
1198 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1199 tmp = (PyObject *)so;
1200 so = (PySetObject *)other;
1201 other = tmp;
1202 }
1203
1204 while (set_next((PySetObject *)other, &pos, &entry)) {
1205 key = entry->key;
1206 hash = entry->hash;
1207 rv = set_contains_entry(so, key, hash);
1208 if (rv < 0) {
1209 Py_DECREF(result);
1210 return NULL;
1211 }
1212 if (rv) {
1213 if (set_add_entry(result, key, hash)) {
1214 Py_DECREF(result);
1215 return NULL;
1216 }
1217 }
1218 }
1219 return (PyObject *)result;
1220 }
1221
1222 it = PyObject_GetIter(other);
1223 if (it == NULL) {
1224 Py_DECREF(result);
1225 return NULL;
1226 }
1227
1228 while ((key = PyIter_Next(it)) != NULL) {
1229 hash = PyObject_Hash(key);
1230 if (hash == -1)
1231 goto error;
1232 rv = set_contains_entry(so, key, hash);
1233 if (rv < 0)
1234 goto error;
1235 if (rv) {
1236 if (set_add_entry(result, key, hash))
1237 goto error;
1238 }
1239 Py_DECREF(key);
1240 }
1241 Py_DECREF(it);
1242 if (PyErr_Occurred()) {
1243 Py_DECREF(result);
1244 return NULL;
1245 }
1246 return (PyObject *)result;
1247 error:
1248 Py_DECREF(it);
1249 Py_DECREF(result);
1250 Py_DECREF(key);
1251 return NULL;
1252 }
1253
1254 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1255 set_intersection_multi(PySetObject *so, PyObject *args)
1256 {
1257 Py_ssize_t i;
1258 PyObject *result = (PyObject *)so;
1259
1260 if (PyTuple_GET_SIZE(args) == 0)
1261 return set_copy(so, NULL);
1262
1263 Py_INCREF(so);
1264 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1265 PyObject *other = PyTuple_GET_ITEM(args, i);
1266 PyObject *newresult = set_intersection((PySetObject *)result, other);
1267 if (newresult == NULL) {
1268 Py_DECREF(result);
1269 return NULL;
1270 }
1271 Py_DECREF(result);
1272 result = newresult;
1273 }
1274 return result;
1275 }
1276
1277 PyDoc_STRVAR(intersection_doc,
1278 "Return the intersection of two sets as a new set.\n\
1279 \n\
1280 (i.e. all elements that are in both sets.)");
1281
1282 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1283 set_intersection_update(PySetObject *so, PyObject *other)
1284 {
1285 PyObject *tmp;
1286
1287 tmp = set_intersection(so, other);
1288 if (tmp == NULL)
1289 return NULL;
1290 set_swap_bodies(so, (PySetObject *)tmp);
1291 Py_DECREF(tmp);
1292 Py_RETURN_NONE;
1293 }
1294
1295 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1296 set_intersection_update_multi(PySetObject *so, PyObject *args)
1297 {
1298 PyObject *tmp;
1299
1300 tmp = set_intersection_multi(so, args);
1301 if (tmp == NULL)
1302 return NULL;
1303 set_swap_bodies(so, (PySetObject *)tmp);
1304 Py_DECREF(tmp);
1305 Py_RETURN_NONE;
1306 }
1307
1308 PyDoc_STRVAR(intersection_update_doc,
1309 "Update a set with the intersection of itself and another.");
1310
1311 static PyObject *
set_and(PySetObject * so,PyObject * other)1312 set_and(PySetObject *so, PyObject *other)
1313 {
1314 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1315 Py_RETURN_NOTIMPLEMENTED;
1316 return set_intersection(so, other);
1317 }
1318
1319 static PyObject *
set_iand(PySetObject * so,PyObject * other)1320 set_iand(PySetObject *so, PyObject *other)
1321 {
1322 PyObject *result;
1323
1324 if (!PyAnySet_Check(other))
1325 Py_RETURN_NOTIMPLEMENTED;
1326 result = set_intersection_update(so, other);
1327 if (result == NULL)
1328 return NULL;
1329 Py_DECREF(result);
1330 Py_INCREF(so);
1331 return (PyObject *)so;
1332 }
1333
1334 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1335 set_isdisjoint(PySetObject *so, PyObject *other)
1336 {
1337 PyObject *key, *it, *tmp;
1338 int rv;
1339
1340 if ((PyObject *)so == other) {
1341 if (PySet_GET_SIZE(so) == 0)
1342 Py_RETURN_TRUE;
1343 else
1344 Py_RETURN_FALSE;
1345 }
1346
1347 if (PyAnySet_CheckExact(other)) {
1348 Py_ssize_t pos = 0;
1349 setentry *entry;
1350
1351 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1352 tmp = (PyObject *)so;
1353 so = (PySetObject *)other;
1354 other = tmp;
1355 }
1356 while (set_next((PySetObject *)other, &pos, &entry)) {
1357 rv = set_contains_entry(so, entry->key, entry->hash);
1358 if (rv < 0)
1359 return NULL;
1360 if (rv)
1361 Py_RETURN_FALSE;
1362 }
1363 Py_RETURN_TRUE;
1364 }
1365
1366 it = PyObject_GetIter(other);
1367 if (it == NULL)
1368 return NULL;
1369
1370 while ((key = PyIter_Next(it)) != NULL) {
1371 Py_hash_t hash = PyObject_Hash(key);
1372
1373 if (hash == -1) {
1374 Py_DECREF(key);
1375 Py_DECREF(it);
1376 return NULL;
1377 }
1378 rv = set_contains_entry(so, key, hash);
1379 Py_DECREF(key);
1380 if (rv < 0) {
1381 Py_DECREF(it);
1382 return NULL;
1383 }
1384 if (rv) {
1385 Py_DECREF(it);
1386 Py_RETURN_FALSE;
1387 }
1388 }
1389 Py_DECREF(it);
1390 if (PyErr_Occurred())
1391 return NULL;
1392 Py_RETURN_TRUE;
1393 }
1394
1395 PyDoc_STRVAR(isdisjoint_doc,
1396 "Return True if two sets have a null intersection.");
1397
1398 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1399 set_difference_update_internal(PySetObject *so, PyObject *other)
1400 {
1401 if ((PyObject *)so == other)
1402 return set_clear_internal(so);
1403
1404 if (PyAnySet_Check(other)) {
1405 setentry *entry;
1406 Py_ssize_t pos = 0;
1407
1408 /* Optimization: When the other set is more than 8 times
1409 larger than the base set, replace the other set with
1410 intersection of the two sets.
1411 */
1412 if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1413 other = set_intersection(so, other);
1414 if (other == NULL)
1415 return -1;
1416 } else {
1417 Py_INCREF(other);
1418 }
1419
1420 while (set_next((PySetObject *)other, &pos, &entry))
1421 if (set_discard_entry(so, entry->key, entry->hash) < 0) {
1422 Py_DECREF(other);
1423 return -1;
1424 }
1425
1426 Py_DECREF(other);
1427 } else {
1428 PyObject *key, *it;
1429 it = PyObject_GetIter(other);
1430 if (it == NULL)
1431 return -1;
1432
1433 while ((key = PyIter_Next(it)) != NULL) {
1434 if (set_discard_key(so, key) < 0) {
1435 Py_DECREF(it);
1436 Py_DECREF(key);
1437 return -1;
1438 }
1439 Py_DECREF(key);
1440 }
1441 Py_DECREF(it);
1442 if (PyErr_Occurred())
1443 return -1;
1444 }
1445 /* If more than 1/4th are dummies, then resize them away. */
1446 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1447 return 0;
1448 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1449 }
1450
1451 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1452 set_difference_update(PySetObject *so, PyObject *args)
1453 {
1454 Py_ssize_t i;
1455
1456 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1457 PyObject *other = PyTuple_GET_ITEM(args, i);
1458 if (set_difference_update_internal(so, other))
1459 return NULL;
1460 }
1461 Py_RETURN_NONE;
1462 }
1463
1464 PyDoc_STRVAR(difference_update_doc,
1465 "Remove all elements of another set from this set.");
1466
1467 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1468 set_copy_and_difference(PySetObject *so, PyObject *other)
1469 {
1470 PyObject *result;
1471
1472 result = set_copy(so, NULL);
1473 if (result == NULL)
1474 return NULL;
1475 if (set_difference_update_internal((PySetObject *) result, other) == 0)
1476 return result;
1477 Py_DECREF(result);
1478 return NULL;
1479 }
1480
1481 static PyObject *
set_difference(PySetObject * so,PyObject * other)1482 set_difference(PySetObject *so, PyObject *other)
1483 {
1484 PyObject *result;
1485 PyObject *key;
1486 Py_hash_t hash;
1487 setentry *entry;
1488 Py_ssize_t pos = 0, other_size;
1489 int rv;
1490
1491 if (PyAnySet_Check(other)) {
1492 other_size = PySet_GET_SIZE(other);
1493 }
1494 else if (PyDict_CheckExact(other)) {
1495 other_size = PyDict_GET_SIZE(other);
1496 }
1497 else {
1498 return set_copy_and_difference(so, other);
1499 }
1500
1501 /* If len(so) much more than len(other), it's more efficient to simply copy
1502 * so and then iterate other looking for common elements. */
1503 if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1504 return set_copy_and_difference(so, other);
1505 }
1506
1507 result = make_new_set_basetype(Py_TYPE(so), NULL);
1508 if (result == NULL)
1509 return NULL;
1510
1511 if (PyDict_CheckExact(other)) {
1512 while (set_next(so, &pos, &entry)) {
1513 key = entry->key;
1514 hash = entry->hash;
1515 rv = _PyDict_Contains_KnownHash(other, key, hash);
1516 if (rv < 0) {
1517 Py_DECREF(result);
1518 return NULL;
1519 }
1520 if (!rv) {
1521 if (set_add_entry((PySetObject *)result, key, hash)) {
1522 Py_DECREF(result);
1523 return NULL;
1524 }
1525 }
1526 }
1527 return result;
1528 }
1529
1530 /* Iterate over so, checking for common elements in other. */
1531 while (set_next(so, &pos, &entry)) {
1532 key = entry->key;
1533 hash = entry->hash;
1534 rv = set_contains_entry((PySetObject *)other, key, hash);
1535 if (rv < 0) {
1536 Py_DECREF(result);
1537 return NULL;
1538 }
1539 if (!rv) {
1540 if (set_add_entry((PySetObject *)result, key, hash)) {
1541 Py_DECREF(result);
1542 return NULL;
1543 }
1544 }
1545 }
1546 return result;
1547 }
1548
1549 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1550 set_difference_multi(PySetObject *so, PyObject *args)
1551 {
1552 Py_ssize_t i;
1553 PyObject *result, *other;
1554
1555 if (PyTuple_GET_SIZE(args) == 0)
1556 return set_copy(so, NULL);
1557
1558 other = PyTuple_GET_ITEM(args, 0);
1559 result = set_difference(so, other);
1560 if (result == NULL)
1561 return NULL;
1562
1563 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1564 other = PyTuple_GET_ITEM(args, i);
1565 if (set_difference_update_internal((PySetObject *)result, other)) {
1566 Py_DECREF(result);
1567 return NULL;
1568 }
1569 }
1570 return result;
1571 }
1572
1573 PyDoc_STRVAR(difference_doc,
1574 "Return the difference of two or more sets as a new set.\n\
1575 \n\
1576 (i.e. all elements that are in this set but not the others.)");
1577 static PyObject *
set_sub(PySetObject * so,PyObject * other)1578 set_sub(PySetObject *so, PyObject *other)
1579 {
1580 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1581 Py_RETURN_NOTIMPLEMENTED;
1582 return set_difference(so, other);
1583 }
1584
1585 static PyObject *
set_isub(PySetObject * so,PyObject * other)1586 set_isub(PySetObject *so, PyObject *other)
1587 {
1588 if (!PyAnySet_Check(other))
1589 Py_RETURN_NOTIMPLEMENTED;
1590 if (set_difference_update_internal(so, other))
1591 return NULL;
1592 Py_INCREF(so);
1593 return (PyObject *)so;
1594 }
1595
1596 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1597 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1598 {
1599 PySetObject *otherset;
1600 PyObject *key;
1601 Py_ssize_t pos = 0;
1602 Py_hash_t hash;
1603 setentry *entry;
1604 int rv;
1605
1606 if ((PyObject *)so == other)
1607 return set_clear(so, NULL);
1608
1609 if (PyDict_CheckExact(other)) {
1610 PyObject *value;
1611 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1612 Py_INCREF(key);
1613 rv = set_discard_entry(so, key, hash);
1614 if (rv < 0) {
1615 Py_DECREF(key);
1616 return NULL;
1617 }
1618 if (rv == DISCARD_NOTFOUND) {
1619 if (set_add_entry(so, key, hash)) {
1620 Py_DECREF(key);
1621 return NULL;
1622 }
1623 }
1624 Py_DECREF(key);
1625 }
1626 Py_RETURN_NONE;
1627 }
1628
1629 if (PyAnySet_Check(other)) {
1630 Py_INCREF(other);
1631 otherset = (PySetObject *)other;
1632 } else {
1633 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1634 if (otherset == NULL)
1635 return NULL;
1636 }
1637
1638 while (set_next(otherset, &pos, &entry)) {
1639 key = entry->key;
1640 hash = entry->hash;
1641 rv = set_discard_entry(so, key, hash);
1642 if (rv < 0) {
1643 Py_DECREF(otherset);
1644 return NULL;
1645 }
1646 if (rv == DISCARD_NOTFOUND) {
1647 if (set_add_entry(so, key, hash)) {
1648 Py_DECREF(otherset);
1649 return NULL;
1650 }
1651 }
1652 }
1653 Py_DECREF(otherset);
1654 Py_RETURN_NONE;
1655 }
1656
1657 PyDoc_STRVAR(symmetric_difference_update_doc,
1658 "Update a set with the symmetric difference of itself and another.");
1659
1660 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1661 set_symmetric_difference(PySetObject *so, PyObject *other)
1662 {
1663 PyObject *rv;
1664 PySetObject *otherset;
1665
1666 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1667 if (otherset == NULL)
1668 return NULL;
1669 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1670 if (rv == NULL) {
1671 Py_DECREF(otherset);
1672 return NULL;
1673 }
1674 Py_DECREF(rv);
1675 return (PyObject *)otherset;
1676 }
1677
1678 PyDoc_STRVAR(symmetric_difference_doc,
1679 "Return the symmetric difference of two sets as a new set.\n\
1680 \n\
1681 (i.e. all elements that are in exactly one of the sets.)");
1682
1683 static PyObject *
set_xor(PySetObject * so,PyObject * other)1684 set_xor(PySetObject *so, PyObject *other)
1685 {
1686 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1687 Py_RETURN_NOTIMPLEMENTED;
1688 return set_symmetric_difference(so, other);
1689 }
1690
1691 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1692 set_ixor(PySetObject *so, PyObject *other)
1693 {
1694 PyObject *result;
1695
1696 if (!PyAnySet_Check(other))
1697 Py_RETURN_NOTIMPLEMENTED;
1698 result = set_symmetric_difference_update(so, other);
1699 if (result == NULL)
1700 return NULL;
1701 Py_DECREF(result);
1702 Py_INCREF(so);
1703 return (PyObject *)so;
1704 }
1705
1706 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1707 set_issubset(PySetObject *so, PyObject *other)
1708 {
1709 setentry *entry;
1710 Py_ssize_t pos = 0;
1711 int rv;
1712
1713 if (!PyAnySet_Check(other)) {
1714 PyObject *tmp, *result;
1715 tmp = make_new_set(&PySet_Type, other);
1716 if (tmp == NULL)
1717 return NULL;
1718 result = set_issubset(so, tmp);
1719 Py_DECREF(tmp);
1720 return result;
1721 }
1722 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1723 Py_RETURN_FALSE;
1724
1725 while (set_next(so, &pos, &entry)) {
1726 rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1727 if (rv < 0)
1728 return NULL;
1729 if (!rv)
1730 Py_RETURN_FALSE;
1731 }
1732 Py_RETURN_TRUE;
1733 }
1734
1735 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1736
1737 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1738 set_issuperset(PySetObject *so, PyObject *other)
1739 {
1740 PyObject *tmp, *result;
1741
1742 if (!PyAnySet_Check(other)) {
1743 tmp = make_new_set(&PySet_Type, other);
1744 if (tmp == NULL)
1745 return NULL;
1746 result = set_issuperset(so, tmp);
1747 Py_DECREF(tmp);
1748 return result;
1749 }
1750 return set_issubset((PySetObject *)other, (PyObject *)so);
1751 }
1752
1753 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1754
1755 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1756 set_richcompare(PySetObject *v, PyObject *w, int op)
1757 {
1758 PyObject *r1;
1759 int r2;
1760
1761 if(!PyAnySet_Check(w))
1762 Py_RETURN_NOTIMPLEMENTED;
1763
1764 switch (op) {
1765 case Py_EQ:
1766 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1767 Py_RETURN_FALSE;
1768 if (v->hash != -1 &&
1769 ((PySetObject *)w)->hash != -1 &&
1770 v->hash != ((PySetObject *)w)->hash)
1771 Py_RETURN_FALSE;
1772 return set_issubset(v, w);
1773 case Py_NE:
1774 r1 = set_richcompare(v, w, Py_EQ);
1775 if (r1 == NULL)
1776 return NULL;
1777 r2 = PyObject_IsTrue(r1);
1778 Py_DECREF(r1);
1779 if (r2 < 0)
1780 return NULL;
1781 return PyBool_FromLong(!r2);
1782 case Py_LE:
1783 return set_issubset(v, w);
1784 case Py_GE:
1785 return set_issuperset(v, w);
1786 case Py_LT:
1787 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1788 Py_RETURN_FALSE;
1789 return set_issubset(v, w);
1790 case Py_GT:
1791 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1792 Py_RETURN_FALSE;
1793 return set_issuperset(v, w);
1794 }
1795 Py_RETURN_NOTIMPLEMENTED;
1796 }
1797
1798 static PyObject *
set_add(PySetObject * so,PyObject * key)1799 set_add(PySetObject *so, PyObject *key)
1800 {
1801 if (set_add_key(so, key))
1802 return NULL;
1803 Py_RETURN_NONE;
1804 }
1805
1806 PyDoc_STRVAR(add_doc,
1807 "Add an element to a set.\n\
1808 \n\
1809 This has no effect if the element is already present.");
1810
1811 static int
set_contains(PySetObject * so,PyObject * key)1812 set_contains(PySetObject *so, PyObject *key)
1813 {
1814 PyObject *tmpkey;
1815 int rv;
1816
1817 rv = set_contains_key(so, key);
1818 if (rv < 0) {
1819 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1820 return -1;
1821 PyErr_Clear();
1822 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1823 if (tmpkey == NULL)
1824 return -1;
1825 rv = set_contains_key(so, tmpkey);
1826 Py_DECREF(tmpkey);
1827 }
1828 return rv;
1829 }
1830
1831 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1832 set_direct_contains(PySetObject *so, PyObject *key)
1833 {
1834 long result;
1835
1836 result = set_contains(so, key);
1837 if (result < 0)
1838 return NULL;
1839 return PyBool_FromLong(result);
1840 }
1841
1842 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1843
1844 static PyObject *
set_remove(PySetObject * so,PyObject * key)1845 set_remove(PySetObject *so, PyObject *key)
1846 {
1847 PyObject *tmpkey;
1848 int rv;
1849
1850 rv = set_discard_key(so, key);
1851 if (rv < 0) {
1852 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1853 return NULL;
1854 PyErr_Clear();
1855 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1856 if (tmpkey == NULL)
1857 return NULL;
1858 rv = set_discard_key(so, tmpkey);
1859 Py_DECREF(tmpkey);
1860 if (rv < 0)
1861 return NULL;
1862 }
1863
1864 if (rv == DISCARD_NOTFOUND) {
1865 _PyErr_SetKeyError(key);
1866 return NULL;
1867 }
1868 Py_RETURN_NONE;
1869 }
1870
1871 PyDoc_STRVAR(remove_doc,
1872 "Remove an element from a set; it must be a member.\n\
1873 \n\
1874 If the element is not a member, raise a KeyError.");
1875
1876 static PyObject *
set_discard(PySetObject * so,PyObject * key)1877 set_discard(PySetObject *so, PyObject *key)
1878 {
1879 PyObject *tmpkey;
1880 int rv;
1881
1882 rv = set_discard_key(so, key);
1883 if (rv < 0) {
1884 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1885 return NULL;
1886 PyErr_Clear();
1887 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1888 if (tmpkey == NULL)
1889 return NULL;
1890 rv = set_discard_key(so, tmpkey);
1891 Py_DECREF(tmpkey);
1892 if (rv < 0)
1893 return NULL;
1894 }
1895 Py_RETURN_NONE;
1896 }
1897
1898 PyDoc_STRVAR(discard_doc,
1899 "Remove an element from a set if it is a member.\n\
1900 \n\
1901 If the element is not a member, do nothing.");
1902
1903 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1904 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1905 {
1906 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1907 _Py_IDENTIFIER(__dict__);
1908
1909 keys = PySequence_List((PyObject *)so);
1910 if (keys == NULL)
1911 goto done;
1912 args = PyTuple_Pack(1, keys);
1913 if (args == NULL)
1914 goto done;
1915 if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) {
1916 goto done;
1917 }
1918 if (dict == NULL) {
1919 dict = Py_None;
1920 Py_INCREF(dict);
1921 }
1922 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1923 done:
1924 Py_XDECREF(args);
1925 Py_XDECREF(keys);
1926 Py_XDECREF(dict);
1927 return result;
1928 }
1929
1930 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1931 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1932 {
1933 Py_ssize_t res;
1934
1935 res = _PyObject_SIZE(Py_TYPE(so));
1936 if (so->table != so->smalltable)
1937 res = res + (so->mask + 1) * sizeof(setentry);
1938 return PyLong_FromSsize_t(res);
1939 }
1940
1941 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1942 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1943 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1944 {
1945 PyObject *iterable = NULL;
1946
1947 if (!_PyArg_NoKeywords("set", kwds))
1948 return -1;
1949 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1950 return -1;
1951 if (self->fill)
1952 set_clear_internal(self);
1953 self->hash = -1;
1954 if (iterable == NULL)
1955 return 0;
1956 return set_update_internal(self, iterable);
1957 }
1958
1959 static PyObject*
set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1960 set_vectorcall(PyObject *type, PyObject * const*args,
1961 size_t nargsf, PyObject *kwnames)
1962 {
1963 assert(PyType_Check(type));
1964
1965 if (!_PyArg_NoKwnames("set", kwnames)) {
1966 return NULL;
1967 }
1968
1969 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1970 if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
1971 return NULL;
1972 }
1973
1974 if (nargs) {
1975 return make_new_set((PyTypeObject *)type, args[0]);
1976 }
1977
1978 return make_new_set((PyTypeObject *)type, NULL);
1979 }
1980
1981 static PySequenceMethods set_as_sequence = {
1982 set_len, /* sq_length */
1983 0, /* sq_concat */
1984 0, /* sq_repeat */
1985 0, /* sq_item */
1986 0, /* sq_slice */
1987 0, /* sq_ass_item */
1988 0, /* sq_ass_slice */
1989 (objobjproc)set_contains, /* sq_contains */
1990 };
1991
1992 /* set object ********************************************************/
1993
1994 #ifdef Py_DEBUG
1995 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
1996
1997 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
1998 All is well if assertions don't fail.");
1999 #endif
2000
2001 static PyMethodDef set_methods[] = {
2002 {"add", (PyCFunction)set_add, METH_O,
2003 add_doc},
2004 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2005 clear_doc},
2006 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2007 contains_doc},
2008 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2009 copy_doc},
2010 {"discard", (PyCFunction)set_discard, METH_O,
2011 discard_doc},
2012 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2013 difference_doc},
2014 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2015 difference_update_doc},
2016 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2017 intersection_doc},
2018 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2019 intersection_update_doc},
2020 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2021 isdisjoint_doc},
2022 {"issubset", (PyCFunction)set_issubset, METH_O,
2023 issubset_doc},
2024 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2025 issuperset_doc},
2026 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2027 pop_doc},
2028 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2029 reduce_doc},
2030 {"remove", (PyCFunction)set_remove, METH_O,
2031 remove_doc},
2032 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2033 sizeof_doc},
2034 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2035 symmetric_difference_doc},
2036 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2037 symmetric_difference_update_doc},
2038 #ifdef Py_DEBUG
2039 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2040 test_c_api_doc},
2041 #endif
2042 {"union", (PyCFunction)set_union, METH_VARARGS,
2043 union_doc},
2044 {"update", (PyCFunction)set_update, METH_VARARGS,
2045 update_doc},
2046 {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2047 {NULL, NULL} /* sentinel */
2048 };
2049
2050 static PyNumberMethods set_as_number = {
2051 0, /*nb_add*/
2052 (binaryfunc)set_sub, /*nb_subtract*/
2053 0, /*nb_multiply*/
2054 0, /*nb_remainder*/
2055 0, /*nb_divmod*/
2056 0, /*nb_power*/
2057 0, /*nb_negative*/
2058 0, /*nb_positive*/
2059 0, /*nb_absolute*/
2060 0, /*nb_bool*/
2061 0, /*nb_invert*/
2062 0, /*nb_lshift*/
2063 0, /*nb_rshift*/
2064 (binaryfunc)set_and, /*nb_and*/
2065 (binaryfunc)set_xor, /*nb_xor*/
2066 (binaryfunc)set_or, /*nb_or*/
2067 0, /*nb_int*/
2068 0, /*nb_reserved*/
2069 0, /*nb_float*/
2070 0, /*nb_inplace_add*/
2071 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2072 0, /*nb_inplace_multiply*/
2073 0, /*nb_inplace_remainder*/
2074 0, /*nb_inplace_power*/
2075 0, /*nb_inplace_lshift*/
2076 0, /*nb_inplace_rshift*/
2077 (binaryfunc)set_iand, /*nb_inplace_and*/
2078 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2079 (binaryfunc)set_ior, /*nb_inplace_or*/
2080 };
2081
2082 PyDoc_STRVAR(set_doc,
2083 "set() -> new empty set object\n\
2084 set(iterable) -> new set object\n\
2085 \n\
2086 Build an unordered collection of unique elements.");
2087
2088 PyTypeObject PySet_Type = {
2089 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2090 "set", /* tp_name */
2091 sizeof(PySetObject), /* tp_basicsize */
2092 0, /* tp_itemsize */
2093 /* methods */
2094 (destructor)set_dealloc, /* tp_dealloc */
2095 0, /* tp_vectorcall_offset */
2096 0, /* tp_getattr */
2097 0, /* tp_setattr */
2098 0, /* tp_as_async */
2099 (reprfunc)set_repr, /* tp_repr */
2100 &set_as_number, /* tp_as_number */
2101 &set_as_sequence, /* tp_as_sequence */
2102 0, /* tp_as_mapping */
2103 PyObject_HashNotImplemented, /* tp_hash */
2104 0, /* tp_call */
2105 0, /* tp_str */
2106 PyObject_GenericGetAttr, /* tp_getattro */
2107 0, /* tp_setattro */
2108 0, /* tp_as_buffer */
2109 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2110 Py_TPFLAGS_BASETYPE |
2111 _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2112 set_doc, /* tp_doc */
2113 (traverseproc)set_traverse, /* tp_traverse */
2114 (inquiry)set_clear_internal, /* tp_clear */
2115 (richcmpfunc)set_richcompare, /* tp_richcompare */
2116 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2117 (getiterfunc)set_iter, /* tp_iter */
2118 0, /* tp_iternext */
2119 set_methods, /* tp_methods */
2120 0, /* tp_members */
2121 0, /* tp_getset */
2122 0, /* tp_base */
2123 0, /* tp_dict */
2124 0, /* tp_descr_get */
2125 0, /* tp_descr_set */
2126 0, /* tp_dictoffset */
2127 (initproc)set_init, /* tp_init */
2128 PyType_GenericAlloc, /* tp_alloc */
2129 set_new, /* tp_new */
2130 PyObject_GC_Del, /* tp_free */
2131 .tp_vectorcall = set_vectorcall,
2132 };
2133
2134 /* frozenset object ********************************************************/
2135
2136
2137 static PyMethodDef frozenset_methods[] = {
2138 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2139 contains_doc},
2140 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2141 copy_doc},
2142 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2143 difference_doc},
2144 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2145 intersection_doc},
2146 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2147 isdisjoint_doc},
2148 {"issubset", (PyCFunction)set_issubset, METH_O,
2149 issubset_doc},
2150 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2151 issuperset_doc},
2152 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2153 reduce_doc},
2154 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2155 sizeof_doc},
2156 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2157 symmetric_difference_doc},
2158 {"union", (PyCFunction)set_union, METH_VARARGS,
2159 union_doc},
2160 {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2161 {NULL, NULL} /* sentinel */
2162 };
2163
2164 static PyNumberMethods frozenset_as_number = {
2165 0, /*nb_add*/
2166 (binaryfunc)set_sub, /*nb_subtract*/
2167 0, /*nb_multiply*/
2168 0, /*nb_remainder*/
2169 0, /*nb_divmod*/
2170 0, /*nb_power*/
2171 0, /*nb_negative*/
2172 0, /*nb_positive*/
2173 0, /*nb_absolute*/
2174 0, /*nb_bool*/
2175 0, /*nb_invert*/
2176 0, /*nb_lshift*/
2177 0, /*nb_rshift*/
2178 (binaryfunc)set_and, /*nb_and*/
2179 (binaryfunc)set_xor, /*nb_xor*/
2180 (binaryfunc)set_or, /*nb_or*/
2181 };
2182
2183 PyDoc_STRVAR(frozenset_doc,
2184 "frozenset() -> empty frozenset object\n\
2185 frozenset(iterable) -> frozenset object\n\
2186 \n\
2187 Build an immutable unordered collection of unique elements.");
2188
2189 PyTypeObject PyFrozenSet_Type = {
2190 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2191 "frozenset", /* tp_name */
2192 sizeof(PySetObject), /* tp_basicsize */
2193 0, /* tp_itemsize */
2194 /* methods */
2195 (destructor)set_dealloc, /* tp_dealloc */
2196 0, /* tp_vectorcall_offset */
2197 0, /* tp_getattr */
2198 0, /* tp_setattr */
2199 0, /* tp_as_async */
2200 (reprfunc)set_repr, /* tp_repr */
2201 &frozenset_as_number, /* tp_as_number */
2202 &set_as_sequence, /* tp_as_sequence */
2203 0, /* tp_as_mapping */
2204 frozenset_hash, /* tp_hash */
2205 0, /* tp_call */
2206 0, /* tp_str */
2207 PyObject_GenericGetAttr, /* tp_getattro */
2208 0, /* tp_setattro */
2209 0, /* tp_as_buffer */
2210 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2211 Py_TPFLAGS_BASETYPE |
2212 _Py_TPFLAGS_MATCH_SELF, /* tp_flags */
2213 frozenset_doc, /* tp_doc */
2214 (traverseproc)set_traverse, /* tp_traverse */
2215 (inquiry)set_clear_internal, /* tp_clear */
2216 (richcmpfunc)set_richcompare, /* tp_richcompare */
2217 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2218 (getiterfunc)set_iter, /* tp_iter */
2219 0, /* tp_iternext */
2220 frozenset_methods, /* tp_methods */
2221 0, /* tp_members */
2222 0, /* tp_getset */
2223 0, /* tp_base */
2224 0, /* tp_dict */
2225 0, /* tp_descr_get */
2226 0, /* tp_descr_set */
2227 0, /* tp_dictoffset */
2228 0, /* tp_init */
2229 PyType_GenericAlloc, /* tp_alloc */
2230 frozenset_new, /* tp_new */
2231 PyObject_GC_Del, /* tp_free */
2232 .tp_vectorcall = frozenset_vectorcall,
2233 };
2234
2235
2236 /***** C API functions *************************************************/
2237
2238 PyObject *
PySet_New(PyObject * iterable)2239 PySet_New(PyObject *iterable)
2240 {
2241 return make_new_set(&PySet_Type, iterable);
2242 }
2243
2244 PyObject *
PyFrozenSet_New(PyObject * iterable)2245 PyFrozenSet_New(PyObject *iterable)
2246 {
2247 return make_new_set(&PyFrozenSet_Type, iterable);
2248 }
2249
2250 Py_ssize_t
PySet_Size(PyObject * anyset)2251 PySet_Size(PyObject *anyset)
2252 {
2253 if (!PyAnySet_Check(anyset)) {
2254 PyErr_BadInternalCall();
2255 return -1;
2256 }
2257 return PySet_GET_SIZE(anyset);
2258 }
2259
2260 int
PySet_Clear(PyObject * set)2261 PySet_Clear(PyObject *set)
2262 {
2263 if (!PySet_Check(set)) {
2264 PyErr_BadInternalCall();
2265 return -1;
2266 }
2267 return set_clear_internal((PySetObject *)set);
2268 }
2269
2270 int
PySet_Contains(PyObject * anyset,PyObject * key)2271 PySet_Contains(PyObject *anyset, PyObject *key)
2272 {
2273 if (!PyAnySet_Check(anyset)) {
2274 PyErr_BadInternalCall();
2275 return -1;
2276 }
2277 return set_contains_key((PySetObject *)anyset, key);
2278 }
2279
2280 int
PySet_Discard(PyObject * set,PyObject * key)2281 PySet_Discard(PyObject *set, PyObject *key)
2282 {
2283 if (!PySet_Check(set)) {
2284 PyErr_BadInternalCall();
2285 return -1;
2286 }
2287 return set_discard_key((PySetObject *)set, key);
2288 }
2289
2290 int
PySet_Add(PyObject * anyset,PyObject * key)2291 PySet_Add(PyObject *anyset, PyObject *key)
2292 {
2293 if (!PySet_Check(anyset) &&
2294 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2295 PyErr_BadInternalCall();
2296 return -1;
2297 }
2298 return set_add_key((PySetObject *)anyset, key);
2299 }
2300
2301 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2302 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2303 {
2304 setentry *entry;
2305
2306 if (!PyAnySet_Check(set)) {
2307 PyErr_BadInternalCall();
2308 return -1;
2309 }
2310 if (set_next((PySetObject *)set, pos, &entry) == 0)
2311 return 0;
2312 *key = entry->key;
2313 *hash = entry->hash;
2314 return 1;
2315 }
2316
2317 PyObject *
PySet_Pop(PyObject * set)2318 PySet_Pop(PyObject *set)
2319 {
2320 if (!PySet_Check(set)) {
2321 PyErr_BadInternalCall();
2322 return NULL;
2323 }
2324 return set_pop((PySetObject *)set, NULL);
2325 }
2326
2327 int
_PySet_Update(PyObject * set,PyObject * iterable)2328 _PySet_Update(PyObject *set, PyObject *iterable)
2329 {
2330 if (!PySet_Check(set)) {
2331 PyErr_BadInternalCall();
2332 return -1;
2333 }
2334 return set_update_internal((PySetObject *)set, iterable);
2335 }
2336
2337 /* Exported for the gdb plugin's benefit. */
2338 PyObject *_PySet_Dummy = dummy;
2339
2340 #ifdef Py_DEBUG
2341
2342 /* Test code to be called with any three element set.
2343 Returns True and original set is restored. */
2344
2345 #define assertRaises(call_return_value, exception) \
2346 do { \
2347 assert(call_return_value); \
2348 assert(PyErr_ExceptionMatches(exception)); \
2349 PyErr_Clear(); \
2350 } while(0)
2351
2352 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2353 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2354 {
2355 Py_ssize_t count;
2356 const char *s;
2357 Py_ssize_t i;
2358 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2359 PyObject *ob = (PyObject *)so;
2360 Py_hash_t hash;
2361 PyObject *str;
2362
2363 /* Verify preconditions */
2364 assert(PyAnySet_Check(ob));
2365 assert(PyAnySet_CheckExact(ob));
2366 assert(!PyFrozenSet_CheckExact(ob));
2367
2368 /* so.clear(); so |= set("abc"); */
2369 str = PyUnicode_FromString("abc");
2370 if (str == NULL)
2371 return NULL;
2372 set_clear_internal(so);
2373 if (set_update_internal(so, str)) {
2374 Py_DECREF(str);
2375 return NULL;
2376 }
2377 Py_DECREF(str);
2378
2379 /* Exercise type/size checks */
2380 assert(PySet_Size(ob) == 3);
2381 assert(PySet_GET_SIZE(ob) == 3);
2382
2383 /* Raise TypeError for non-iterable constructor arguments */
2384 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2385 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2386
2387 /* Raise TypeError for unhashable key */
2388 dup = PySet_New(ob);
2389 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2390 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2391 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2392
2393 /* Exercise successful pop, contains, add, and discard */
2394 elem = PySet_Pop(ob);
2395 assert(PySet_Contains(ob, elem) == 0);
2396 assert(PySet_GET_SIZE(ob) == 2);
2397 assert(PySet_Add(ob, elem) == 0);
2398 assert(PySet_Contains(ob, elem) == 1);
2399 assert(PySet_GET_SIZE(ob) == 3);
2400 assert(PySet_Discard(ob, elem) == 1);
2401 assert(PySet_GET_SIZE(ob) == 2);
2402 assert(PySet_Discard(ob, elem) == 0);
2403 assert(PySet_GET_SIZE(ob) == 2);
2404
2405 /* Exercise clear */
2406 dup2 = PySet_New(dup);
2407 assert(PySet_Clear(dup2) == 0);
2408 assert(PySet_Size(dup2) == 0);
2409 Py_DECREF(dup2);
2410
2411 /* Raise SystemError on clear or update of frozen set */
2412 f = PyFrozenSet_New(dup);
2413 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2414 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2415 assert(PySet_Add(f, elem) == 0);
2416 Py_INCREF(f);
2417 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2418 Py_DECREF(f);
2419 Py_DECREF(f);
2420
2421 /* Exercise direct iteration */
2422 i = 0, count = 0;
2423 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2424 s = PyUnicode_AsUTF8(x);
2425 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2426 count++;
2427 }
2428 assert(count == 3);
2429
2430 /* Exercise updates */
2431 dup2 = PySet_New(NULL);
2432 assert(_PySet_Update(dup2, dup) == 0);
2433 assert(PySet_Size(dup2) == 3);
2434 assert(_PySet_Update(dup2, dup) == 0);
2435 assert(PySet_Size(dup2) == 3);
2436 Py_DECREF(dup2);
2437
2438 /* Raise SystemError when self argument is not a set or frozenset. */
2439 t = PyTuple_New(0);
2440 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2441 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2442 Py_DECREF(t);
2443
2444 /* Raise SystemError when self argument is not a set. */
2445 f = PyFrozenSet_New(dup);
2446 assert(PySet_Size(f) == 3);
2447 assert(PyFrozenSet_CheckExact(f));
2448 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2449 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2450 Py_DECREF(f);
2451
2452 /* Raise KeyError when popping from an empty set */
2453 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2454 Py_DECREF(ob);
2455 assert(PySet_GET_SIZE(ob) == 0);
2456 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2457
2458 /* Restore the set from the copy using the PyNumber API */
2459 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2460 Py_DECREF(ob);
2461
2462 /* Verify constructors accept NULL arguments */
2463 f = PySet_New(NULL);
2464 assert(f != NULL);
2465 assert(PySet_GET_SIZE(f) == 0);
2466 Py_DECREF(f);
2467 f = PyFrozenSet_New(NULL);
2468 assert(f != NULL);
2469 assert(PyFrozenSet_CheckExact(f));
2470 assert(PySet_GET_SIZE(f) == 0);
2471 Py_DECREF(f);
2472
2473 Py_DECREF(elem);
2474 Py_DECREF(dup);
2475 Py_RETURN_TRUE;
2476 }
2477
2478 #undef assertRaises
2479
2480 #endif
2481
2482 /***** Dummy Struct *************************************************/
2483
2484 static PyObject *
dummy_repr(PyObject * op)2485 dummy_repr(PyObject *op)
2486 {
2487 return PyUnicode_FromString("<dummy key>");
2488 }
2489
2490 static void _Py_NO_RETURN
dummy_dealloc(PyObject * ignore)2491 dummy_dealloc(PyObject* ignore)
2492 {
2493 Py_FatalError("deallocating <dummy key>");
2494 }
2495
2496 static PyTypeObject _PySetDummy_Type = {
2497 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2498 "<dummy key> type",
2499 0,
2500 0,
2501 dummy_dealloc, /*tp_dealloc*/ /*never called*/
2502 0, /*tp_vectorcall_offset*/
2503 0, /*tp_getattr*/
2504 0, /*tp_setattr*/
2505 0, /*tp_as_async*/
2506 dummy_repr, /*tp_repr*/
2507 0, /*tp_as_number*/
2508 0, /*tp_as_sequence*/
2509 0, /*tp_as_mapping*/
2510 0, /*tp_hash */
2511 0, /*tp_call */
2512 0, /*tp_str */
2513 0, /*tp_getattro */
2514 0, /*tp_setattro */
2515 0, /*tp_as_buffer */
2516 Py_TPFLAGS_DEFAULT, /*tp_flags */
2517 };
2518
2519 static PyObject _dummy_struct = {
2520 _PyObject_EXTRA_INIT
2521 2, &_PySetDummy_Type
2522 };
2523