1
2 /* set object implementation
3
4 Written and maintained by Raymond D. Hettinger <python@rcn.com>
5 Derived from Lib/sets.py and Objects/dictobject.c.
6
7 The basic lookup function used by all operations.
8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10 The initial probe index is computed as hash mod the table size.
11 Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13 To improve cache locality, each probe inspects a series of consecutive
14 nearby entries before moving on to probes elsewhere in memory. This leaves
15 us with a hybrid of linear probing and randomized probing. The linear probing
16 reduces the cost of hash collisions because consecutive memory accesses
17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18 we then use more of the upper bits from the hash value and apply a simple
19 linear congruential random number genearator. This helps break-up long
20 chains of collisions.
21
22 All arithmetic on hash should ignore overflow.
23
24 Unlike the dictionary implementation, the lookkey function can return
25 NULL if the rich comparison returns an error.
26
27 Use cases for sets differ considerably from dictionaries where looked-up
28 keys are more likely to be present. In contrast, sets are primarily
29 about membership testing where the presence of an element is not known in
30 advance. Accordingly, the set implementation needs to optimize for both
31 the found and not-found case.
32 */
33
34 #include "Python.h"
35 #include "pycore_object.h" // _PyObject_GC_UNTRACK()
36 #include <stddef.h> // offsetof()
37
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40
41 #define dummy (&_dummy_struct)
42
43
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58 setentry *table;
59 setentry *entry;
60 size_t perturb = hash;
61 size_t mask = so->mask;
62 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63 int probes;
64 int cmp;
65
66 while (1) {
67 entry = &so->table[i];
68 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
69 do {
70 if (entry->hash == 0 && entry->key == NULL)
71 return entry;
72 if (entry->hash == hash) {
73 PyObject *startkey = entry->key;
74 assert(startkey != dummy);
75 if (startkey == key)
76 return entry;
77 if (PyUnicode_CheckExact(startkey)
78 && PyUnicode_CheckExact(key)
79 && _PyUnicode_EQ(startkey, key))
80 return entry;
81 table = so->table;
82 Py_INCREF(startkey);
83 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
84 Py_DECREF(startkey);
85 if (cmp < 0)
86 return NULL;
87 if (table != so->table || entry->key != startkey)
88 return set_lookkey(so, key, hash);
89 if (cmp > 0)
90 return entry;
91 mask = so->mask;
92 }
93 entry++;
94 } while (probes--);
95 perturb >>= PERTURB_SHIFT;
96 i = (i * 5 + 1 + perturb) & mask;
97 }
98 }
99
100 static int set_table_resize(PySetObject *, Py_ssize_t);
101
102 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
104 {
105 setentry *table;
106 setentry *entry;
107 size_t perturb;
108 size_t mask;
109 size_t i; /* Unsigned for defined overflow behavior */
110 int probes;
111 int cmp;
112
113 /* Pre-increment is necessary to prevent arbitrary code in the rich
114 comparison from deallocating the key just before the insertion. */
115 Py_INCREF(key);
116
117 restart:
118
119 mask = so->mask;
120 i = (size_t)hash & mask;
121 perturb = hash;
122
123 while (1) {
124 entry = &so->table[i];
125 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0;
126 do {
127 if (entry->hash == 0 && entry->key == NULL)
128 goto found_unused;
129 if (entry->hash == hash) {
130 PyObject *startkey = entry->key;
131 assert(startkey != dummy);
132 if (startkey == key)
133 goto found_active;
134 if (PyUnicode_CheckExact(startkey)
135 && PyUnicode_CheckExact(key)
136 && _PyUnicode_EQ(startkey, key))
137 goto found_active;
138 table = so->table;
139 Py_INCREF(startkey);
140 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
141 Py_DECREF(startkey);
142 if (cmp > 0)
143 goto found_active;
144 if (cmp < 0)
145 goto comparison_error;
146 if (table != so->table || entry->key != startkey)
147 goto restart;
148 mask = so->mask;
149 }
150 entry++;
151 } while (probes--);
152 perturb >>= PERTURB_SHIFT;
153 i = (i * 5 + 1 + perturb) & mask;
154 }
155
156 found_unused:
157 so->fill++;
158 so->used++;
159 entry->key = key;
160 entry->hash = hash;
161 if ((size_t)so->fill*5 < mask*3)
162 return 0;
163 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
164
165 found_active:
166 Py_DECREF(key);
167 return 0;
168
169 comparison_error:
170 Py_DECREF(key);
171 return -1;
172 }
173
174 /*
175 Internal routine used by set_table_resize() to insert an item which is
176 known to be absent from the set. Besides the performance benefit,
177 there is also safety benefit since using set_add_entry() risks making
178 a callback in the middle of a set_table_resize(), see issue 1456209.
179 The caller is responsible for updating the key's reference count and
180 the setobject's fill and used fields.
181 */
182 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)183 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
184 {
185 setentry *entry;
186 size_t perturb = hash;
187 size_t i = (size_t)hash & mask;
188 size_t j;
189
190 while (1) {
191 entry = &table[i];
192 if (entry->key == NULL)
193 goto found_null;
194 if (i + LINEAR_PROBES <= mask) {
195 for (j = 0; j < LINEAR_PROBES; j++) {
196 entry++;
197 if (entry->key == NULL)
198 goto found_null;
199 }
200 }
201 perturb >>= PERTURB_SHIFT;
202 i = (i * 5 + 1 + perturb) & mask;
203 }
204 found_null:
205 entry->key = key;
206 entry->hash = hash;
207 }
208
209 /* ======== End logic for probing the hash table ========================== */
210 /* ======================================================================== */
211
212 /*
213 Restructure the table by allocating a new table and reinserting all
214 keys again. When entries have been deleted, the new table may
215 actually be smaller than the old one.
216 */
217 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)218 set_table_resize(PySetObject *so, Py_ssize_t minused)
219 {
220 setentry *oldtable, *newtable, *entry;
221 Py_ssize_t oldmask = so->mask;
222 size_t newmask;
223 int is_oldtable_malloced;
224 setentry small_copy[PySet_MINSIZE];
225
226 assert(minused >= 0);
227
228 /* Find the smallest table size > minused. */
229 /* XXX speed-up with intrinsics */
230 size_t newsize = PySet_MINSIZE;
231 while (newsize <= (size_t)minused) {
232 newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
233 }
234
235 /* Get space for a new table. */
236 oldtable = so->table;
237 assert(oldtable != NULL);
238 is_oldtable_malloced = oldtable != so->smalltable;
239
240 if (newsize == PySet_MINSIZE) {
241 /* A large table is shrinking, or we can't get any smaller. */
242 newtable = so->smalltable;
243 if (newtable == oldtable) {
244 if (so->fill == so->used) {
245 /* No dummies, so no point doing anything. */
246 return 0;
247 }
248 /* We're not going to resize it, but rebuild the
249 table anyway to purge old dummy entries.
250 Subtle: This is *necessary* if fill==size,
251 as set_lookkey needs at least one virgin slot to
252 terminate failing searches. If fill < size, it's
253 merely desirable, as dummies slow searches. */
254 assert(so->fill > so->used);
255 memcpy(small_copy, oldtable, sizeof(small_copy));
256 oldtable = small_copy;
257 }
258 }
259 else {
260 newtable = PyMem_NEW(setentry, newsize);
261 if (newtable == NULL) {
262 PyErr_NoMemory();
263 return -1;
264 }
265 }
266
267 /* Make the set empty, using the new table. */
268 assert(newtable != oldtable);
269 memset(newtable, 0, sizeof(setentry) * newsize);
270 so->mask = newsize - 1;
271 so->table = newtable;
272
273 /* Copy the data over; this is refcount-neutral for active entries;
274 dummy entries aren't copied over, of course */
275 newmask = (size_t)so->mask;
276 if (so->fill == so->used) {
277 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
278 if (entry->key != NULL) {
279 set_insert_clean(newtable, newmask, entry->key, entry->hash);
280 }
281 }
282 } else {
283 so->fill = so->used;
284 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
285 if (entry->key != NULL && entry->key != dummy) {
286 set_insert_clean(newtable, newmask, entry->key, entry->hash);
287 }
288 }
289 }
290
291 if (is_oldtable_malloced)
292 PyMem_DEL(oldtable);
293 return 0;
294 }
295
296 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)297 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
298 {
299 setentry *entry;
300
301 entry = set_lookkey(so, key, hash);
302 if (entry != NULL)
303 return entry->key != NULL;
304 return -1;
305 }
306
307 #define DISCARD_NOTFOUND 0
308 #define DISCARD_FOUND 1
309
310 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
312 {
313 setentry *entry;
314 PyObject *old_key;
315
316 entry = set_lookkey(so, key, hash);
317 if (entry == NULL)
318 return -1;
319 if (entry->key == NULL)
320 return DISCARD_NOTFOUND;
321 old_key = entry->key;
322 entry->key = dummy;
323 entry->hash = -1;
324 so->used--;
325 Py_DECREF(old_key);
326 return DISCARD_FOUND;
327 }
328
329 static int
set_add_key(PySetObject * so,PyObject * key)330 set_add_key(PySetObject *so, PyObject *key)
331 {
332 Py_hash_t hash;
333
334 if (!PyUnicode_CheckExact(key) ||
335 (hash = ((PyASCIIObject *) key)->hash) == -1) {
336 hash = PyObject_Hash(key);
337 if (hash == -1)
338 return -1;
339 }
340 return set_add_entry(so, key, hash);
341 }
342
343 static int
set_contains_key(PySetObject * so,PyObject * key)344 set_contains_key(PySetObject *so, PyObject *key)
345 {
346 Py_hash_t hash;
347
348 if (!PyUnicode_CheckExact(key) ||
349 (hash = ((PyASCIIObject *) key)->hash) == -1) {
350 hash = PyObject_Hash(key);
351 if (hash == -1)
352 return -1;
353 }
354 return set_contains_entry(so, key, hash);
355 }
356
357 static int
set_discard_key(PySetObject * so,PyObject * key)358 set_discard_key(PySetObject *so, PyObject *key)
359 {
360 Py_hash_t hash;
361
362 if (!PyUnicode_CheckExact(key) ||
363 (hash = ((PyASCIIObject *) key)->hash) == -1) {
364 hash = PyObject_Hash(key);
365 if (hash == -1)
366 return -1;
367 }
368 return set_discard_entry(so, key, hash);
369 }
370
371 static void
set_empty_to_minsize(PySetObject * so)372 set_empty_to_minsize(PySetObject *so)
373 {
374 memset(so->smalltable, 0, sizeof(so->smalltable));
375 so->fill = 0;
376 so->used = 0;
377 so->mask = PySet_MINSIZE - 1;
378 so->table = so->smalltable;
379 so->hash = -1;
380 }
381
382 static int
set_clear_internal(PySetObject * so)383 set_clear_internal(PySetObject *so)
384 {
385 setentry *entry;
386 setentry *table = so->table;
387 Py_ssize_t fill = so->fill;
388 Py_ssize_t used = so->used;
389 int table_is_malloced = table != so->smalltable;
390 setentry small_copy[PySet_MINSIZE];
391
392 assert (PyAnySet_Check(so));
393 assert(table != NULL);
394
395 /* This is delicate. During the process of clearing the set,
396 * decrefs can cause the set to mutate. To avoid fatal confusion
397 * (voice of experience), we have to make the set empty before
398 * clearing the slots, and never refer to anything via so->ref while
399 * clearing.
400 */
401 if (table_is_malloced)
402 set_empty_to_minsize(so);
403
404 else if (fill > 0) {
405 /* It's a small table with something that needs to be cleared.
406 * Afraid the only safe way is to copy the set entries into
407 * another small table first.
408 */
409 memcpy(small_copy, table, sizeof(small_copy));
410 table = small_copy;
411 set_empty_to_minsize(so);
412 }
413 /* else it's a small table that's already empty */
414
415 /* Now we can finally clear things. If C had refcounts, we could
416 * assert that the refcount on table is 1 now, i.e. that this function
417 * has unique access to it, so decref side-effects can't alter it.
418 */
419 for (entry = table; used > 0; entry++) {
420 if (entry->key && entry->key != dummy) {
421 used--;
422 Py_DECREF(entry->key);
423 }
424 }
425
426 if (table_is_malloced)
427 PyMem_DEL(table);
428 return 0;
429 }
430
431 /*
432 * Iterate over a set table. Use like so:
433 *
434 * Py_ssize_t pos;
435 * setentry *entry;
436 * pos = 0; # important! pos should not otherwise be changed by you
437 * while (set_next(yourset, &pos, &entry)) {
438 * Refer to borrowed reference in entry->key.
439 * }
440 *
441 * CAUTION: In general, it isn't safe to use set_next in a loop that
442 * mutates the table.
443 */
444 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)445 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
446 {
447 Py_ssize_t i;
448 Py_ssize_t mask;
449 setentry *entry;
450
451 assert (PyAnySet_Check(so));
452 i = *pos_ptr;
453 assert(i >= 0);
454 mask = so->mask;
455 entry = &so->table[i];
456 while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
457 i++;
458 entry++;
459 }
460 *pos_ptr = i+1;
461 if (i > mask)
462 return 0;
463 assert(entry != NULL);
464 *entry_ptr = entry;
465 return 1;
466 }
467
468 static void
set_dealloc(PySetObject * so)469 set_dealloc(PySetObject *so)
470 {
471 setentry *entry;
472 Py_ssize_t used = so->used;
473
474 /* bpo-31095: UnTrack is needed before calling any callbacks */
475 PyObject_GC_UnTrack(so);
476 Py_TRASHCAN_BEGIN(so, set_dealloc)
477 if (so->weakreflist != NULL)
478 PyObject_ClearWeakRefs((PyObject *) so);
479
480 for (entry = so->table; used > 0; entry++) {
481 if (entry->key && entry->key != dummy) {
482 used--;
483 Py_DECREF(entry->key);
484 }
485 }
486 if (so->table != so->smalltable)
487 PyMem_DEL(so->table);
488 Py_TYPE(so)->tp_free(so);
489 Py_TRASHCAN_END
490 }
491
492 static PyObject *
set_repr(PySetObject * so)493 set_repr(PySetObject *so)
494 {
495 PyObject *result=NULL, *keys, *listrepr, *tmp;
496 int status = Py_ReprEnter((PyObject*)so);
497
498 if (status != 0) {
499 if (status < 0)
500 return NULL;
501 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
502 }
503
504 /* shortcut for the empty set */
505 if (!so->used) {
506 Py_ReprLeave((PyObject*)so);
507 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
508 }
509
510 keys = PySequence_List((PyObject *)so);
511 if (keys == NULL)
512 goto done;
513
514 /* repr(keys)[1:-1] */
515 listrepr = PyObject_Repr(keys);
516 Py_DECREF(keys);
517 if (listrepr == NULL)
518 goto done;
519 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
520 Py_DECREF(listrepr);
521 if (tmp == NULL)
522 goto done;
523 listrepr = tmp;
524
525 if (!Py_IS_TYPE(so, &PySet_Type))
526 result = PyUnicode_FromFormat("%s({%U})",
527 Py_TYPE(so)->tp_name,
528 listrepr);
529 else
530 result = PyUnicode_FromFormat("{%U}", listrepr);
531 Py_DECREF(listrepr);
532 done:
533 Py_ReprLeave((PyObject*)so);
534 return result;
535 }
536
537 static Py_ssize_t
set_len(PyObject * so)538 set_len(PyObject *so)
539 {
540 return ((PySetObject *)so)->used;
541 }
542
543 static int
set_merge(PySetObject * so,PyObject * otherset)544 set_merge(PySetObject *so, PyObject *otherset)
545 {
546 PySetObject *other;
547 PyObject *key;
548 Py_ssize_t i;
549 setentry *so_entry;
550 setentry *other_entry;
551
552 assert (PyAnySet_Check(so));
553 assert (PyAnySet_Check(otherset));
554
555 other = (PySetObject*)otherset;
556 if (other == so || other->used == 0)
557 /* a.update(a) or a.update(set()); nothing to do */
558 return 0;
559 /* Do one big resize at the start, rather than
560 * incrementally resizing as we insert new keys. Expect
561 * that there will be no (or few) overlapping keys.
562 */
563 if ((so->fill + other->used)*5 >= so->mask*3) {
564 if (set_table_resize(so, (so->used + other->used)*2) != 0)
565 return -1;
566 }
567 so_entry = so->table;
568 other_entry = other->table;
569
570 /* If our table is empty, and both tables have the same size, and
571 there are no dummies to eliminate, then just copy the pointers. */
572 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
573 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
574 key = other_entry->key;
575 if (key != NULL) {
576 assert(so_entry->key == NULL);
577 Py_INCREF(key);
578 so_entry->key = key;
579 so_entry->hash = other_entry->hash;
580 }
581 }
582 so->fill = other->fill;
583 so->used = other->used;
584 return 0;
585 }
586
587 /* If our table is empty, we can use set_insert_clean() */
588 if (so->fill == 0) {
589 setentry *newtable = so->table;
590 size_t newmask = (size_t)so->mask;
591 so->fill = other->used;
592 so->used = other->used;
593 for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
594 key = other_entry->key;
595 if (key != NULL && key != dummy) {
596 Py_INCREF(key);
597 set_insert_clean(newtable, newmask, key, other_entry->hash);
598 }
599 }
600 return 0;
601 }
602
603 /* We can't assure there are no duplicates, so do normal insertions */
604 for (i = 0; i <= other->mask; i++) {
605 other_entry = &other->table[i];
606 key = other_entry->key;
607 if (key != NULL && key != dummy) {
608 if (set_add_entry(so, key, other_entry->hash))
609 return -1;
610 }
611 }
612 return 0;
613 }
614
615 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))616 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
617 {
618 /* Make sure the search finger is in bounds */
619 setentry *entry = so->table + (so->finger & so->mask);
620 setentry *limit = so->table + so->mask;
621 PyObject *key;
622
623 if (so->used == 0) {
624 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
625 return NULL;
626 }
627 while (entry->key == NULL || entry->key==dummy) {
628 entry++;
629 if (entry > limit)
630 entry = so->table;
631 }
632 key = entry->key;
633 entry->key = dummy;
634 entry->hash = -1;
635 so->used--;
636 so->finger = entry - so->table + 1; /* next place to start */
637 return key;
638 }
639
640 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
641 Raises KeyError if the set is empty.");
642
643 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)644 set_traverse(PySetObject *so, visitproc visit, void *arg)
645 {
646 Py_ssize_t pos = 0;
647 setentry *entry;
648
649 while (set_next(so, &pos, &entry))
650 Py_VISIT(entry->key);
651 return 0;
652 }
653
654 /* Work to increase the bit dispersion for closely spaced hash values.
655 This is important because some use cases have many combinations of a
656 small number of elements with nearby hashes so that many distinct
657 combinations collapse to only a handful of distinct hash values. */
658
659 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)660 _shuffle_bits(Py_uhash_t h)
661 {
662 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
663 }
664
665 /* Most of the constants in this hash algorithm are randomly chosen
666 large primes with "interesting bit patterns" and that passed tests
667 for good collision statistics on a variety of problematic datasets
668 including powersets and graph structures (such as David Eppstein's
669 graph recipes in Lib/test/test_set.py) */
670
671 static Py_hash_t
frozenset_hash(PyObject * self)672 frozenset_hash(PyObject *self)
673 {
674 PySetObject *so = (PySetObject *)self;
675 Py_uhash_t hash = 0;
676 setentry *entry;
677
678 if (so->hash != -1)
679 return so->hash;
680
681 /* Xor-in shuffled bits from every entry's hash field because xor is
682 commutative and a frozenset hash should be independent of order.
683
684 For speed, include null entries and dummy entries and then
685 subtract out their effect afterwards so that the final hash
686 depends only on active entries. This allows the code to be
687 vectorized by the compiler and it saves the unpredictable
688 branches that would arise when trying to exclude null and dummy
689 entries on every iteration. */
690
691 for (entry = so->table; entry <= &so->table[so->mask]; entry++)
692 hash ^= _shuffle_bits(entry->hash);
693
694 /* Remove the effect of an odd number of NULL entries */
695 if ((so->mask + 1 - so->fill) & 1)
696 hash ^= _shuffle_bits(0);
697
698 /* Remove the effect of an odd number of dummy entries */
699 if ((so->fill - so->used) & 1)
700 hash ^= _shuffle_bits(-1);
701
702 /* Factor in the number of active entries */
703 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
704
705 /* Disperse patterns arising in nested frozensets */
706 hash ^= (hash >> 11) ^ (hash >> 25);
707 hash = hash * 69069U + 907133923UL;
708
709 /* -1 is reserved as an error code */
710 if (hash == (Py_uhash_t)-1)
711 hash = 590923713UL;
712
713 so->hash = hash;
714 return hash;
715 }
716
717 /***** Set iterator type ***********************************************/
718
719 typedef struct {
720 PyObject_HEAD
721 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
722 Py_ssize_t si_used;
723 Py_ssize_t si_pos;
724 Py_ssize_t len;
725 } setiterobject;
726
727 static void
setiter_dealloc(setiterobject * si)728 setiter_dealloc(setiterobject *si)
729 {
730 /* bpo-31095: UnTrack is needed before calling any callbacks */
731 _PyObject_GC_UNTRACK(si);
732 Py_XDECREF(si->si_set);
733 PyObject_GC_Del(si);
734 }
735
736 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)737 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
738 {
739 Py_VISIT(si->si_set);
740 return 0;
741 }
742
743 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))744 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
745 {
746 Py_ssize_t len = 0;
747 if (si->si_set != NULL && si->si_used == si->si_set->used)
748 len = si->len;
749 return PyLong_FromSsize_t(len);
750 }
751
752 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
753
754 static PyObject *setiter_iternext(setiterobject *si);
755
756 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))757 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
758 {
759 _Py_IDENTIFIER(iter);
760 /* copy the iterator state */
761 setiterobject tmp = *si;
762 Py_XINCREF(tmp.si_set);
763
764 /* iterate the temporary into a list */
765 PyObject *list = PySequence_List((PyObject*)&tmp);
766 Py_XDECREF(tmp.si_set);
767 if (list == NULL) {
768 return NULL;
769 }
770 return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
771 }
772
773 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
774
775 static PyMethodDef setiter_methods[] = {
776 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
777 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
778 {NULL, NULL} /* sentinel */
779 };
780
setiter_iternext(setiterobject * si)781 static PyObject *setiter_iternext(setiterobject *si)
782 {
783 PyObject *key;
784 Py_ssize_t i, mask;
785 setentry *entry;
786 PySetObject *so = si->si_set;
787
788 if (so == NULL)
789 return NULL;
790 assert (PyAnySet_Check(so));
791
792 if (si->si_used != so->used) {
793 PyErr_SetString(PyExc_RuntimeError,
794 "Set changed size during iteration");
795 si->si_used = -1; /* Make this state sticky */
796 return NULL;
797 }
798
799 i = si->si_pos;
800 assert(i>=0);
801 entry = so->table;
802 mask = so->mask;
803 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
804 i++;
805 si->si_pos = i+1;
806 if (i > mask)
807 goto fail;
808 si->len--;
809 key = entry[i].key;
810 Py_INCREF(key);
811 return key;
812
813 fail:
814 si->si_set = NULL;
815 Py_DECREF(so);
816 return NULL;
817 }
818
819 PyTypeObject PySetIter_Type = {
820 PyVarObject_HEAD_INIT(&PyType_Type, 0)
821 "set_iterator", /* tp_name */
822 sizeof(setiterobject), /* tp_basicsize */
823 0, /* tp_itemsize */
824 /* methods */
825 (destructor)setiter_dealloc, /* tp_dealloc */
826 0, /* tp_vectorcall_offset */
827 0, /* tp_getattr */
828 0, /* tp_setattr */
829 0, /* tp_as_async */
830 0, /* tp_repr */
831 0, /* tp_as_number */
832 0, /* tp_as_sequence */
833 0, /* tp_as_mapping */
834 0, /* tp_hash */
835 0, /* tp_call */
836 0, /* tp_str */
837 PyObject_GenericGetAttr, /* tp_getattro */
838 0, /* tp_setattro */
839 0, /* tp_as_buffer */
840 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
841 0, /* tp_doc */
842 (traverseproc)setiter_traverse, /* tp_traverse */
843 0, /* tp_clear */
844 0, /* tp_richcompare */
845 0, /* tp_weaklistoffset */
846 PyObject_SelfIter, /* tp_iter */
847 (iternextfunc)setiter_iternext, /* tp_iternext */
848 setiter_methods, /* tp_methods */
849 0,
850 };
851
852 static PyObject *
set_iter(PySetObject * so)853 set_iter(PySetObject *so)
854 {
855 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
856 if (si == NULL)
857 return NULL;
858 Py_INCREF(so);
859 si->si_set = so;
860 si->si_used = so->used;
861 si->si_pos = 0;
862 si->len = so->used;
863 _PyObject_GC_TRACK(si);
864 return (PyObject *)si;
865 }
866
867 static int
set_update_internal(PySetObject * so,PyObject * other)868 set_update_internal(PySetObject *so, PyObject *other)
869 {
870 PyObject *key, *it;
871
872 if (PyAnySet_Check(other))
873 return set_merge(so, other);
874
875 if (PyDict_CheckExact(other)) {
876 PyObject *value;
877 Py_ssize_t pos = 0;
878 Py_hash_t hash;
879 Py_ssize_t dictsize = PyDict_GET_SIZE(other);
880
881 /* Do one big resize at the start, rather than
882 * incrementally resizing as we insert new keys. Expect
883 * that there will be no (or few) overlapping keys.
884 */
885 if (dictsize < 0)
886 return -1;
887 if ((so->fill + dictsize)*5 >= so->mask*3) {
888 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
889 return -1;
890 }
891 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
892 if (set_add_entry(so, key, hash))
893 return -1;
894 }
895 return 0;
896 }
897
898 it = PyObject_GetIter(other);
899 if (it == NULL)
900 return -1;
901
902 while ((key = PyIter_Next(it)) != NULL) {
903 if (set_add_key(so, key)) {
904 Py_DECREF(it);
905 Py_DECREF(key);
906 return -1;
907 }
908 Py_DECREF(key);
909 }
910 Py_DECREF(it);
911 if (PyErr_Occurred())
912 return -1;
913 return 0;
914 }
915
916 static PyObject *
set_update(PySetObject * so,PyObject * args)917 set_update(PySetObject *so, PyObject *args)
918 {
919 Py_ssize_t i;
920
921 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
922 PyObject *other = PyTuple_GET_ITEM(args, i);
923 if (set_update_internal(so, other))
924 return NULL;
925 }
926 Py_RETURN_NONE;
927 }
928
929 PyDoc_STRVAR(update_doc,
930 "Update a set with the union of itself and others.");
931
932 /* XXX Todo:
933 If aligned memory allocations become available, make the
934 set object 64 byte aligned so that most of the fields
935 can be retrieved or updated in a single cache line.
936 */
937
938 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)939 make_new_set(PyTypeObject *type, PyObject *iterable)
940 {
941 assert(PyType_Check(type));
942 PySetObject *so;
943
944 so = (PySetObject *)type->tp_alloc(type, 0);
945 if (so == NULL)
946 return NULL;
947
948 so->fill = 0;
949 so->used = 0;
950 so->mask = PySet_MINSIZE - 1;
951 so->table = so->smalltable;
952 so->hash = -1;
953 so->finger = 0;
954 so->weakreflist = NULL;
955
956 if (iterable != NULL) {
957 if (set_update_internal(so, iterable)) {
958 Py_DECREF(so);
959 return NULL;
960 }
961 }
962
963 return (PyObject *)so;
964 }
965
966 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)967 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
968 {
969 if (type != &PySet_Type && type != &PyFrozenSet_Type) {
970 if (PyType_IsSubtype(type, &PySet_Type))
971 type = &PySet_Type;
972 else
973 type = &PyFrozenSet_Type;
974 }
975 return make_new_set(type, iterable);
976 }
977
978 /* The empty frozenset is a singleton */
979 static PyObject *emptyfrozenset = NULL;
980
981 static PyObject *
make_new_frozenset(PyTypeObject * type,PyObject * iterable)982 make_new_frozenset(PyTypeObject *type, PyObject *iterable)
983 {
984 if (type != &PyFrozenSet_Type) {
985 return make_new_set(type, iterable);
986 }
987
988 if (iterable != NULL) {
989 if (PyFrozenSet_CheckExact(iterable)) {
990 /* frozenset(f) is idempotent */
991 Py_INCREF(iterable);
992 return iterable;
993 }
994 PyObject *res = make_new_set((PyTypeObject *)type, iterable);
995 if (res == NULL || PySet_GET_SIZE(res) != 0) {
996 return res;
997 }
998 /* If the created frozenset is empty, return the empty frozenset singleton instead */
999 Py_DECREF(res);
1000 }
1001
1002 // The empty frozenset is a singleton
1003 if (emptyfrozenset == NULL) {
1004 emptyfrozenset = make_new_set((PyTypeObject *)type, NULL);
1005 }
1006 Py_XINCREF(emptyfrozenset);
1007 return emptyfrozenset;
1008 }
1009
1010 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1011 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1012 {
1013 PyObject *iterable = NULL;
1014
1015 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds)) {
1016 return NULL;
1017 }
1018
1019 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) {
1020 return NULL;
1021 }
1022
1023 return make_new_frozenset(type, iterable);
1024 }
1025
1026 static PyObject *
frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1027 frozenset_vectorcall(PyObject *type, PyObject * const*args,
1028 size_t nargsf, PyObject *kwnames)
1029 {
1030 if (!_PyArg_NoKwnames("frozenset", kwnames)) {
1031 return NULL;
1032 }
1033
1034 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1035 if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) {
1036 return NULL;
1037 }
1038
1039 PyObject *iterable = (nargs ? args[0] : NULL);
1040 return make_new_frozenset((PyTypeObject *)type, iterable);
1041 }
1042
1043 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1044 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1045 {
1046 return make_new_set(type, NULL);
1047 }
1048
1049 /* set_swap_bodies() switches the contents of any two sets by moving their
1050 internal data pointers and, if needed, copying the internal smalltables.
1051 Semantically equivalent to:
1052
1053 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1054
1055 The function always succeeds and it leaves both objects in a stable state.
1056 Useful for operations that update in-place (by allowing an intermediate
1057 result to be swapped into one of the original inputs).
1058 */
1059
1060 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1061 set_swap_bodies(PySetObject *a, PySetObject *b)
1062 {
1063 Py_ssize_t t;
1064 setentry *u;
1065 setentry tab[PySet_MINSIZE];
1066 Py_hash_t h;
1067
1068 t = a->fill; a->fill = b->fill; b->fill = t;
1069 t = a->used; a->used = b->used; b->used = t;
1070 t = a->mask; a->mask = b->mask; b->mask = t;
1071
1072 u = a->table;
1073 if (a->table == a->smalltable)
1074 u = b->smalltable;
1075 a->table = b->table;
1076 if (b->table == b->smalltable)
1077 a->table = a->smalltable;
1078 b->table = u;
1079
1080 if (a->table == a->smalltable || b->table == b->smalltable) {
1081 memcpy(tab, a->smalltable, sizeof(tab));
1082 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1083 memcpy(b->smalltable, tab, sizeof(tab));
1084 }
1085
1086 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1087 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1088 h = a->hash; a->hash = b->hash; b->hash = h;
1089 } else {
1090 a->hash = -1;
1091 b->hash = -1;
1092 }
1093 }
1094
1095 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1096 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1097 {
1098 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1099 }
1100
1101 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1102 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1103 {
1104 if (PyFrozenSet_CheckExact(so)) {
1105 Py_INCREF(so);
1106 return (PyObject *)so;
1107 }
1108 return set_copy(so, NULL);
1109 }
1110
1111 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1112
1113 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1114 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1115 {
1116 set_clear_internal(so);
1117 Py_RETURN_NONE;
1118 }
1119
1120 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1121
1122 static PyObject *
set_union(PySetObject * so,PyObject * args)1123 set_union(PySetObject *so, PyObject *args)
1124 {
1125 PySetObject *result;
1126 PyObject *other;
1127 Py_ssize_t i;
1128
1129 result = (PySetObject *)set_copy(so, NULL);
1130 if (result == NULL)
1131 return NULL;
1132
1133 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1134 other = PyTuple_GET_ITEM(args, i);
1135 if ((PyObject *)so == other)
1136 continue;
1137 if (set_update_internal(result, other)) {
1138 Py_DECREF(result);
1139 return NULL;
1140 }
1141 }
1142 return (PyObject *)result;
1143 }
1144
1145 PyDoc_STRVAR(union_doc,
1146 "Return the union of sets as a new set.\n\
1147 \n\
1148 (i.e. all elements that are in either set.)");
1149
1150 static PyObject *
set_or(PySetObject * so,PyObject * other)1151 set_or(PySetObject *so, PyObject *other)
1152 {
1153 PySetObject *result;
1154
1155 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1156 Py_RETURN_NOTIMPLEMENTED;
1157
1158 result = (PySetObject *)set_copy(so, NULL);
1159 if (result == NULL)
1160 return NULL;
1161 if ((PyObject *)so == other)
1162 return (PyObject *)result;
1163 if (set_update_internal(result, other)) {
1164 Py_DECREF(result);
1165 return NULL;
1166 }
1167 return (PyObject *)result;
1168 }
1169
1170 static PyObject *
set_ior(PySetObject * so,PyObject * other)1171 set_ior(PySetObject *so, PyObject *other)
1172 {
1173 if (!PyAnySet_Check(other))
1174 Py_RETURN_NOTIMPLEMENTED;
1175
1176 if (set_update_internal(so, other))
1177 return NULL;
1178 Py_INCREF(so);
1179 return (PyObject *)so;
1180 }
1181
1182 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1183 set_intersection(PySetObject *so, PyObject *other)
1184 {
1185 PySetObject *result;
1186 PyObject *key, *it, *tmp;
1187 Py_hash_t hash;
1188 int rv;
1189
1190 if ((PyObject *)so == other)
1191 return set_copy(so, NULL);
1192
1193 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1194 if (result == NULL)
1195 return NULL;
1196
1197 if (PyAnySet_Check(other)) {
1198 Py_ssize_t pos = 0;
1199 setentry *entry;
1200
1201 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1202 tmp = (PyObject *)so;
1203 so = (PySetObject *)other;
1204 other = tmp;
1205 }
1206
1207 while (set_next((PySetObject *)other, &pos, &entry)) {
1208 key = entry->key;
1209 hash = entry->hash;
1210 rv = set_contains_entry(so, key, hash);
1211 if (rv < 0) {
1212 Py_DECREF(result);
1213 return NULL;
1214 }
1215 if (rv) {
1216 if (set_add_entry(result, key, hash)) {
1217 Py_DECREF(result);
1218 return NULL;
1219 }
1220 }
1221 }
1222 return (PyObject *)result;
1223 }
1224
1225 it = PyObject_GetIter(other);
1226 if (it == NULL) {
1227 Py_DECREF(result);
1228 return NULL;
1229 }
1230
1231 while ((key = PyIter_Next(it)) != NULL) {
1232 hash = PyObject_Hash(key);
1233 if (hash == -1)
1234 goto error;
1235 rv = set_contains_entry(so, key, hash);
1236 if (rv < 0)
1237 goto error;
1238 if (rv) {
1239 if (set_add_entry(result, key, hash))
1240 goto error;
1241 }
1242 Py_DECREF(key);
1243 }
1244 Py_DECREF(it);
1245 if (PyErr_Occurred()) {
1246 Py_DECREF(result);
1247 return NULL;
1248 }
1249 return (PyObject *)result;
1250 error:
1251 Py_DECREF(it);
1252 Py_DECREF(result);
1253 Py_DECREF(key);
1254 return NULL;
1255 }
1256
1257 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1258 set_intersection_multi(PySetObject *so, PyObject *args)
1259 {
1260 Py_ssize_t i;
1261 PyObject *result = (PyObject *)so;
1262
1263 if (PyTuple_GET_SIZE(args) == 0)
1264 return set_copy(so, NULL);
1265
1266 Py_INCREF(so);
1267 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1268 PyObject *other = PyTuple_GET_ITEM(args, i);
1269 PyObject *newresult = set_intersection((PySetObject *)result, other);
1270 if (newresult == NULL) {
1271 Py_DECREF(result);
1272 return NULL;
1273 }
1274 Py_DECREF(result);
1275 result = newresult;
1276 }
1277 return result;
1278 }
1279
1280 PyDoc_STRVAR(intersection_doc,
1281 "Return the intersection of two sets as a new set.\n\
1282 \n\
1283 (i.e. all elements that are in both sets.)");
1284
1285 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1286 set_intersection_update(PySetObject *so, PyObject *other)
1287 {
1288 PyObject *tmp;
1289
1290 tmp = set_intersection(so, other);
1291 if (tmp == NULL)
1292 return NULL;
1293 set_swap_bodies(so, (PySetObject *)tmp);
1294 Py_DECREF(tmp);
1295 Py_RETURN_NONE;
1296 }
1297
1298 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1299 set_intersection_update_multi(PySetObject *so, PyObject *args)
1300 {
1301 PyObject *tmp;
1302
1303 tmp = set_intersection_multi(so, args);
1304 if (tmp == NULL)
1305 return NULL;
1306 set_swap_bodies(so, (PySetObject *)tmp);
1307 Py_DECREF(tmp);
1308 Py_RETURN_NONE;
1309 }
1310
1311 PyDoc_STRVAR(intersection_update_doc,
1312 "Update a set with the intersection of itself and another.");
1313
1314 static PyObject *
set_and(PySetObject * so,PyObject * other)1315 set_and(PySetObject *so, PyObject *other)
1316 {
1317 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1318 Py_RETURN_NOTIMPLEMENTED;
1319 return set_intersection(so, other);
1320 }
1321
1322 static PyObject *
set_iand(PySetObject * so,PyObject * other)1323 set_iand(PySetObject *so, PyObject *other)
1324 {
1325 PyObject *result;
1326
1327 if (!PyAnySet_Check(other))
1328 Py_RETURN_NOTIMPLEMENTED;
1329 result = set_intersection_update(so, other);
1330 if (result == NULL)
1331 return NULL;
1332 Py_DECREF(result);
1333 Py_INCREF(so);
1334 return (PyObject *)so;
1335 }
1336
1337 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1338 set_isdisjoint(PySetObject *so, PyObject *other)
1339 {
1340 PyObject *key, *it, *tmp;
1341 int rv;
1342
1343 if ((PyObject *)so == other) {
1344 if (PySet_GET_SIZE(so) == 0)
1345 Py_RETURN_TRUE;
1346 else
1347 Py_RETURN_FALSE;
1348 }
1349
1350 if (PyAnySet_CheckExact(other)) {
1351 Py_ssize_t pos = 0;
1352 setentry *entry;
1353
1354 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1355 tmp = (PyObject *)so;
1356 so = (PySetObject *)other;
1357 other = tmp;
1358 }
1359 while (set_next((PySetObject *)other, &pos, &entry)) {
1360 rv = set_contains_entry(so, entry->key, entry->hash);
1361 if (rv < 0)
1362 return NULL;
1363 if (rv)
1364 Py_RETURN_FALSE;
1365 }
1366 Py_RETURN_TRUE;
1367 }
1368
1369 it = PyObject_GetIter(other);
1370 if (it == NULL)
1371 return NULL;
1372
1373 while ((key = PyIter_Next(it)) != NULL) {
1374 Py_hash_t hash = PyObject_Hash(key);
1375
1376 if (hash == -1) {
1377 Py_DECREF(key);
1378 Py_DECREF(it);
1379 return NULL;
1380 }
1381 rv = set_contains_entry(so, key, hash);
1382 Py_DECREF(key);
1383 if (rv < 0) {
1384 Py_DECREF(it);
1385 return NULL;
1386 }
1387 if (rv) {
1388 Py_DECREF(it);
1389 Py_RETURN_FALSE;
1390 }
1391 }
1392 Py_DECREF(it);
1393 if (PyErr_Occurred())
1394 return NULL;
1395 Py_RETURN_TRUE;
1396 }
1397
1398 PyDoc_STRVAR(isdisjoint_doc,
1399 "Return True if two sets have a null intersection.");
1400
1401 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1402 set_difference_update_internal(PySetObject *so, PyObject *other)
1403 {
1404 if ((PyObject *)so == other)
1405 return set_clear_internal(so);
1406
1407 if (PyAnySet_Check(other)) {
1408 setentry *entry;
1409 Py_ssize_t pos = 0;
1410
1411 /* Optimization: When the other set is more than 8 times
1412 larger than the base set, replace the other set with
1413 interesection of the two sets.
1414 */
1415 if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) {
1416 other = set_intersection(so, other);
1417 if (other == NULL)
1418 return -1;
1419 } else {
1420 Py_INCREF(other);
1421 }
1422
1423 while (set_next((PySetObject *)other, &pos, &entry))
1424 if (set_discard_entry(so, entry->key, entry->hash) < 0) {
1425 Py_DECREF(other);
1426 return -1;
1427 }
1428
1429 Py_DECREF(other);
1430 } else {
1431 PyObject *key, *it;
1432 it = PyObject_GetIter(other);
1433 if (it == NULL)
1434 return -1;
1435
1436 while ((key = PyIter_Next(it)) != NULL) {
1437 if (set_discard_key(so, key) < 0) {
1438 Py_DECREF(it);
1439 Py_DECREF(key);
1440 return -1;
1441 }
1442 Py_DECREF(key);
1443 }
1444 Py_DECREF(it);
1445 if (PyErr_Occurred())
1446 return -1;
1447 }
1448 /* If more than 1/4th are dummies, then resize them away. */
1449 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1450 return 0;
1451 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1452 }
1453
1454 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1455 set_difference_update(PySetObject *so, PyObject *args)
1456 {
1457 Py_ssize_t i;
1458
1459 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1460 PyObject *other = PyTuple_GET_ITEM(args, i);
1461 if (set_difference_update_internal(so, other))
1462 return NULL;
1463 }
1464 Py_RETURN_NONE;
1465 }
1466
1467 PyDoc_STRVAR(difference_update_doc,
1468 "Remove all elements of another set from this set.");
1469
1470 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1471 set_copy_and_difference(PySetObject *so, PyObject *other)
1472 {
1473 PyObject *result;
1474
1475 result = set_copy(so, NULL);
1476 if (result == NULL)
1477 return NULL;
1478 if (set_difference_update_internal((PySetObject *) result, other) == 0)
1479 return result;
1480 Py_DECREF(result);
1481 return NULL;
1482 }
1483
1484 static PyObject *
set_difference(PySetObject * so,PyObject * other)1485 set_difference(PySetObject *so, PyObject *other)
1486 {
1487 PyObject *result;
1488 PyObject *key;
1489 Py_hash_t hash;
1490 setentry *entry;
1491 Py_ssize_t pos = 0, other_size;
1492 int rv;
1493
1494 if (PyAnySet_Check(other)) {
1495 other_size = PySet_GET_SIZE(other);
1496 }
1497 else if (PyDict_CheckExact(other)) {
1498 other_size = PyDict_GET_SIZE(other);
1499 }
1500 else {
1501 return set_copy_and_difference(so, other);
1502 }
1503
1504 /* If len(so) much more than len(other), it's more efficient to simply copy
1505 * so and then iterate other looking for common elements. */
1506 if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1507 return set_copy_and_difference(so, other);
1508 }
1509
1510 result = make_new_set_basetype(Py_TYPE(so), NULL);
1511 if (result == NULL)
1512 return NULL;
1513
1514 if (PyDict_CheckExact(other)) {
1515 while (set_next(so, &pos, &entry)) {
1516 key = entry->key;
1517 hash = entry->hash;
1518 rv = _PyDict_Contains(other, key, hash);
1519 if (rv < 0) {
1520 Py_DECREF(result);
1521 return NULL;
1522 }
1523 if (!rv) {
1524 if (set_add_entry((PySetObject *)result, key, hash)) {
1525 Py_DECREF(result);
1526 return NULL;
1527 }
1528 }
1529 }
1530 return result;
1531 }
1532
1533 /* Iterate over so, checking for common elements in other. */
1534 while (set_next(so, &pos, &entry)) {
1535 key = entry->key;
1536 hash = entry->hash;
1537 rv = set_contains_entry((PySetObject *)other, key, hash);
1538 if (rv < 0) {
1539 Py_DECREF(result);
1540 return NULL;
1541 }
1542 if (!rv) {
1543 if (set_add_entry((PySetObject *)result, key, hash)) {
1544 Py_DECREF(result);
1545 return NULL;
1546 }
1547 }
1548 }
1549 return result;
1550 }
1551
1552 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1553 set_difference_multi(PySetObject *so, PyObject *args)
1554 {
1555 Py_ssize_t i;
1556 PyObject *result, *other;
1557
1558 if (PyTuple_GET_SIZE(args) == 0)
1559 return set_copy(so, NULL);
1560
1561 other = PyTuple_GET_ITEM(args, 0);
1562 result = set_difference(so, other);
1563 if (result == NULL)
1564 return NULL;
1565
1566 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1567 other = PyTuple_GET_ITEM(args, i);
1568 if (set_difference_update_internal((PySetObject *)result, other)) {
1569 Py_DECREF(result);
1570 return NULL;
1571 }
1572 }
1573 return result;
1574 }
1575
1576 PyDoc_STRVAR(difference_doc,
1577 "Return the difference of two or more sets as a new set.\n\
1578 \n\
1579 (i.e. all elements that are in this set but not the others.)");
1580 static PyObject *
set_sub(PySetObject * so,PyObject * other)1581 set_sub(PySetObject *so, PyObject *other)
1582 {
1583 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1584 Py_RETURN_NOTIMPLEMENTED;
1585 return set_difference(so, other);
1586 }
1587
1588 static PyObject *
set_isub(PySetObject * so,PyObject * other)1589 set_isub(PySetObject *so, PyObject *other)
1590 {
1591 if (!PyAnySet_Check(other))
1592 Py_RETURN_NOTIMPLEMENTED;
1593 if (set_difference_update_internal(so, other))
1594 return NULL;
1595 Py_INCREF(so);
1596 return (PyObject *)so;
1597 }
1598
1599 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1600 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1601 {
1602 PySetObject *otherset;
1603 PyObject *key;
1604 Py_ssize_t pos = 0;
1605 Py_hash_t hash;
1606 setentry *entry;
1607 int rv;
1608
1609 if ((PyObject *)so == other)
1610 return set_clear(so, NULL);
1611
1612 if (PyDict_CheckExact(other)) {
1613 PyObject *value;
1614 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1615 Py_INCREF(key);
1616 rv = set_discard_entry(so, key, hash);
1617 if (rv < 0) {
1618 Py_DECREF(key);
1619 return NULL;
1620 }
1621 if (rv == DISCARD_NOTFOUND) {
1622 if (set_add_entry(so, key, hash)) {
1623 Py_DECREF(key);
1624 return NULL;
1625 }
1626 }
1627 Py_DECREF(key);
1628 }
1629 Py_RETURN_NONE;
1630 }
1631
1632 if (PyAnySet_Check(other)) {
1633 Py_INCREF(other);
1634 otherset = (PySetObject *)other;
1635 } else {
1636 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1637 if (otherset == NULL)
1638 return NULL;
1639 }
1640
1641 while (set_next(otherset, &pos, &entry)) {
1642 key = entry->key;
1643 hash = entry->hash;
1644 rv = set_discard_entry(so, key, hash);
1645 if (rv < 0) {
1646 Py_DECREF(otherset);
1647 return NULL;
1648 }
1649 if (rv == DISCARD_NOTFOUND) {
1650 if (set_add_entry(so, key, hash)) {
1651 Py_DECREF(otherset);
1652 return NULL;
1653 }
1654 }
1655 }
1656 Py_DECREF(otherset);
1657 Py_RETURN_NONE;
1658 }
1659
1660 PyDoc_STRVAR(symmetric_difference_update_doc,
1661 "Update a set with the symmetric difference of itself and another.");
1662
1663 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1664 set_symmetric_difference(PySetObject *so, PyObject *other)
1665 {
1666 PyObject *rv;
1667 PySetObject *otherset;
1668
1669 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1670 if (otherset == NULL)
1671 return NULL;
1672 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1673 if (rv == NULL) {
1674 Py_DECREF(otherset);
1675 return NULL;
1676 }
1677 Py_DECREF(rv);
1678 return (PyObject *)otherset;
1679 }
1680
1681 PyDoc_STRVAR(symmetric_difference_doc,
1682 "Return the symmetric difference of two sets as a new set.\n\
1683 \n\
1684 (i.e. all elements that are in exactly one of the sets.)");
1685
1686 static PyObject *
set_xor(PySetObject * so,PyObject * other)1687 set_xor(PySetObject *so, PyObject *other)
1688 {
1689 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1690 Py_RETURN_NOTIMPLEMENTED;
1691 return set_symmetric_difference(so, other);
1692 }
1693
1694 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1695 set_ixor(PySetObject *so, PyObject *other)
1696 {
1697 PyObject *result;
1698
1699 if (!PyAnySet_Check(other))
1700 Py_RETURN_NOTIMPLEMENTED;
1701 result = set_symmetric_difference_update(so, other);
1702 if (result == NULL)
1703 return NULL;
1704 Py_DECREF(result);
1705 Py_INCREF(so);
1706 return (PyObject *)so;
1707 }
1708
1709 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1710 set_issubset(PySetObject *so, PyObject *other)
1711 {
1712 setentry *entry;
1713 Py_ssize_t pos = 0;
1714 int rv;
1715
1716 if (!PyAnySet_Check(other)) {
1717 PyObject *tmp, *result;
1718 tmp = make_new_set(&PySet_Type, other);
1719 if (tmp == NULL)
1720 return NULL;
1721 result = set_issubset(so, tmp);
1722 Py_DECREF(tmp);
1723 return result;
1724 }
1725 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1726 Py_RETURN_FALSE;
1727
1728 while (set_next(so, &pos, &entry)) {
1729 rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1730 if (rv < 0)
1731 return NULL;
1732 if (!rv)
1733 Py_RETURN_FALSE;
1734 }
1735 Py_RETURN_TRUE;
1736 }
1737
1738 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1739
1740 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1741 set_issuperset(PySetObject *so, PyObject *other)
1742 {
1743 PyObject *tmp, *result;
1744
1745 if (!PyAnySet_Check(other)) {
1746 tmp = make_new_set(&PySet_Type, other);
1747 if (tmp == NULL)
1748 return NULL;
1749 result = set_issuperset(so, tmp);
1750 Py_DECREF(tmp);
1751 return result;
1752 }
1753 return set_issubset((PySetObject *)other, (PyObject *)so);
1754 }
1755
1756 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1757
1758 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1759 set_richcompare(PySetObject *v, PyObject *w, int op)
1760 {
1761 PyObject *r1;
1762 int r2;
1763
1764 if(!PyAnySet_Check(w))
1765 Py_RETURN_NOTIMPLEMENTED;
1766
1767 switch (op) {
1768 case Py_EQ:
1769 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1770 Py_RETURN_FALSE;
1771 if (v->hash != -1 &&
1772 ((PySetObject *)w)->hash != -1 &&
1773 v->hash != ((PySetObject *)w)->hash)
1774 Py_RETURN_FALSE;
1775 return set_issubset(v, w);
1776 case Py_NE:
1777 r1 = set_richcompare(v, w, Py_EQ);
1778 if (r1 == NULL)
1779 return NULL;
1780 r2 = PyObject_IsTrue(r1);
1781 Py_DECREF(r1);
1782 if (r2 < 0)
1783 return NULL;
1784 return PyBool_FromLong(!r2);
1785 case Py_LE:
1786 return set_issubset(v, w);
1787 case Py_GE:
1788 return set_issuperset(v, w);
1789 case Py_LT:
1790 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1791 Py_RETURN_FALSE;
1792 return set_issubset(v, w);
1793 case Py_GT:
1794 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1795 Py_RETURN_FALSE;
1796 return set_issuperset(v, w);
1797 }
1798 Py_RETURN_NOTIMPLEMENTED;
1799 }
1800
1801 static PyObject *
set_add(PySetObject * so,PyObject * key)1802 set_add(PySetObject *so, PyObject *key)
1803 {
1804 if (set_add_key(so, key))
1805 return NULL;
1806 Py_RETURN_NONE;
1807 }
1808
1809 PyDoc_STRVAR(add_doc,
1810 "Add an element to a set.\n\
1811 \n\
1812 This has no effect if the element is already present.");
1813
1814 static int
set_contains(PySetObject * so,PyObject * key)1815 set_contains(PySetObject *so, PyObject *key)
1816 {
1817 PyObject *tmpkey;
1818 int rv;
1819
1820 rv = set_contains_key(so, key);
1821 if (rv < 0) {
1822 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1823 return -1;
1824 PyErr_Clear();
1825 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1826 if (tmpkey == NULL)
1827 return -1;
1828 rv = set_contains_key(so, tmpkey);
1829 Py_DECREF(tmpkey);
1830 }
1831 return rv;
1832 }
1833
1834 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1835 set_direct_contains(PySetObject *so, PyObject *key)
1836 {
1837 long result;
1838
1839 result = set_contains(so, key);
1840 if (result < 0)
1841 return NULL;
1842 return PyBool_FromLong(result);
1843 }
1844
1845 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1846
1847 static PyObject *
set_remove(PySetObject * so,PyObject * key)1848 set_remove(PySetObject *so, PyObject *key)
1849 {
1850 PyObject *tmpkey;
1851 int rv;
1852
1853 rv = set_discard_key(so, key);
1854 if (rv < 0) {
1855 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1856 return NULL;
1857 PyErr_Clear();
1858 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1859 if (tmpkey == NULL)
1860 return NULL;
1861 rv = set_discard_key(so, tmpkey);
1862 Py_DECREF(tmpkey);
1863 if (rv < 0)
1864 return NULL;
1865 }
1866
1867 if (rv == DISCARD_NOTFOUND) {
1868 _PyErr_SetKeyError(key);
1869 return NULL;
1870 }
1871 Py_RETURN_NONE;
1872 }
1873
1874 PyDoc_STRVAR(remove_doc,
1875 "Remove an element from a set; it must be a member.\n\
1876 \n\
1877 If the element is not a member, raise a KeyError.");
1878
1879 static PyObject *
set_discard(PySetObject * so,PyObject * key)1880 set_discard(PySetObject *so, PyObject *key)
1881 {
1882 PyObject *tmpkey;
1883 int rv;
1884
1885 rv = set_discard_key(so, key);
1886 if (rv < 0) {
1887 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1888 return NULL;
1889 PyErr_Clear();
1890 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1891 if (tmpkey == NULL)
1892 return NULL;
1893 rv = set_discard_key(so, tmpkey);
1894 Py_DECREF(tmpkey);
1895 if (rv < 0)
1896 return NULL;
1897 }
1898 Py_RETURN_NONE;
1899 }
1900
1901 PyDoc_STRVAR(discard_doc,
1902 "Remove an element from a set if it is a member.\n\
1903 \n\
1904 If the element is not a member, do nothing.");
1905
1906 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1907 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1908 {
1909 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1910 _Py_IDENTIFIER(__dict__);
1911
1912 keys = PySequence_List((PyObject *)so);
1913 if (keys == NULL)
1914 goto done;
1915 args = PyTuple_Pack(1, keys);
1916 if (args == NULL)
1917 goto done;
1918 if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) {
1919 goto done;
1920 }
1921 if (dict == NULL) {
1922 dict = Py_None;
1923 Py_INCREF(dict);
1924 }
1925 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1926 done:
1927 Py_XDECREF(args);
1928 Py_XDECREF(keys);
1929 Py_XDECREF(dict);
1930 return result;
1931 }
1932
1933 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1934 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1935 {
1936 Py_ssize_t res;
1937
1938 res = _PyObject_SIZE(Py_TYPE(so));
1939 if (so->table != so->smalltable)
1940 res = res + (so->mask + 1) * sizeof(setentry);
1941 return PyLong_FromSsize_t(res);
1942 }
1943
1944 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1945 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1946 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1947 {
1948 PyObject *iterable = NULL;
1949
1950 if (!_PyArg_NoKeywords("set", kwds))
1951 return -1;
1952 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1953 return -1;
1954 if (self->fill)
1955 set_clear_internal(self);
1956 self->hash = -1;
1957 if (iterable == NULL)
1958 return 0;
1959 return set_update_internal(self, iterable);
1960 }
1961
1962 static PyObject*
set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1963 set_vectorcall(PyObject *type, PyObject * const*args,
1964 size_t nargsf, PyObject *kwnames)
1965 {
1966 assert(PyType_Check(type));
1967
1968 if (!_PyArg_NoKwnames("set", kwnames)) {
1969 return NULL;
1970 }
1971
1972 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
1973 if (!_PyArg_CheckPositional("set", nargs, 0, 1)) {
1974 return NULL;
1975 }
1976
1977 if (nargs) {
1978 return make_new_set((PyTypeObject *)type, args[0]);
1979 }
1980
1981 return make_new_set((PyTypeObject *)type, NULL);
1982 }
1983
1984 static PySequenceMethods set_as_sequence = {
1985 set_len, /* sq_length */
1986 0, /* sq_concat */
1987 0, /* sq_repeat */
1988 0, /* sq_item */
1989 0, /* sq_slice */
1990 0, /* sq_ass_item */
1991 0, /* sq_ass_slice */
1992 (objobjproc)set_contains, /* sq_contains */
1993 };
1994
1995 /* set object ********************************************************/
1996
1997 #ifdef Py_DEBUG
1998 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
1999
2000 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2001 All is well if assertions don't fail.");
2002 #endif
2003
2004 static PyMethodDef set_methods[] = {
2005 {"add", (PyCFunction)set_add, METH_O,
2006 add_doc},
2007 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2008 clear_doc},
2009 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2010 contains_doc},
2011 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2012 copy_doc},
2013 {"discard", (PyCFunction)set_discard, METH_O,
2014 discard_doc},
2015 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2016 difference_doc},
2017 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2018 difference_update_doc},
2019 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2020 intersection_doc},
2021 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2022 intersection_update_doc},
2023 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2024 isdisjoint_doc},
2025 {"issubset", (PyCFunction)set_issubset, METH_O,
2026 issubset_doc},
2027 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2028 issuperset_doc},
2029 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2030 pop_doc},
2031 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2032 reduce_doc},
2033 {"remove", (PyCFunction)set_remove, METH_O,
2034 remove_doc},
2035 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2036 sizeof_doc},
2037 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2038 symmetric_difference_doc},
2039 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2040 symmetric_difference_update_doc},
2041 #ifdef Py_DEBUG
2042 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2043 test_c_api_doc},
2044 #endif
2045 {"union", (PyCFunction)set_union, METH_VARARGS,
2046 union_doc},
2047 {"update", (PyCFunction)set_update, METH_VARARGS,
2048 update_doc},
2049 {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2050 {NULL, NULL} /* sentinel */
2051 };
2052
2053 static PyNumberMethods set_as_number = {
2054 0, /*nb_add*/
2055 (binaryfunc)set_sub, /*nb_subtract*/
2056 0, /*nb_multiply*/
2057 0, /*nb_remainder*/
2058 0, /*nb_divmod*/
2059 0, /*nb_power*/
2060 0, /*nb_negative*/
2061 0, /*nb_positive*/
2062 0, /*nb_absolute*/
2063 0, /*nb_bool*/
2064 0, /*nb_invert*/
2065 0, /*nb_lshift*/
2066 0, /*nb_rshift*/
2067 (binaryfunc)set_and, /*nb_and*/
2068 (binaryfunc)set_xor, /*nb_xor*/
2069 (binaryfunc)set_or, /*nb_or*/
2070 0, /*nb_int*/
2071 0, /*nb_reserved*/
2072 0, /*nb_float*/
2073 0, /*nb_inplace_add*/
2074 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2075 0, /*nb_inplace_multiply*/
2076 0, /*nb_inplace_remainder*/
2077 0, /*nb_inplace_power*/
2078 0, /*nb_inplace_lshift*/
2079 0, /*nb_inplace_rshift*/
2080 (binaryfunc)set_iand, /*nb_inplace_and*/
2081 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2082 (binaryfunc)set_ior, /*nb_inplace_or*/
2083 };
2084
2085 PyDoc_STRVAR(set_doc,
2086 "set() -> new empty set object\n\
2087 set(iterable) -> new set object\n\
2088 \n\
2089 Build an unordered collection of unique elements.");
2090
2091 PyTypeObject PySet_Type = {
2092 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2093 "set", /* tp_name */
2094 sizeof(PySetObject), /* tp_basicsize */
2095 0, /* tp_itemsize */
2096 /* methods */
2097 (destructor)set_dealloc, /* tp_dealloc */
2098 0, /* tp_vectorcall_offset */
2099 0, /* tp_getattr */
2100 0, /* tp_setattr */
2101 0, /* tp_as_async */
2102 (reprfunc)set_repr, /* tp_repr */
2103 &set_as_number, /* tp_as_number */
2104 &set_as_sequence, /* tp_as_sequence */
2105 0, /* tp_as_mapping */
2106 PyObject_HashNotImplemented, /* tp_hash */
2107 0, /* tp_call */
2108 0, /* tp_str */
2109 PyObject_GenericGetAttr, /* tp_getattro */
2110 0, /* tp_setattro */
2111 0, /* tp_as_buffer */
2112 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2113 Py_TPFLAGS_BASETYPE, /* tp_flags */
2114 set_doc, /* tp_doc */
2115 (traverseproc)set_traverse, /* tp_traverse */
2116 (inquiry)set_clear_internal, /* tp_clear */
2117 (richcmpfunc)set_richcompare, /* tp_richcompare */
2118 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2119 (getiterfunc)set_iter, /* tp_iter */
2120 0, /* tp_iternext */
2121 set_methods, /* tp_methods */
2122 0, /* tp_members */
2123 0, /* tp_getset */
2124 0, /* tp_base */
2125 0, /* tp_dict */
2126 0, /* tp_descr_get */
2127 0, /* tp_descr_set */
2128 0, /* tp_dictoffset */
2129 (initproc)set_init, /* tp_init */
2130 PyType_GenericAlloc, /* tp_alloc */
2131 set_new, /* tp_new */
2132 PyObject_GC_Del, /* tp_free */
2133 .tp_vectorcall = set_vectorcall,
2134 };
2135
2136 /* frozenset object ********************************************************/
2137
2138
2139 static PyMethodDef frozenset_methods[] = {
2140 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2141 contains_doc},
2142 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2143 copy_doc},
2144 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2145 difference_doc},
2146 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2147 intersection_doc},
2148 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2149 isdisjoint_doc},
2150 {"issubset", (PyCFunction)set_issubset, METH_O,
2151 issubset_doc},
2152 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2153 issuperset_doc},
2154 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2155 reduce_doc},
2156 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2157 sizeof_doc},
2158 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2159 symmetric_difference_doc},
2160 {"union", (PyCFunction)set_union, METH_VARARGS,
2161 union_doc},
2162 {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
2163 {NULL, NULL} /* sentinel */
2164 };
2165
2166 static PyNumberMethods frozenset_as_number = {
2167 0, /*nb_add*/
2168 (binaryfunc)set_sub, /*nb_subtract*/
2169 0, /*nb_multiply*/
2170 0, /*nb_remainder*/
2171 0, /*nb_divmod*/
2172 0, /*nb_power*/
2173 0, /*nb_negative*/
2174 0, /*nb_positive*/
2175 0, /*nb_absolute*/
2176 0, /*nb_bool*/
2177 0, /*nb_invert*/
2178 0, /*nb_lshift*/
2179 0, /*nb_rshift*/
2180 (binaryfunc)set_and, /*nb_and*/
2181 (binaryfunc)set_xor, /*nb_xor*/
2182 (binaryfunc)set_or, /*nb_or*/
2183 };
2184
2185 PyDoc_STRVAR(frozenset_doc,
2186 "frozenset() -> empty frozenset object\n\
2187 frozenset(iterable) -> frozenset object\n\
2188 \n\
2189 Build an immutable unordered collection of unique elements.");
2190
2191 PyTypeObject PyFrozenSet_Type = {
2192 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2193 "frozenset", /* tp_name */
2194 sizeof(PySetObject), /* tp_basicsize */
2195 0, /* tp_itemsize */
2196 /* methods */
2197 (destructor)set_dealloc, /* tp_dealloc */
2198 0, /* tp_vectorcall_offset */
2199 0, /* tp_getattr */
2200 0, /* tp_setattr */
2201 0, /* tp_as_async */
2202 (reprfunc)set_repr, /* tp_repr */
2203 &frozenset_as_number, /* tp_as_number */
2204 &set_as_sequence, /* tp_as_sequence */
2205 0, /* tp_as_mapping */
2206 frozenset_hash, /* tp_hash */
2207 0, /* tp_call */
2208 0, /* tp_str */
2209 PyObject_GenericGetAttr, /* tp_getattro */
2210 0, /* tp_setattro */
2211 0, /* tp_as_buffer */
2212 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2213 Py_TPFLAGS_BASETYPE, /* tp_flags */
2214 frozenset_doc, /* tp_doc */
2215 (traverseproc)set_traverse, /* tp_traverse */
2216 (inquiry)set_clear_internal, /* tp_clear */
2217 (richcmpfunc)set_richcompare, /* tp_richcompare */
2218 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2219 (getiterfunc)set_iter, /* tp_iter */
2220 0, /* tp_iternext */
2221 frozenset_methods, /* tp_methods */
2222 0, /* tp_members */
2223 0, /* tp_getset */
2224 0, /* tp_base */
2225 0, /* tp_dict */
2226 0, /* tp_descr_get */
2227 0, /* tp_descr_set */
2228 0, /* tp_dictoffset */
2229 0, /* tp_init */
2230 PyType_GenericAlloc, /* tp_alloc */
2231 frozenset_new, /* tp_new */
2232 PyObject_GC_Del, /* tp_free */
2233 .tp_vectorcall = frozenset_vectorcall,
2234 };
2235
2236
2237 /***** C API functions *************************************************/
2238
2239 PyObject *
PySet_New(PyObject * iterable)2240 PySet_New(PyObject *iterable)
2241 {
2242 return make_new_set(&PySet_Type, iterable);
2243 }
2244
2245 PyObject *
PyFrozenSet_New(PyObject * iterable)2246 PyFrozenSet_New(PyObject *iterable)
2247 {
2248 return make_new_set(&PyFrozenSet_Type, iterable);
2249 }
2250
2251 Py_ssize_t
PySet_Size(PyObject * anyset)2252 PySet_Size(PyObject *anyset)
2253 {
2254 if (!PyAnySet_Check(anyset)) {
2255 PyErr_BadInternalCall();
2256 return -1;
2257 }
2258 return PySet_GET_SIZE(anyset);
2259 }
2260
2261 int
PySet_Clear(PyObject * set)2262 PySet_Clear(PyObject *set)
2263 {
2264 if (!PySet_Check(set)) {
2265 PyErr_BadInternalCall();
2266 return -1;
2267 }
2268 return set_clear_internal((PySetObject *)set);
2269 }
2270
2271 int
PySet_Contains(PyObject * anyset,PyObject * key)2272 PySet_Contains(PyObject *anyset, PyObject *key)
2273 {
2274 if (!PyAnySet_Check(anyset)) {
2275 PyErr_BadInternalCall();
2276 return -1;
2277 }
2278 return set_contains_key((PySetObject *)anyset, key);
2279 }
2280
2281 int
PySet_Discard(PyObject * set,PyObject * key)2282 PySet_Discard(PyObject *set, PyObject *key)
2283 {
2284 if (!PySet_Check(set)) {
2285 PyErr_BadInternalCall();
2286 return -1;
2287 }
2288 return set_discard_key((PySetObject *)set, key);
2289 }
2290
2291 int
PySet_Add(PyObject * anyset,PyObject * key)2292 PySet_Add(PyObject *anyset, PyObject *key)
2293 {
2294 if (!PySet_Check(anyset) &&
2295 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2296 PyErr_BadInternalCall();
2297 return -1;
2298 }
2299 return set_add_key((PySetObject *)anyset, key);
2300 }
2301
2302 void
_PySet_Fini(void)2303 _PySet_Fini(void)
2304 {
2305 Py_CLEAR(emptyfrozenset);
2306 }
2307
2308 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2309 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2310 {
2311 setentry *entry;
2312
2313 if (!PyAnySet_Check(set)) {
2314 PyErr_BadInternalCall();
2315 return -1;
2316 }
2317 if (set_next((PySetObject *)set, pos, &entry) == 0)
2318 return 0;
2319 *key = entry->key;
2320 *hash = entry->hash;
2321 return 1;
2322 }
2323
2324 PyObject *
PySet_Pop(PyObject * set)2325 PySet_Pop(PyObject *set)
2326 {
2327 if (!PySet_Check(set)) {
2328 PyErr_BadInternalCall();
2329 return NULL;
2330 }
2331 return set_pop((PySetObject *)set, NULL);
2332 }
2333
2334 int
_PySet_Update(PyObject * set,PyObject * iterable)2335 _PySet_Update(PyObject *set, PyObject *iterable)
2336 {
2337 if (!PySet_Check(set)) {
2338 PyErr_BadInternalCall();
2339 return -1;
2340 }
2341 return set_update_internal((PySetObject *)set, iterable);
2342 }
2343
2344 /* Exported for the gdb plugin's benefit. */
2345 PyObject *_PySet_Dummy = dummy;
2346
2347 #ifdef Py_DEBUG
2348
2349 /* Test code to be called with any three element set.
2350 Returns True and original set is restored. */
2351
2352 #define assertRaises(call_return_value, exception) \
2353 do { \
2354 assert(call_return_value); \
2355 assert(PyErr_ExceptionMatches(exception)); \
2356 PyErr_Clear(); \
2357 } while(0)
2358
2359 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2360 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2361 {
2362 Py_ssize_t count;
2363 const char *s;
2364 Py_ssize_t i;
2365 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2366 PyObject *ob = (PyObject *)so;
2367 Py_hash_t hash;
2368 PyObject *str;
2369
2370 /* Verify preconditions */
2371 assert(PyAnySet_Check(ob));
2372 assert(PyAnySet_CheckExact(ob));
2373 assert(!PyFrozenSet_CheckExact(ob));
2374
2375 /* so.clear(); so |= set("abc"); */
2376 str = PyUnicode_FromString("abc");
2377 if (str == NULL)
2378 return NULL;
2379 set_clear_internal(so);
2380 if (set_update_internal(so, str)) {
2381 Py_DECREF(str);
2382 return NULL;
2383 }
2384 Py_DECREF(str);
2385
2386 /* Exercise type/size checks */
2387 assert(PySet_Size(ob) == 3);
2388 assert(PySet_GET_SIZE(ob) == 3);
2389
2390 /* Raise TypeError for non-iterable constructor arguments */
2391 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2392 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2393
2394 /* Raise TypeError for unhashable key */
2395 dup = PySet_New(ob);
2396 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2397 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2398 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2399
2400 /* Exercise successful pop, contains, add, and discard */
2401 elem = PySet_Pop(ob);
2402 assert(PySet_Contains(ob, elem) == 0);
2403 assert(PySet_GET_SIZE(ob) == 2);
2404 assert(PySet_Add(ob, elem) == 0);
2405 assert(PySet_Contains(ob, elem) == 1);
2406 assert(PySet_GET_SIZE(ob) == 3);
2407 assert(PySet_Discard(ob, elem) == 1);
2408 assert(PySet_GET_SIZE(ob) == 2);
2409 assert(PySet_Discard(ob, elem) == 0);
2410 assert(PySet_GET_SIZE(ob) == 2);
2411
2412 /* Exercise clear */
2413 dup2 = PySet_New(dup);
2414 assert(PySet_Clear(dup2) == 0);
2415 assert(PySet_Size(dup2) == 0);
2416 Py_DECREF(dup2);
2417
2418 /* Raise SystemError on clear or update of frozen set */
2419 f = PyFrozenSet_New(dup);
2420 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2421 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2422 assert(PySet_Add(f, elem) == 0);
2423 Py_INCREF(f);
2424 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2425 Py_DECREF(f);
2426 Py_DECREF(f);
2427
2428 /* Exercise direct iteration */
2429 i = 0, count = 0;
2430 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2431 s = PyUnicode_AsUTF8(x);
2432 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2433 count++;
2434 }
2435 assert(count == 3);
2436
2437 /* Exercise updates */
2438 dup2 = PySet_New(NULL);
2439 assert(_PySet_Update(dup2, dup) == 0);
2440 assert(PySet_Size(dup2) == 3);
2441 assert(_PySet_Update(dup2, dup) == 0);
2442 assert(PySet_Size(dup2) == 3);
2443 Py_DECREF(dup2);
2444
2445 /* Raise SystemError when self argument is not a set or frozenset. */
2446 t = PyTuple_New(0);
2447 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2448 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2449 Py_DECREF(t);
2450
2451 /* Raise SystemError when self argument is not a set. */
2452 f = PyFrozenSet_New(dup);
2453 assert(PySet_Size(f) == 3);
2454 assert(PyFrozenSet_CheckExact(f));
2455 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2456 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2457 Py_DECREF(f);
2458
2459 /* Raise KeyError when popping from an empty set */
2460 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2461 Py_DECREF(ob);
2462 assert(PySet_GET_SIZE(ob) == 0);
2463 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2464
2465 /* Restore the set from the copy using the PyNumber API */
2466 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2467 Py_DECREF(ob);
2468
2469 /* Verify constructors accept NULL arguments */
2470 f = PySet_New(NULL);
2471 assert(f != NULL);
2472 assert(PySet_GET_SIZE(f) == 0);
2473 Py_DECREF(f);
2474 f = PyFrozenSet_New(NULL);
2475 assert(f != NULL);
2476 assert(PyFrozenSet_CheckExact(f));
2477 assert(PySet_GET_SIZE(f) == 0);
2478 Py_DECREF(f);
2479
2480 Py_DECREF(elem);
2481 Py_DECREF(dup);
2482 Py_RETURN_TRUE;
2483 }
2484
2485 #undef assertRaises
2486
2487 #endif
2488
2489 /***** Dummy Struct *************************************************/
2490
2491 static PyObject *
dummy_repr(PyObject * op)2492 dummy_repr(PyObject *op)
2493 {
2494 return PyUnicode_FromString("<dummy key>");
2495 }
2496
2497 static void _Py_NO_RETURN
dummy_dealloc(PyObject * ignore)2498 dummy_dealloc(PyObject* ignore)
2499 {
2500 Py_FatalError("deallocating <dummy key>");
2501 }
2502
2503 static PyTypeObject _PySetDummy_Type = {
2504 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2505 "<dummy key> type",
2506 0,
2507 0,
2508 dummy_dealloc, /*tp_dealloc*/ /*never called*/
2509 0, /*tp_vectorcall_offset*/
2510 0, /*tp_getattr*/
2511 0, /*tp_setattr*/
2512 0, /*tp_as_async*/
2513 dummy_repr, /*tp_repr*/
2514 0, /*tp_as_number*/
2515 0, /*tp_as_sequence*/
2516 0, /*tp_as_mapping*/
2517 0, /*tp_hash */
2518 0, /*tp_call */
2519 0, /*tp_str */
2520 0, /*tp_getattro */
2521 0, /*tp_setattro */
2522 0, /*tp_as_buffer */
2523 Py_TPFLAGS_DEFAULT, /*tp_flags */
2524 };
2525
2526 static PyObject _dummy_struct = {
2527 _PyObject_EXTRA_INIT
2528 2, &_PySetDummy_Type
2529 };
2530
2531