1
2 /* set object implementation
3
4 Written and maintained by Raymond D. Hettinger <python@rcn.com>
5 Derived from Lib/sets.py and Objects/dictobject.c.
6
7 The basic lookup function used by all operations.
8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10 The initial probe index is computed as hash mod the table size.
11 Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13 To improve cache locality, each probe inspects a series of consecutive
14 nearby entries before moving on to probes elsewhere in memory. This leaves
15 us with a hybrid of linear probing and randomized probing. The linear probing
16 reduces the cost of hash collisions because consecutive memory accesses
17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18 we then use more of the upper bits from the hash value and apply a simple
19 linear congruential random number genearator. This helps break-up long
20 chains of collisions.
21
22 All arithmetic on hash should ignore overflow.
23
24 Unlike the dictionary implementation, the lookkey function can return
25 NULL if the rich comparison returns an error.
26
27 Use cases for sets differ considerably from dictionaries where looked-up
28 keys are more likely to be present. In contrast, sets are primarily
29 about membership testing where the presence of an element is not known in
30 advance. Accordingly, the set implementation needs to optimize for both
31 the found and not-found case.
32 */
33
34 #include "Python.h"
35 #include "pycore_object.h"
36 #include "pycore_pystate.h"
37 #include "structmember.h"
38
39 /* Object used as dummy key to fill deleted entries */
40 static PyObject _dummy_struct;
41
42 #define dummy (&_dummy_struct)
43
44
45 /* ======================================================================== */
46 /* ======= Begin logic for probing the hash table ========================= */
47
48 /* Set this to zero to turn-off linear probing */
49 #ifndef LINEAR_PROBES
50 #define LINEAR_PROBES 9
51 #endif
52
53 /* This must be >= 1 */
54 #define PERTURB_SHIFT 5
55
56 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)57 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
58 {
59 setentry *table;
60 setentry *entry;
61 size_t perturb;
62 size_t mask = so->mask;
63 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
64 size_t j;
65 int cmp;
66
67 entry = &so->table[i];
68 if (entry->key == NULL)
69 return entry;
70
71 perturb = hash;
72
73 while (1) {
74 if (entry->hash == hash) {
75 PyObject *startkey = entry->key;
76 /* startkey cannot be a dummy because the dummy hash field is -1 */
77 assert(startkey != dummy);
78 if (startkey == key)
79 return entry;
80 if (PyUnicode_CheckExact(startkey)
81 && PyUnicode_CheckExact(key)
82 && _PyUnicode_EQ(startkey, key))
83 return entry;
84 table = so->table;
85 Py_INCREF(startkey);
86 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
87 Py_DECREF(startkey);
88 if (cmp < 0) /* unlikely */
89 return NULL;
90 if (table != so->table || entry->key != startkey) /* unlikely */
91 return set_lookkey(so, key, hash);
92 if (cmp > 0) /* likely */
93 return entry;
94 mask = so->mask; /* help avoid a register spill */
95 }
96
97 if (i + LINEAR_PROBES <= mask) {
98 for (j = 0 ; j < LINEAR_PROBES ; j++) {
99 entry++;
100 if (entry->hash == 0 && entry->key == NULL)
101 return entry;
102 if (entry->hash == hash) {
103 PyObject *startkey = entry->key;
104 assert(startkey != dummy);
105 if (startkey == key)
106 return entry;
107 if (PyUnicode_CheckExact(startkey)
108 && PyUnicode_CheckExact(key)
109 && _PyUnicode_EQ(startkey, key))
110 return entry;
111 table = so->table;
112 Py_INCREF(startkey);
113 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
114 Py_DECREF(startkey);
115 if (cmp < 0)
116 return NULL;
117 if (table != so->table || entry->key != startkey)
118 return set_lookkey(so, key, hash);
119 if (cmp > 0)
120 return entry;
121 mask = so->mask;
122 }
123 }
124 }
125
126 perturb >>= PERTURB_SHIFT;
127 i = (i * 5 + 1 + perturb) & mask;
128
129 entry = &so->table[i];
130 if (entry->key == NULL)
131 return entry;
132 }
133 }
134
135 static int set_table_resize(PySetObject *, Py_ssize_t);
136
137 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)138 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
139 {
140 setentry *table;
141 setentry *freeslot;
142 setentry *entry;
143 size_t perturb;
144 size_t mask;
145 size_t i; /* Unsigned for defined overflow behavior */
146 size_t j;
147 int cmp;
148
149 /* Pre-increment is necessary to prevent arbitrary code in the rich
150 comparison from deallocating the key just before the insertion. */
151 Py_INCREF(key);
152
153 restart:
154
155 mask = so->mask;
156 i = (size_t)hash & mask;
157
158 entry = &so->table[i];
159 if (entry->key == NULL)
160 goto found_unused;
161
162 freeslot = NULL;
163 perturb = hash;
164
165 while (1) {
166 if (entry->hash == hash) {
167 PyObject *startkey = entry->key;
168 /* startkey cannot be a dummy because the dummy hash field is -1 */
169 assert(startkey != dummy);
170 if (startkey == key)
171 goto found_active;
172 if (PyUnicode_CheckExact(startkey)
173 && PyUnicode_CheckExact(key)
174 && _PyUnicode_EQ(startkey, key))
175 goto found_active;
176 table = so->table;
177 Py_INCREF(startkey);
178 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
179 Py_DECREF(startkey);
180 if (cmp > 0) /* likely */
181 goto found_active;
182 if (cmp < 0)
183 goto comparison_error;
184 /* Continuing the search from the current entry only makes
185 sense if the table and entry are unchanged; otherwise,
186 we have to restart from the beginning */
187 if (table != so->table || entry->key != startkey)
188 goto restart;
189 mask = so->mask; /* help avoid a register spill */
190 }
191 else if (entry->hash == -1)
192 freeslot = entry;
193
194 if (i + LINEAR_PROBES <= mask) {
195 for (j = 0 ; j < LINEAR_PROBES ; j++) {
196 entry++;
197 if (entry->hash == 0 && entry->key == NULL)
198 goto found_unused_or_dummy;
199 if (entry->hash == hash) {
200 PyObject *startkey = entry->key;
201 assert(startkey != dummy);
202 if (startkey == key)
203 goto found_active;
204 if (PyUnicode_CheckExact(startkey)
205 && PyUnicode_CheckExact(key)
206 && _PyUnicode_EQ(startkey, key))
207 goto found_active;
208 table = so->table;
209 Py_INCREF(startkey);
210 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
211 Py_DECREF(startkey);
212 if (cmp > 0)
213 goto found_active;
214 if (cmp < 0)
215 goto comparison_error;
216 if (table != so->table || entry->key != startkey)
217 goto restart;
218 mask = so->mask;
219 }
220 else if (entry->hash == -1)
221 freeslot = entry;
222 }
223 }
224
225 perturb >>= PERTURB_SHIFT;
226 i = (i * 5 + 1 + perturb) & mask;
227
228 entry = &so->table[i];
229 if (entry->key == NULL)
230 goto found_unused_or_dummy;
231 }
232
233 found_unused_or_dummy:
234 if (freeslot == NULL)
235 goto found_unused;
236 so->used++;
237 freeslot->key = key;
238 freeslot->hash = hash;
239 return 0;
240
241 found_unused:
242 so->fill++;
243 so->used++;
244 entry->key = key;
245 entry->hash = hash;
246 if ((size_t)so->fill*5 < mask*3)
247 return 0;
248 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
249
250 found_active:
251 Py_DECREF(key);
252 return 0;
253
254 comparison_error:
255 Py_DECREF(key);
256 return -1;
257 }
258
259 /*
260 Internal routine used by set_table_resize() to insert an item which is
261 known to be absent from the set. This routine also assumes that
262 the set contains no deleted entries. Besides the performance benefit,
263 there is also safety benefit since using set_add_entry() risks making
264 a callback in the middle of a set_table_resize(), see issue 1456209.
265 The caller is responsible for updating the key's reference count and
266 the setobject's fill and used fields.
267 */
268 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)269 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
270 {
271 setentry *entry;
272 size_t perturb = hash;
273 size_t i = (size_t)hash & mask;
274 size_t j;
275
276 while (1) {
277 entry = &table[i];
278 if (entry->key == NULL)
279 goto found_null;
280 if (i + LINEAR_PROBES <= mask) {
281 for (j = 0; j < LINEAR_PROBES; j++) {
282 entry++;
283 if (entry->key == NULL)
284 goto found_null;
285 }
286 }
287 perturb >>= PERTURB_SHIFT;
288 i = (i * 5 + 1 + perturb) & mask;
289 }
290 found_null:
291 entry->key = key;
292 entry->hash = hash;
293 }
294
295 /* ======== End logic for probing the hash table ========================== */
296 /* ======================================================================== */
297
298 /*
299 Restructure the table by allocating a new table and reinserting all
300 keys again. When entries have been deleted, the new table may
301 actually be smaller than the old one.
302 */
303 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)304 set_table_resize(PySetObject *so, Py_ssize_t minused)
305 {
306 setentry *oldtable, *newtable, *entry;
307 Py_ssize_t oldmask = so->mask;
308 size_t newmask;
309 int is_oldtable_malloced;
310 setentry small_copy[PySet_MINSIZE];
311
312 assert(minused >= 0);
313
314 /* Find the smallest table size > minused. */
315 /* XXX speed-up with intrinsics */
316 size_t newsize = PySet_MINSIZE;
317 while (newsize <= (size_t)minused) {
318 newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
319 }
320
321 /* Get space for a new table. */
322 oldtable = so->table;
323 assert(oldtable != NULL);
324 is_oldtable_malloced = oldtable != so->smalltable;
325
326 if (newsize == PySet_MINSIZE) {
327 /* A large table is shrinking, or we can't get any smaller. */
328 newtable = so->smalltable;
329 if (newtable == oldtable) {
330 if (so->fill == so->used) {
331 /* No dummies, so no point doing anything. */
332 return 0;
333 }
334 /* We're not going to resize it, but rebuild the
335 table anyway to purge old dummy entries.
336 Subtle: This is *necessary* if fill==size,
337 as set_lookkey needs at least one virgin slot to
338 terminate failing searches. If fill < size, it's
339 merely desirable, as dummies slow searches. */
340 assert(so->fill > so->used);
341 memcpy(small_copy, oldtable, sizeof(small_copy));
342 oldtable = small_copy;
343 }
344 }
345 else {
346 newtable = PyMem_NEW(setentry, newsize);
347 if (newtable == NULL) {
348 PyErr_NoMemory();
349 return -1;
350 }
351 }
352
353 /* Make the set empty, using the new table. */
354 assert(newtable != oldtable);
355 memset(newtable, 0, sizeof(setentry) * newsize);
356 so->mask = newsize - 1;
357 so->table = newtable;
358
359 /* Copy the data over; this is refcount-neutral for active entries;
360 dummy entries aren't copied over, of course */
361 newmask = (size_t)so->mask;
362 if (so->fill == so->used) {
363 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
364 if (entry->key != NULL) {
365 set_insert_clean(newtable, newmask, entry->key, entry->hash);
366 }
367 }
368 } else {
369 so->fill = so->used;
370 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
371 if (entry->key != NULL && entry->key != dummy) {
372 set_insert_clean(newtable, newmask, entry->key, entry->hash);
373 }
374 }
375 }
376
377 if (is_oldtable_malloced)
378 PyMem_DEL(oldtable);
379 return 0;
380 }
381
382 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)383 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
384 {
385 setentry *entry;
386
387 entry = set_lookkey(so, key, hash);
388 if (entry != NULL)
389 return entry->key != NULL;
390 return -1;
391 }
392
393 #define DISCARD_NOTFOUND 0
394 #define DISCARD_FOUND 1
395
396 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)397 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
398 {
399 setentry *entry;
400 PyObject *old_key;
401
402 entry = set_lookkey(so, key, hash);
403 if (entry == NULL)
404 return -1;
405 if (entry->key == NULL)
406 return DISCARD_NOTFOUND;
407 old_key = entry->key;
408 entry->key = dummy;
409 entry->hash = -1;
410 so->used--;
411 Py_DECREF(old_key);
412 return DISCARD_FOUND;
413 }
414
415 static int
set_add_key(PySetObject * so,PyObject * key)416 set_add_key(PySetObject *so, PyObject *key)
417 {
418 Py_hash_t hash;
419
420 if (!PyUnicode_CheckExact(key) ||
421 (hash = ((PyASCIIObject *) key)->hash) == -1) {
422 hash = PyObject_Hash(key);
423 if (hash == -1)
424 return -1;
425 }
426 return set_add_entry(so, key, hash);
427 }
428
429 static int
set_contains_key(PySetObject * so,PyObject * key)430 set_contains_key(PySetObject *so, PyObject *key)
431 {
432 Py_hash_t hash;
433
434 if (!PyUnicode_CheckExact(key) ||
435 (hash = ((PyASCIIObject *) key)->hash) == -1) {
436 hash = PyObject_Hash(key);
437 if (hash == -1)
438 return -1;
439 }
440 return set_contains_entry(so, key, hash);
441 }
442
443 static int
set_discard_key(PySetObject * so,PyObject * key)444 set_discard_key(PySetObject *so, PyObject *key)
445 {
446 Py_hash_t hash;
447
448 if (!PyUnicode_CheckExact(key) ||
449 (hash = ((PyASCIIObject *) key)->hash) == -1) {
450 hash = PyObject_Hash(key);
451 if (hash == -1)
452 return -1;
453 }
454 return set_discard_entry(so, key, hash);
455 }
456
457 static void
set_empty_to_minsize(PySetObject * so)458 set_empty_to_minsize(PySetObject *so)
459 {
460 memset(so->smalltable, 0, sizeof(so->smalltable));
461 so->fill = 0;
462 so->used = 0;
463 so->mask = PySet_MINSIZE - 1;
464 so->table = so->smalltable;
465 so->hash = -1;
466 }
467
468 static int
set_clear_internal(PySetObject * so)469 set_clear_internal(PySetObject *so)
470 {
471 setentry *entry;
472 setentry *table = so->table;
473 Py_ssize_t fill = so->fill;
474 Py_ssize_t used = so->used;
475 int table_is_malloced = table != so->smalltable;
476 setentry small_copy[PySet_MINSIZE];
477
478 assert (PyAnySet_Check(so));
479 assert(table != NULL);
480
481 /* This is delicate. During the process of clearing the set,
482 * decrefs can cause the set to mutate. To avoid fatal confusion
483 * (voice of experience), we have to make the set empty before
484 * clearing the slots, and never refer to anything via so->ref while
485 * clearing.
486 */
487 if (table_is_malloced)
488 set_empty_to_minsize(so);
489
490 else if (fill > 0) {
491 /* It's a small table with something that needs to be cleared.
492 * Afraid the only safe way is to copy the set entries into
493 * another small table first.
494 */
495 memcpy(small_copy, table, sizeof(small_copy));
496 table = small_copy;
497 set_empty_to_minsize(so);
498 }
499 /* else it's a small table that's already empty */
500
501 /* Now we can finally clear things. If C had refcounts, we could
502 * assert that the refcount on table is 1 now, i.e. that this function
503 * has unique access to it, so decref side-effects can't alter it.
504 */
505 for (entry = table; used > 0; entry++) {
506 if (entry->key && entry->key != dummy) {
507 used--;
508 Py_DECREF(entry->key);
509 }
510 }
511
512 if (table_is_malloced)
513 PyMem_DEL(table);
514 return 0;
515 }
516
517 /*
518 * Iterate over a set table. Use like so:
519 *
520 * Py_ssize_t pos;
521 * setentry *entry;
522 * pos = 0; # important! pos should not otherwise be changed by you
523 * while (set_next(yourset, &pos, &entry)) {
524 * Refer to borrowed reference in entry->key.
525 * }
526 *
527 * CAUTION: In general, it isn't safe to use set_next in a loop that
528 * mutates the table.
529 */
530 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)531 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
532 {
533 Py_ssize_t i;
534 Py_ssize_t mask;
535 setentry *entry;
536
537 assert (PyAnySet_Check(so));
538 i = *pos_ptr;
539 assert(i >= 0);
540 mask = so->mask;
541 entry = &so->table[i];
542 while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
543 i++;
544 entry++;
545 }
546 *pos_ptr = i+1;
547 if (i > mask)
548 return 0;
549 assert(entry != NULL);
550 *entry_ptr = entry;
551 return 1;
552 }
553
554 static void
set_dealloc(PySetObject * so)555 set_dealloc(PySetObject *so)
556 {
557 setentry *entry;
558 Py_ssize_t used = so->used;
559
560 /* bpo-31095: UnTrack is needed before calling any callbacks */
561 PyObject_GC_UnTrack(so);
562 Py_TRASHCAN_BEGIN(so, set_dealloc)
563 if (so->weakreflist != NULL)
564 PyObject_ClearWeakRefs((PyObject *) so);
565
566 for (entry = so->table; used > 0; entry++) {
567 if (entry->key && entry->key != dummy) {
568 used--;
569 Py_DECREF(entry->key);
570 }
571 }
572 if (so->table != so->smalltable)
573 PyMem_DEL(so->table);
574 Py_TYPE(so)->tp_free(so);
575 Py_TRASHCAN_END
576 }
577
578 static PyObject *
set_repr(PySetObject * so)579 set_repr(PySetObject *so)
580 {
581 PyObject *result=NULL, *keys, *listrepr, *tmp;
582 int status = Py_ReprEnter((PyObject*)so);
583
584 if (status != 0) {
585 if (status < 0)
586 return NULL;
587 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
588 }
589
590 /* shortcut for the empty set */
591 if (!so->used) {
592 Py_ReprLeave((PyObject*)so);
593 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
594 }
595
596 keys = PySequence_List((PyObject *)so);
597 if (keys == NULL)
598 goto done;
599
600 /* repr(keys)[1:-1] */
601 listrepr = PyObject_Repr(keys);
602 Py_DECREF(keys);
603 if (listrepr == NULL)
604 goto done;
605 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
606 Py_DECREF(listrepr);
607 if (tmp == NULL)
608 goto done;
609 listrepr = tmp;
610
611 if (Py_TYPE(so) != &PySet_Type)
612 result = PyUnicode_FromFormat("%s({%U})",
613 Py_TYPE(so)->tp_name,
614 listrepr);
615 else
616 result = PyUnicode_FromFormat("{%U}", listrepr);
617 Py_DECREF(listrepr);
618 done:
619 Py_ReprLeave((PyObject*)so);
620 return result;
621 }
622
623 static Py_ssize_t
set_len(PyObject * so)624 set_len(PyObject *so)
625 {
626 return ((PySetObject *)so)->used;
627 }
628
629 static int
set_merge(PySetObject * so,PyObject * otherset)630 set_merge(PySetObject *so, PyObject *otherset)
631 {
632 PySetObject *other;
633 PyObject *key;
634 Py_ssize_t i;
635 setentry *so_entry;
636 setentry *other_entry;
637
638 assert (PyAnySet_Check(so));
639 assert (PyAnySet_Check(otherset));
640
641 other = (PySetObject*)otherset;
642 if (other == so || other->used == 0)
643 /* a.update(a) or a.update(set()); nothing to do */
644 return 0;
645 /* Do one big resize at the start, rather than
646 * incrementally resizing as we insert new keys. Expect
647 * that there will be no (or few) overlapping keys.
648 */
649 if ((so->fill + other->used)*5 >= so->mask*3) {
650 if (set_table_resize(so, (so->used + other->used)*2) != 0)
651 return -1;
652 }
653 so_entry = so->table;
654 other_entry = other->table;
655
656 /* If our table is empty, and both tables have the same size, and
657 there are no dummies to eliminate, then just copy the pointers. */
658 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
659 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
660 key = other_entry->key;
661 if (key != NULL) {
662 assert(so_entry->key == NULL);
663 Py_INCREF(key);
664 so_entry->key = key;
665 so_entry->hash = other_entry->hash;
666 }
667 }
668 so->fill = other->fill;
669 so->used = other->used;
670 return 0;
671 }
672
673 /* If our table is empty, we can use set_insert_clean() */
674 if (so->fill == 0) {
675 setentry *newtable = so->table;
676 size_t newmask = (size_t)so->mask;
677 so->fill = other->used;
678 so->used = other->used;
679 for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
680 key = other_entry->key;
681 if (key != NULL && key != dummy) {
682 Py_INCREF(key);
683 set_insert_clean(newtable, newmask, key, other_entry->hash);
684 }
685 }
686 return 0;
687 }
688
689 /* We can't assure there are no duplicates, so do normal insertions */
690 for (i = 0; i <= other->mask; i++) {
691 other_entry = &other->table[i];
692 key = other_entry->key;
693 if (key != NULL && key != dummy) {
694 if (set_add_entry(so, key, other_entry->hash))
695 return -1;
696 }
697 }
698 return 0;
699 }
700
701 static PyObject *
set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))702 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored))
703 {
704 /* Make sure the search finger is in bounds */
705 setentry *entry = so->table + (so->finger & so->mask);
706 setentry *limit = so->table + so->mask;
707 PyObject *key;
708
709 if (so->used == 0) {
710 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
711 return NULL;
712 }
713 while (entry->key == NULL || entry->key==dummy) {
714 entry++;
715 if (entry > limit)
716 entry = so->table;
717 }
718 key = entry->key;
719 entry->key = dummy;
720 entry->hash = -1;
721 so->used--;
722 so->finger = entry - so->table + 1; /* next place to start */
723 return key;
724 }
725
726 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
727 Raises KeyError if the set is empty.");
728
729 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)730 set_traverse(PySetObject *so, visitproc visit, void *arg)
731 {
732 Py_ssize_t pos = 0;
733 setentry *entry;
734
735 while (set_next(so, &pos, &entry))
736 Py_VISIT(entry->key);
737 return 0;
738 }
739
740 /* Work to increase the bit dispersion for closely spaced hash values.
741 This is important because some use cases have many combinations of a
742 small number of elements with nearby hashes so that many distinct
743 combinations collapse to only a handful of distinct hash values. */
744
745 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)746 _shuffle_bits(Py_uhash_t h)
747 {
748 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
749 }
750
751 /* Most of the constants in this hash algorithm are randomly chosen
752 large primes with "interesting bit patterns" and that passed tests
753 for good collision statistics on a variety of problematic datasets
754 including powersets and graph structures (such as David Eppstein's
755 graph recipes in Lib/test/test_set.py) */
756
757 static Py_hash_t
frozenset_hash(PyObject * self)758 frozenset_hash(PyObject *self)
759 {
760 PySetObject *so = (PySetObject *)self;
761 Py_uhash_t hash = 0;
762 setentry *entry;
763
764 if (so->hash != -1)
765 return so->hash;
766
767 /* Xor-in shuffled bits from every entry's hash field because xor is
768 commutative and a frozenset hash should be independent of order.
769
770 For speed, include null entries and dummy entries and then
771 subtract out their effect afterwards so that the final hash
772 depends only on active entries. This allows the code to be
773 vectorized by the compiler and it saves the unpredictable
774 branches that would arise when trying to exclude null and dummy
775 entries on every iteration. */
776
777 for (entry = so->table; entry <= &so->table[so->mask]; entry++)
778 hash ^= _shuffle_bits(entry->hash);
779
780 /* Remove the effect of an odd number of NULL entries */
781 if ((so->mask + 1 - so->fill) & 1)
782 hash ^= _shuffle_bits(0);
783
784 /* Remove the effect of an odd number of dummy entries */
785 if ((so->fill - so->used) & 1)
786 hash ^= _shuffle_bits(-1);
787
788 /* Factor in the number of active entries */
789 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
790
791 /* Disperse patterns arising in nested frozensets */
792 hash ^= (hash >> 11) ^ (hash >> 25);
793 hash = hash * 69069U + 907133923UL;
794
795 /* -1 is reserved as an error code */
796 if (hash == (Py_uhash_t)-1)
797 hash = 590923713UL;
798
799 so->hash = hash;
800 return hash;
801 }
802
803 /***** Set iterator type ***********************************************/
804
805 typedef struct {
806 PyObject_HEAD
807 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
808 Py_ssize_t si_used;
809 Py_ssize_t si_pos;
810 Py_ssize_t len;
811 } setiterobject;
812
813 static void
setiter_dealloc(setiterobject * si)814 setiter_dealloc(setiterobject *si)
815 {
816 /* bpo-31095: UnTrack is needed before calling any callbacks */
817 _PyObject_GC_UNTRACK(si);
818 Py_XDECREF(si->si_set);
819 PyObject_GC_Del(si);
820 }
821
822 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)823 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
824 {
825 Py_VISIT(si->si_set);
826 return 0;
827 }
828
829 static PyObject *
setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))830 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored))
831 {
832 Py_ssize_t len = 0;
833 if (si->si_set != NULL && si->si_used == si->si_set->used)
834 len = si->len;
835 return PyLong_FromSsize_t(len);
836 }
837
838 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
839
840 static PyObject *setiter_iternext(setiterobject *si);
841
842 static PyObject *
setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))843 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored))
844 {
845 _Py_IDENTIFIER(iter);
846 /* copy the iterator state */
847 setiterobject tmp = *si;
848 Py_XINCREF(tmp.si_set);
849
850 /* iterate the temporary into a list */
851 PyObject *list = PySequence_List((PyObject*)&tmp);
852 Py_XDECREF(tmp.si_set);
853 if (list == NULL) {
854 return NULL;
855 }
856 return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
857 }
858
859 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
860
861 static PyMethodDef setiter_methods[] = {
862 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
863 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
864 {NULL, NULL} /* sentinel */
865 };
866
setiter_iternext(setiterobject * si)867 static PyObject *setiter_iternext(setiterobject *si)
868 {
869 PyObject *key;
870 Py_ssize_t i, mask;
871 setentry *entry;
872 PySetObject *so = si->si_set;
873
874 if (so == NULL)
875 return NULL;
876 assert (PyAnySet_Check(so));
877
878 if (si->si_used != so->used) {
879 PyErr_SetString(PyExc_RuntimeError,
880 "Set changed size during iteration");
881 si->si_used = -1; /* Make this state sticky */
882 return NULL;
883 }
884
885 i = si->si_pos;
886 assert(i>=0);
887 entry = so->table;
888 mask = so->mask;
889 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
890 i++;
891 si->si_pos = i+1;
892 if (i > mask)
893 goto fail;
894 si->len--;
895 key = entry[i].key;
896 Py_INCREF(key);
897 return key;
898
899 fail:
900 si->si_set = NULL;
901 Py_DECREF(so);
902 return NULL;
903 }
904
905 PyTypeObject PySetIter_Type = {
906 PyVarObject_HEAD_INIT(&PyType_Type, 0)
907 "set_iterator", /* tp_name */
908 sizeof(setiterobject), /* tp_basicsize */
909 0, /* tp_itemsize */
910 /* methods */
911 (destructor)setiter_dealloc, /* tp_dealloc */
912 0, /* tp_vectorcall_offset */
913 0, /* tp_getattr */
914 0, /* tp_setattr */
915 0, /* tp_as_async */
916 0, /* tp_repr */
917 0, /* tp_as_number */
918 0, /* tp_as_sequence */
919 0, /* tp_as_mapping */
920 0, /* tp_hash */
921 0, /* tp_call */
922 0, /* tp_str */
923 PyObject_GenericGetAttr, /* tp_getattro */
924 0, /* tp_setattro */
925 0, /* tp_as_buffer */
926 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
927 0, /* tp_doc */
928 (traverseproc)setiter_traverse, /* tp_traverse */
929 0, /* tp_clear */
930 0, /* tp_richcompare */
931 0, /* tp_weaklistoffset */
932 PyObject_SelfIter, /* tp_iter */
933 (iternextfunc)setiter_iternext, /* tp_iternext */
934 setiter_methods, /* tp_methods */
935 0,
936 };
937
938 static PyObject *
set_iter(PySetObject * so)939 set_iter(PySetObject *so)
940 {
941 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
942 if (si == NULL)
943 return NULL;
944 Py_INCREF(so);
945 si->si_set = so;
946 si->si_used = so->used;
947 si->si_pos = 0;
948 si->len = so->used;
949 _PyObject_GC_TRACK(si);
950 return (PyObject *)si;
951 }
952
953 static int
set_update_internal(PySetObject * so,PyObject * other)954 set_update_internal(PySetObject *so, PyObject *other)
955 {
956 PyObject *key, *it;
957
958 if (PyAnySet_Check(other))
959 return set_merge(so, other);
960
961 if (PyDict_CheckExact(other)) {
962 PyObject *value;
963 Py_ssize_t pos = 0;
964 Py_hash_t hash;
965 Py_ssize_t dictsize = PyDict_GET_SIZE(other);
966
967 /* Do one big resize at the start, rather than
968 * incrementally resizing as we insert new keys. Expect
969 * that there will be no (or few) overlapping keys.
970 */
971 if (dictsize < 0)
972 return -1;
973 if ((so->fill + dictsize)*5 >= so->mask*3) {
974 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
975 return -1;
976 }
977 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
978 if (set_add_entry(so, key, hash))
979 return -1;
980 }
981 return 0;
982 }
983
984 it = PyObject_GetIter(other);
985 if (it == NULL)
986 return -1;
987
988 while ((key = PyIter_Next(it)) != NULL) {
989 if (set_add_key(so, key)) {
990 Py_DECREF(it);
991 Py_DECREF(key);
992 return -1;
993 }
994 Py_DECREF(key);
995 }
996 Py_DECREF(it);
997 if (PyErr_Occurred())
998 return -1;
999 return 0;
1000 }
1001
1002 static PyObject *
set_update(PySetObject * so,PyObject * args)1003 set_update(PySetObject *so, PyObject *args)
1004 {
1005 Py_ssize_t i;
1006
1007 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1008 PyObject *other = PyTuple_GET_ITEM(args, i);
1009 if (set_update_internal(so, other))
1010 return NULL;
1011 }
1012 Py_RETURN_NONE;
1013 }
1014
1015 PyDoc_STRVAR(update_doc,
1016 "Update a set with the union of itself and others.");
1017
1018 /* XXX Todo:
1019 If aligned memory allocations become available, make the
1020 set object 64 byte aligned so that most of the fields
1021 can be retrieved or updated in a single cache line.
1022 */
1023
1024 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1025 make_new_set(PyTypeObject *type, PyObject *iterable)
1026 {
1027 PySetObject *so;
1028
1029 so = (PySetObject *)type->tp_alloc(type, 0);
1030 if (so == NULL)
1031 return NULL;
1032
1033 so->fill = 0;
1034 so->used = 0;
1035 so->mask = PySet_MINSIZE - 1;
1036 so->table = so->smalltable;
1037 so->hash = -1;
1038 so->finger = 0;
1039 so->weakreflist = NULL;
1040
1041 if (iterable != NULL) {
1042 if (set_update_internal(so, iterable)) {
1043 Py_DECREF(so);
1044 return NULL;
1045 }
1046 }
1047
1048 return (PyObject *)so;
1049 }
1050
1051 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)1052 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
1053 {
1054 if (type != &PySet_Type && type != &PyFrozenSet_Type) {
1055 if (PyType_IsSubtype(type, &PySet_Type))
1056 type = &PySet_Type;
1057 else
1058 type = &PyFrozenSet_Type;
1059 }
1060 return make_new_set(type, iterable);
1061 }
1062
1063 /* The empty frozenset is a singleton */
1064 static PyObject *emptyfrozenset = NULL;
1065
1066 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1067 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1068 {
1069 PyObject *iterable = NULL, *result;
1070
1071 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds))
1072 return NULL;
1073
1074 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1075 return NULL;
1076
1077 if (type != &PyFrozenSet_Type)
1078 return make_new_set(type, iterable);
1079
1080 if (iterable != NULL) {
1081 /* frozenset(f) is idempotent */
1082 if (PyFrozenSet_CheckExact(iterable)) {
1083 Py_INCREF(iterable);
1084 return iterable;
1085 }
1086 result = make_new_set(type, iterable);
1087 if (result == NULL || PySet_GET_SIZE(result))
1088 return result;
1089 Py_DECREF(result);
1090 }
1091 /* The empty frozenset is a singleton */
1092 if (emptyfrozenset == NULL)
1093 emptyfrozenset = make_new_set(type, NULL);
1094 Py_XINCREF(emptyfrozenset);
1095 return emptyfrozenset;
1096 }
1097
1098 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1099 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1100 {
1101 return make_new_set(type, NULL);
1102 }
1103
1104 /* set_swap_bodies() switches the contents of any two sets by moving their
1105 internal data pointers and, if needed, copying the internal smalltables.
1106 Semantically equivalent to:
1107
1108 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1109
1110 The function always succeeds and it leaves both objects in a stable state.
1111 Useful for operations that update in-place (by allowing an intermediate
1112 result to be swapped into one of the original inputs).
1113 */
1114
1115 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1116 set_swap_bodies(PySetObject *a, PySetObject *b)
1117 {
1118 Py_ssize_t t;
1119 setentry *u;
1120 setentry tab[PySet_MINSIZE];
1121 Py_hash_t h;
1122
1123 t = a->fill; a->fill = b->fill; b->fill = t;
1124 t = a->used; a->used = b->used; b->used = t;
1125 t = a->mask; a->mask = b->mask; b->mask = t;
1126
1127 u = a->table;
1128 if (a->table == a->smalltable)
1129 u = b->smalltable;
1130 a->table = b->table;
1131 if (b->table == b->smalltable)
1132 a->table = a->smalltable;
1133 b->table = u;
1134
1135 if (a->table == a->smalltable || b->table == b->smalltable) {
1136 memcpy(tab, a->smalltable, sizeof(tab));
1137 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1138 memcpy(b->smalltable, tab, sizeof(tab));
1139 }
1140
1141 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1142 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1143 h = a->hash; a->hash = b->hash; b->hash = h;
1144 } else {
1145 a->hash = -1;
1146 b->hash = -1;
1147 }
1148 }
1149
1150 static PyObject *
set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1151 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1152 {
1153 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1154 }
1155
1156 static PyObject *
frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1157 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored))
1158 {
1159 if (PyFrozenSet_CheckExact(so)) {
1160 Py_INCREF(so);
1161 return (PyObject *)so;
1162 }
1163 return set_copy(so, NULL);
1164 }
1165
1166 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1167
1168 static PyObject *
set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1169 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored))
1170 {
1171 set_clear_internal(so);
1172 Py_RETURN_NONE;
1173 }
1174
1175 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1176
1177 static PyObject *
set_union(PySetObject * so,PyObject * args)1178 set_union(PySetObject *so, PyObject *args)
1179 {
1180 PySetObject *result;
1181 PyObject *other;
1182 Py_ssize_t i;
1183
1184 result = (PySetObject *)set_copy(so, NULL);
1185 if (result == NULL)
1186 return NULL;
1187
1188 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1189 other = PyTuple_GET_ITEM(args, i);
1190 if ((PyObject *)so == other)
1191 continue;
1192 if (set_update_internal(result, other)) {
1193 Py_DECREF(result);
1194 return NULL;
1195 }
1196 }
1197 return (PyObject *)result;
1198 }
1199
1200 PyDoc_STRVAR(union_doc,
1201 "Return the union of sets as a new set.\n\
1202 \n\
1203 (i.e. all elements that are in either set.)");
1204
1205 static PyObject *
set_or(PySetObject * so,PyObject * other)1206 set_or(PySetObject *so, PyObject *other)
1207 {
1208 PySetObject *result;
1209
1210 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1211 Py_RETURN_NOTIMPLEMENTED;
1212
1213 result = (PySetObject *)set_copy(so, NULL);
1214 if (result == NULL)
1215 return NULL;
1216 if ((PyObject *)so == other)
1217 return (PyObject *)result;
1218 if (set_update_internal(result, other)) {
1219 Py_DECREF(result);
1220 return NULL;
1221 }
1222 return (PyObject *)result;
1223 }
1224
1225 static PyObject *
set_ior(PySetObject * so,PyObject * other)1226 set_ior(PySetObject *so, PyObject *other)
1227 {
1228 if (!PyAnySet_Check(other))
1229 Py_RETURN_NOTIMPLEMENTED;
1230
1231 if (set_update_internal(so, other))
1232 return NULL;
1233 Py_INCREF(so);
1234 return (PyObject *)so;
1235 }
1236
1237 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1238 set_intersection(PySetObject *so, PyObject *other)
1239 {
1240 PySetObject *result;
1241 PyObject *key, *it, *tmp;
1242 Py_hash_t hash;
1243 int rv;
1244
1245 if ((PyObject *)so == other)
1246 return set_copy(so, NULL);
1247
1248 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1249 if (result == NULL)
1250 return NULL;
1251
1252 if (PyAnySet_Check(other)) {
1253 Py_ssize_t pos = 0;
1254 setentry *entry;
1255
1256 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1257 tmp = (PyObject *)so;
1258 so = (PySetObject *)other;
1259 other = tmp;
1260 }
1261
1262 while (set_next((PySetObject *)other, &pos, &entry)) {
1263 key = entry->key;
1264 hash = entry->hash;
1265 rv = set_contains_entry(so, key, hash);
1266 if (rv < 0) {
1267 Py_DECREF(result);
1268 return NULL;
1269 }
1270 if (rv) {
1271 if (set_add_entry(result, key, hash)) {
1272 Py_DECREF(result);
1273 return NULL;
1274 }
1275 }
1276 }
1277 return (PyObject *)result;
1278 }
1279
1280 it = PyObject_GetIter(other);
1281 if (it == NULL) {
1282 Py_DECREF(result);
1283 return NULL;
1284 }
1285
1286 while ((key = PyIter_Next(it)) != NULL) {
1287 hash = PyObject_Hash(key);
1288 if (hash == -1)
1289 goto error;
1290 rv = set_contains_entry(so, key, hash);
1291 if (rv < 0)
1292 goto error;
1293 if (rv) {
1294 if (set_add_entry(result, key, hash))
1295 goto error;
1296 }
1297 Py_DECREF(key);
1298 }
1299 Py_DECREF(it);
1300 if (PyErr_Occurred()) {
1301 Py_DECREF(result);
1302 return NULL;
1303 }
1304 return (PyObject *)result;
1305 error:
1306 Py_DECREF(it);
1307 Py_DECREF(result);
1308 Py_DECREF(key);
1309 return NULL;
1310 }
1311
1312 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1313 set_intersection_multi(PySetObject *so, PyObject *args)
1314 {
1315 Py_ssize_t i;
1316 PyObject *result = (PyObject *)so;
1317
1318 if (PyTuple_GET_SIZE(args) == 0)
1319 return set_copy(so, NULL);
1320
1321 Py_INCREF(so);
1322 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1323 PyObject *other = PyTuple_GET_ITEM(args, i);
1324 PyObject *newresult = set_intersection((PySetObject *)result, other);
1325 if (newresult == NULL) {
1326 Py_DECREF(result);
1327 return NULL;
1328 }
1329 Py_DECREF(result);
1330 result = newresult;
1331 }
1332 return result;
1333 }
1334
1335 PyDoc_STRVAR(intersection_doc,
1336 "Return the intersection of two sets as a new set.\n\
1337 \n\
1338 (i.e. all elements that are in both sets.)");
1339
1340 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1341 set_intersection_update(PySetObject *so, PyObject *other)
1342 {
1343 PyObject *tmp;
1344
1345 tmp = set_intersection(so, other);
1346 if (tmp == NULL)
1347 return NULL;
1348 set_swap_bodies(so, (PySetObject *)tmp);
1349 Py_DECREF(tmp);
1350 Py_RETURN_NONE;
1351 }
1352
1353 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1354 set_intersection_update_multi(PySetObject *so, PyObject *args)
1355 {
1356 PyObject *tmp;
1357
1358 tmp = set_intersection_multi(so, args);
1359 if (tmp == NULL)
1360 return NULL;
1361 set_swap_bodies(so, (PySetObject *)tmp);
1362 Py_DECREF(tmp);
1363 Py_RETURN_NONE;
1364 }
1365
1366 PyDoc_STRVAR(intersection_update_doc,
1367 "Update a set with the intersection of itself and another.");
1368
1369 static PyObject *
set_and(PySetObject * so,PyObject * other)1370 set_and(PySetObject *so, PyObject *other)
1371 {
1372 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1373 Py_RETURN_NOTIMPLEMENTED;
1374 return set_intersection(so, other);
1375 }
1376
1377 static PyObject *
set_iand(PySetObject * so,PyObject * other)1378 set_iand(PySetObject *so, PyObject *other)
1379 {
1380 PyObject *result;
1381
1382 if (!PyAnySet_Check(other))
1383 Py_RETURN_NOTIMPLEMENTED;
1384 result = set_intersection_update(so, other);
1385 if (result == NULL)
1386 return NULL;
1387 Py_DECREF(result);
1388 Py_INCREF(so);
1389 return (PyObject *)so;
1390 }
1391
1392 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1393 set_isdisjoint(PySetObject *so, PyObject *other)
1394 {
1395 PyObject *key, *it, *tmp;
1396 int rv;
1397
1398 if ((PyObject *)so == other) {
1399 if (PySet_GET_SIZE(so) == 0)
1400 Py_RETURN_TRUE;
1401 else
1402 Py_RETURN_FALSE;
1403 }
1404
1405 if (PyAnySet_CheckExact(other)) {
1406 Py_ssize_t pos = 0;
1407 setentry *entry;
1408
1409 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1410 tmp = (PyObject *)so;
1411 so = (PySetObject *)other;
1412 other = tmp;
1413 }
1414 while (set_next((PySetObject *)other, &pos, &entry)) {
1415 rv = set_contains_entry(so, entry->key, entry->hash);
1416 if (rv < 0)
1417 return NULL;
1418 if (rv)
1419 Py_RETURN_FALSE;
1420 }
1421 Py_RETURN_TRUE;
1422 }
1423
1424 it = PyObject_GetIter(other);
1425 if (it == NULL)
1426 return NULL;
1427
1428 while ((key = PyIter_Next(it)) != NULL) {
1429 Py_hash_t hash = PyObject_Hash(key);
1430
1431 if (hash == -1) {
1432 Py_DECREF(key);
1433 Py_DECREF(it);
1434 return NULL;
1435 }
1436 rv = set_contains_entry(so, key, hash);
1437 Py_DECREF(key);
1438 if (rv < 0) {
1439 Py_DECREF(it);
1440 return NULL;
1441 }
1442 if (rv) {
1443 Py_DECREF(it);
1444 Py_RETURN_FALSE;
1445 }
1446 }
1447 Py_DECREF(it);
1448 if (PyErr_Occurred())
1449 return NULL;
1450 Py_RETURN_TRUE;
1451 }
1452
1453 PyDoc_STRVAR(isdisjoint_doc,
1454 "Return True if two sets have a null intersection.");
1455
1456 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1457 set_difference_update_internal(PySetObject *so, PyObject *other)
1458 {
1459 if ((PyObject *)so == other)
1460 return set_clear_internal(so);
1461
1462 if (PyAnySet_Check(other)) {
1463 setentry *entry;
1464 Py_ssize_t pos = 0;
1465
1466 while (set_next((PySetObject *)other, &pos, &entry))
1467 if (set_discard_entry(so, entry->key, entry->hash) < 0)
1468 return -1;
1469 } else {
1470 PyObject *key, *it;
1471 it = PyObject_GetIter(other);
1472 if (it == NULL)
1473 return -1;
1474
1475 while ((key = PyIter_Next(it)) != NULL) {
1476 if (set_discard_key(so, key) < 0) {
1477 Py_DECREF(it);
1478 Py_DECREF(key);
1479 return -1;
1480 }
1481 Py_DECREF(key);
1482 }
1483 Py_DECREF(it);
1484 if (PyErr_Occurred())
1485 return -1;
1486 }
1487 /* If more than 1/4th are dummies, then resize them away. */
1488 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1489 return 0;
1490 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1491 }
1492
1493 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1494 set_difference_update(PySetObject *so, PyObject *args)
1495 {
1496 Py_ssize_t i;
1497
1498 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1499 PyObject *other = PyTuple_GET_ITEM(args, i);
1500 if (set_difference_update_internal(so, other))
1501 return NULL;
1502 }
1503 Py_RETURN_NONE;
1504 }
1505
1506 PyDoc_STRVAR(difference_update_doc,
1507 "Remove all elements of another set from this set.");
1508
1509 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1510 set_copy_and_difference(PySetObject *so, PyObject *other)
1511 {
1512 PyObject *result;
1513
1514 result = set_copy(so, NULL);
1515 if (result == NULL)
1516 return NULL;
1517 if (set_difference_update_internal((PySetObject *) result, other) == 0)
1518 return result;
1519 Py_DECREF(result);
1520 return NULL;
1521 }
1522
1523 static PyObject *
set_difference(PySetObject * so,PyObject * other)1524 set_difference(PySetObject *so, PyObject *other)
1525 {
1526 PyObject *result;
1527 PyObject *key;
1528 Py_hash_t hash;
1529 setentry *entry;
1530 Py_ssize_t pos = 0, other_size;
1531 int rv;
1532
1533 if (PyAnySet_Check(other)) {
1534 other_size = PySet_GET_SIZE(other);
1535 }
1536 else if (PyDict_CheckExact(other)) {
1537 other_size = PyDict_GET_SIZE(other);
1538 }
1539 else {
1540 return set_copy_and_difference(so, other);
1541 }
1542
1543 /* If len(so) much more than len(other), it's more efficient to simply copy
1544 * so and then iterate other looking for common elements. */
1545 if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1546 return set_copy_and_difference(so, other);
1547 }
1548
1549 result = make_new_set_basetype(Py_TYPE(so), NULL);
1550 if (result == NULL)
1551 return NULL;
1552
1553 if (PyDict_CheckExact(other)) {
1554 while (set_next(so, &pos, &entry)) {
1555 key = entry->key;
1556 hash = entry->hash;
1557 rv = _PyDict_Contains(other, key, hash);
1558 if (rv < 0) {
1559 Py_DECREF(result);
1560 return NULL;
1561 }
1562 if (!rv) {
1563 if (set_add_entry((PySetObject *)result, key, hash)) {
1564 Py_DECREF(result);
1565 return NULL;
1566 }
1567 }
1568 }
1569 return result;
1570 }
1571
1572 /* Iterate over so, checking for common elements in other. */
1573 while (set_next(so, &pos, &entry)) {
1574 key = entry->key;
1575 hash = entry->hash;
1576 rv = set_contains_entry((PySetObject *)other, key, hash);
1577 if (rv < 0) {
1578 Py_DECREF(result);
1579 return NULL;
1580 }
1581 if (!rv) {
1582 if (set_add_entry((PySetObject *)result, key, hash)) {
1583 Py_DECREF(result);
1584 return NULL;
1585 }
1586 }
1587 }
1588 return result;
1589 }
1590
1591 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1592 set_difference_multi(PySetObject *so, PyObject *args)
1593 {
1594 Py_ssize_t i;
1595 PyObject *result, *other;
1596
1597 if (PyTuple_GET_SIZE(args) == 0)
1598 return set_copy(so, NULL);
1599
1600 other = PyTuple_GET_ITEM(args, 0);
1601 result = set_difference(so, other);
1602 if (result == NULL)
1603 return NULL;
1604
1605 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1606 other = PyTuple_GET_ITEM(args, i);
1607 if (set_difference_update_internal((PySetObject *)result, other)) {
1608 Py_DECREF(result);
1609 return NULL;
1610 }
1611 }
1612 return result;
1613 }
1614
1615 PyDoc_STRVAR(difference_doc,
1616 "Return the difference of two or more sets as a new set.\n\
1617 \n\
1618 (i.e. all elements that are in this set but not the others.)");
1619 static PyObject *
set_sub(PySetObject * so,PyObject * other)1620 set_sub(PySetObject *so, PyObject *other)
1621 {
1622 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1623 Py_RETURN_NOTIMPLEMENTED;
1624 return set_difference(so, other);
1625 }
1626
1627 static PyObject *
set_isub(PySetObject * so,PyObject * other)1628 set_isub(PySetObject *so, PyObject *other)
1629 {
1630 if (!PyAnySet_Check(other))
1631 Py_RETURN_NOTIMPLEMENTED;
1632 if (set_difference_update_internal(so, other))
1633 return NULL;
1634 Py_INCREF(so);
1635 return (PyObject *)so;
1636 }
1637
1638 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1639 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1640 {
1641 PySetObject *otherset;
1642 PyObject *key;
1643 Py_ssize_t pos = 0;
1644 Py_hash_t hash;
1645 setentry *entry;
1646 int rv;
1647
1648 if ((PyObject *)so == other)
1649 return set_clear(so, NULL);
1650
1651 if (PyDict_CheckExact(other)) {
1652 PyObject *value;
1653 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1654 Py_INCREF(key);
1655 rv = set_discard_entry(so, key, hash);
1656 if (rv < 0) {
1657 Py_DECREF(key);
1658 return NULL;
1659 }
1660 if (rv == DISCARD_NOTFOUND) {
1661 if (set_add_entry(so, key, hash)) {
1662 Py_DECREF(key);
1663 return NULL;
1664 }
1665 }
1666 Py_DECREF(key);
1667 }
1668 Py_RETURN_NONE;
1669 }
1670
1671 if (PyAnySet_Check(other)) {
1672 Py_INCREF(other);
1673 otherset = (PySetObject *)other;
1674 } else {
1675 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1676 if (otherset == NULL)
1677 return NULL;
1678 }
1679
1680 while (set_next(otherset, &pos, &entry)) {
1681 key = entry->key;
1682 hash = entry->hash;
1683 rv = set_discard_entry(so, key, hash);
1684 if (rv < 0) {
1685 Py_DECREF(otherset);
1686 return NULL;
1687 }
1688 if (rv == DISCARD_NOTFOUND) {
1689 if (set_add_entry(so, key, hash)) {
1690 Py_DECREF(otherset);
1691 return NULL;
1692 }
1693 }
1694 }
1695 Py_DECREF(otherset);
1696 Py_RETURN_NONE;
1697 }
1698
1699 PyDoc_STRVAR(symmetric_difference_update_doc,
1700 "Update a set with the symmetric difference of itself and another.");
1701
1702 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1703 set_symmetric_difference(PySetObject *so, PyObject *other)
1704 {
1705 PyObject *rv;
1706 PySetObject *otherset;
1707
1708 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1709 if (otherset == NULL)
1710 return NULL;
1711 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1712 if (rv == NULL) {
1713 Py_DECREF(otherset);
1714 return NULL;
1715 }
1716 Py_DECREF(rv);
1717 return (PyObject *)otherset;
1718 }
1719
1720 PyDoc_STRVAR(symmetric_difference_doc,
1721 "Return the symmetric difference of two sets as a new set.\n\
1722 \n\
1723 (i.e. all elements that are in exactly one of the sets.)");
1724
1725 static PyObject *
set_xor(PySetObject * so,PyObject * other)1726 set_xor(PySetObject *so, PyObject *other)
1727 {
1728 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1729 Py_RETURN_NOTIMPLEMENTED;
1730 return set_symmetric_difference(so, other);
1731 }
1732
1733 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1734 set_ixor(PySetObject *so, PyObject *other)
1735 {
1736 PyObject *result;
1737
1738 if (!PyAnySet_Check(other))
1739 Py_RETURN_NOTIMPLEMENTED;
1740 result = set_symmetric_difference_update(so, other);
1741 if (result == NULL)
1742 return NULL;
1743 Py_DECREF(result);
1744 Py_INCREF(so);
1745 return (PyObject *)so;
1746 }
1747
1748 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1749 set_issubset(PySetObject *so, PyObject *other)
1750 {
1751 setentry *entry;
1752 Py_ssize_t pos = 0;
1753 int rv;
1754
1755 if (!PyAnySet_Check(other)) {
1756 PyObject *tmp, *result;
1757 tmp = make_new_set(&PySet_Type, other);
1758 if (tmp == NULL)
1759 return NULL;
1760 result = set_issubset(so, tmp);
1761 Py_DECREF(tmp);
1762 return result;
1763 }
1764 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1765 Py_RETURN_FALSE;
1766
1767 while (set_next(so, &pos, &entry)) {
1768 rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1769 if (rv < 0)
1770 return NULL;
1771 if (!rv)
1772 Py_RETURN_FALSE;
1773 }
1774 Py_RETURN_TRUE;
1775 }
1776
1777 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1778
1779 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1780 set_issuperset(PySetObject *so, PyObject *other)
1781 {
1782 PyObject *tmp, *result;
1783
1784 if (!PyAnySet_Check(other)) {
1785 tmp = make_new_set(&PySet_Type, other);
1786 if (tmp == NULL)
1787 return NULL;
1788 result = set_issuperset(so, tmp);
1789 Py_DECREF(tmp);
1790 return result;
1791 }
1792 return set_issubset((PySetObject *)other, (PyObject *)so);
1793 }
1794
1795 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1796
1797 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1798 set_richcompare(PySetObject *v, PyObject *w, int op)
1799 {
1800 PyObject *r1;
1801 int r2;
1802
1803 if(!PyAnySet_Check(w))
1804 Py_RETURN_NOTIMPLEMENTED;
1805
1806 switch (op) {
1807 case Py_EQ:
1808 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1809 Py_RETURN_FALSE;
1810 if (v->hash != -1 &&
1811 ((PySetObject *)w)->hash != -1 &&
1812 v->hash != ((PySetObject *)w)->hash)
1813 Py_RETURN_FALSE;
1814 return set_issubset(v, w);
1815 case Py_NE:
1816 r1 = set_richcompare(v, w, Py_EQ);
1817 if (r1 == NULL)
1818 return NULL;
1819 r2 = PyObject_IsTrue(r1);
1820 Py_DECREF(r1);
1821 if (r2 < 0)
1822 return NULL;
1823 return PyBool_FromLong(!r2);
1824 case Py_LE:
1825 return set_issubset(v, w);
1826 case Py_GE:
1827 return set_issuperset(v, w);
1828 case Py_LT:
1829 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1830 Py_RETURN_FALSE;
1831 return set_issubset(v, w);
1832 case Py_GT:
1833 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1834 Py_RETURN_FALSE;
1835 return set_issuperset(v, w);
1836 }
1837 Py_RETURN_NOTIMPLEMENTED;
1838 }
1839
1840 static PyObject *
set_add(PySetObject * so,PyObject * key)1841 set_add(PySetObject *so, PyObject *key)
1842 {
1843 if (set_add_key(so, key))
1844 return NULL;
1845 Py_RETURN_NONE;
1846 }
1847
1848 PyDoc_STRVAR(add_doc,
1849 "Add an element to a set.\n\
1850 \n\
1851 This has no effect if the element is already present.");
1852
1853 static int
set_contains(PySetObject * so,PyObject * key)1854 set_contains(PySetObject *so, PyObject *key)
1855 {
1856 PyObject *tmpkey;
1857 int rv;
1858
1859 rv = set_contains_key(so, key);
1860 if (rv < 0) {
1861 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1862 return -1;
1863 PyErr_Clear();
1864 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1865 if (tmpkey == NULL)
1866 return -1;
1867 rv = set_contains_key(so, tmpkey);
1868 Py_DECREF(tmpkey);
1869 }
1870 return rv;
1871 }
1872
1873 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1874 set_direct_contains(PySetObject *so, PyObject *key)
1875 {
1876 long result;
1877
1878 result = set_contains(so, key);
1879 if (result < 0)
1880 return NULL;
1881 return PyBool_FromLong(result);
1882 }
1883
1884 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1885
1886 static PyObject *
set_remove(PySetObject * so,PyObject * key)1887 set_remove(PySetObject *so, PyObject *key)
1888 {
1889 PyObject *tmpkey;
1890 int rv;
1891
1892 rv = set_discard_key(so, key);
1893 if (rv < 0) {
1894 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1895 return NULL;
1896 PyErr_Clear();
1897 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1898 if (tmpkey == NULL)
1899 return NULL;
1900 rv = set_discard_key(so, tmpkey);
1901 Py_DECREF(tmpkey);
1902 if (rv < 0)
1903 return NULL;
1904 }
1905
1906 if (rv == DISCARD_NOTFOUND) {
1907 _PyErr_SetKeyError(key);
1908 return NULL;
1909 }
1910 Py_RETURN_NONE;
1911 }
1912
1913 PyDoc_STRVAR(remove_doc,
1914 "Remove an element from a set; it must be a member.\n\
1915 \n\
1916 If the element is not a member, raise a KeyError.");
1917
1918 static PyObject *
set_discard(PySetObject * so,PyObject * key)1919 set_discard(PySetObject *so, PyObject *key)
1920 {
1921 PyObject *tmpkey;
1922 int rv;
1923
1924 rv = set_discard_key(so, key);
1925 if (rv < 0) {
1926 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1927 return NULL;
1928 PyErr_Clear();
1929 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1930 if (tmpkey == NULL)
1931 return NULL;
1932 rv = set_discard_key(so, tmpkey);
1933 Py_DECREF(tmpkey);
1934 if (rv < 0)
1935 return NULL;
1936 }
1937 Py_RETURN_NONE;
1938 }
1939
1940 PyDoc_STRVAR(discard_doc,
1941 "Remove an element from a set if it is a member.\n\
1942 \n\
1943 If the element is not a member, do nothing.");
1944
1945 static PyObject *
set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1946 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored))
1947 {
1948 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1949 _Py_IDENTIFIER(__dict__);
1950
1951 keys = PySequence_List((PyObject *)so);
1952 if (keys == NULL)
1953 goto done;
1954 args = PyTuple_Pack(1, keys);
1955 if (args == NULL)
1956 goto done;
1957 if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) {
1958 goto done;
1959 }
1960 if (dict == NULL) {
1961 dict = Py_None;
1962 Py_INCREF(dict);
1963 }
1964 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1965 done:
1966 Py_XDECREF(args);
1967 Py_XDECREF(keys);
1968 Py_XDECREF(dict);
1969 return result;
1970 }
1971
1972 static PyObject *
set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1973 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored))
1974 {
1975 Py_ssize_t res;
1976
1977 res = _PyObject_SIZE(Py_TYPE(so));
1978 if (so->table != so->smalltable)
1979 res = res + (so->mask + 1) * sizeof(setentry);
1980 return PyLong_FromSsize_t(res);
1981 }
1982
1983 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1984 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1985 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1986 {
1987 PyObject *iterable = NULL;
1988
1989 if (!_PyArg_NoKeywords("set", kwds))
1990 return -1;
1991 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
1992 return -1;
1993 if (self->fill)
1994 set_clear_internal(self);
1995 self->hash = -1;
1996 if (iterable == NULL)
1997 return 0;
1998 return set_update_internal(self, iterable);
1999 }
2000
2001 static PySequenceMethods set_as_sequence = {
2002 set_len, /* sq_length */
2003 0, /* sq_concat */
2004 0, /* sq_repeat */
2005 0, /* sq_item */
2006 0, /* sq_slice */
2007 0, /* sq_ass_item */
2008 0, /* sq_ass_slice */
2009 (objobjproc)set_contains, /* sq_contains */
2010 };
2011
2012 /* set object ********************************************************/
2013
2014 #ifdef Py_DEBUG
2015 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored));
2016
2017 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2018 All is well if assertions don't fail.");
2019 #endif
2020
2021 static PyMethodDef set_methods[] = {
2022 {"add", (PyCFunction)set_add, METH_O,
2023 add_doc},
2024 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2025 clear_doc},
2026 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2027 contains_doc},
2028 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2029 copy_doc},
2030 {"discard", (PyCFunction)set_discard, METH_O,
2031 discard_doc},
2032 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2033 difference_doc},
2034 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2035 difference_update_doc},
2036 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2037 intersection_doc},
2038 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2039 intersection_update_doc},
2040 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2041 isdisjoint_doc},
2042 {"issubset", (PyCFunction)set_issubset, METH_O,
2043 issubset_doc},
2044 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2045 issuperset_doc},
2046 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2047 pop_doc},
2048 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2049 reduce_doc},
2050 {"remove", (PyCFunction)set_remove, METH_O,
2051 remove_doc},
2052 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2053 sizeof_doc},
2054 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2055 symmetric_difference_doc},
2056 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2057 symmetric_difference_update_doc},
2058 #ifdef Py_DEBUG
2059 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2060 test_c_api_doc},
2061 #endif
2062 {"union", (PyCFunction)set_union, METH_VARARGS,
2063 union_doc},
2064 {"update", (PyCFunction)set_update, METH_VARARGS,
2065 update_doc},
2066 {NULL, NULL} /* sentinel */
2067 };
2068
2069 static PyNumberMethods set_as_number = {
2070 0, /*nb_add*/
2071 (binaryfunc)set_sub, /*nb_subtract*/
2072 0, /*nb_multiply*/
2073 0, /*nb_remainder*/
2074 0, /*nb_divmod*/
2075 0, /*nb_power*/
2076 0, /*nb_negative*/
2077 0, /*nb_positive*/
2078 0, /*nb_absolute*/
2079 0, /*nb_bool*/
2080 0, /*nb_invert*/
2081 0, /*nb_lshift*/
2082 0, /*nb_rshift*/
2083 (binaryfunc)set_and, /*nb_and*/
2084 (binaryfunc)set_xor, /*nb_xor*/
2085 (binaryfunc)set_or, /*nb_or*/
2086 0, /*nb_int*/
2087 0, /*nb_reserved*/
2088 0, /*nb_float*/
2089 0, /*nb_inplace_add*/
2090 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2091 0, /*nb_inplace_multiply*/
2092 0, /*nb_inplace_remainder*/
2093 0, /*nb_inplace_power*/
2094 0, /*nb_inplace_lshift*/
2095 0, /*nb_inplace_rshift*/
2096 (binaryfunc)set_iand, /*nb_inplace_and*/
2097 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2098 (binaryfunc)set_ior, /*nb_inplace_or*/
2099 };
2100
2101 PyDoc_STRVAR(set_doc,
2102 "set() -> new empty set object\n\
2103 set(iterable) -> new set object\n\
2104 \n\
2105 Build an unordered collection of unique elements.");
2106
2107 PyTypeObject PySet_Type = {
2108 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2109 "set", /* tp_name */
2110 sizeof(PySetObject), /* tp_basicsize */
2111 0, /* tp_itemsize */
2112 /* methods */
2113 (destructor)set_dealloc, /* tp_dealloc */
2114 0, /* tp_vectorcall_offset */
2115 0, /* tp_getattr */
2116 0, /* tp_setattr */
2117 0, /* tp_as_async */
2118 (reprfunc)set_repr, /* tp_repr */
2119 &set_as_number, /* tp_as_number */
2120 &set_as_sequence, /* tp_as_sequence */
2121 0, /* tp_as_mapping */
2122 PyObject_HashNotImplemented, /* tp_hash */
2123 0, /* tp_call */
2124 0, /* tp_str */
2125 PyObject_GenericGetAttr, /* tp_getattro */
2126 0, /* tp_setattro */
2127 0, /* tp_as_buffer */
2128 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2129 Py_TPFLAGS_BASETYPE, /* tp_flags */
2130 set_doc, /* tp_doc */
2131 (traverseproc)set_traverse, /* tp_traverse */
2132 (inquiry)set_clear_internal, /* tp_clear */
2133 (richcmpfunc)set_richcompare, /* tp_richcompare */
2134 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2135 (getiterfunc)set_iter, /* tp_iter */
2136 0, /* tp_iternext */
2137 set_methods, /* tp_methods */
2138 0, /* tp_members */
2139 0, /* tp_getset */
2140 0, /* tp_base */
2141 0, /* tp_dict */
2142 0, /* tp_descr_get */
2143 0, /* tp_descr_set */
2144 0, /* tp_dictoffset */
2145 (initproc)set_init, /* tp_init */
2146 PyType_GenericAlloc, /* tp_alloc */
2147 set_new, /* tp_new */
2148 PyObject_GC_Del, /* tp_free */
2149 };
2150
2151 /* frozenset object ********************************************************/
2152
2153
2154 static PyMethodDef frozenset_methods[] = {
2155 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2156 contains_doc},
2157 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2158 copy_doc},
2159 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2160 difference_doc},
2161 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2162 intersection_doc},
2163 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2164 isdisjoint_doc},
2165 {"issubset", (PyCFunction)set_issubset, METH_O,
2166 issubset_doc},
2167 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2168 issuperset_doc},
2169 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2170 reduce_doc},
2171 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2172 sizeof_doc},
2173 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2174 symmetric_difference_doc},
2175 {"union", (PyCFunction)set_union, METH_VARARGS,
2176 union_doc},
2177 {NULL, NULL} /* sentinel */
2178 };
2179
2180 static PyNumberMethods frozenset_as_number = {
2181 0, /*nb_add*/
2182 (binaryfunc)set_sub, /*nb_subtract*/
2183 0, /*nb_multiply*/
2184 0, /*nb_remainder*/
2185 0, /*nb_divmod*/
2186 0, /*nb_power*/
2187 0, /*nb_negative*/
2188 0, /*nb_positive*/
2189 0, /*nb_absolute*/
2190 0, /*nb_bool*/
2191 0, /*nb_invert*/
2192 0, /*nb_lshift*/
2193 0, /*nb_rshift*/
2194 (binaryfunc)set_and, /*nb_and*/
2195 (binaryfunc)set_xor, /*nb_xor*/
2196 (binaryfunc)set_or, /*nb_or*/
2197 };
2198
2199 PyDoc_STRVAR(frozenset_doc,
2200 "frozenset() -> empty frozenset object\n\
2201 frozenset(iterable) -> frozenset object\n\
2202 \n\
2203 Build an immutable unordered collection of unique elements.");
2204
2205 PyTypeObject PyFrozenSet_Type = {
2206 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2207 "frozenset", /* tp_name */
2208 sizeof(PySetObject), /* tp_basicsize */
2209 0, /* tp_itemsize */
2210 /* methods */
2211 (destructor)set_dealloc, /* tp_dealloc */
2212 0, /* tp_vectorcall_offset */
2213 0, /* tp_getattr */
2214 0, /* tp_setattr */
2215 0, /* tp_as_async */
2216 (reprfunc)set_repr, /* tp_repr */
2217 &frozenset_as_number, /* tp_as_number */
2218 &set_as_sequence, /* tp_as_sequence */
2219 0, /* tp_as_mapping */
2220 frozenset_hash, /* tp_hash */
2221 0, /* tp_call */
2222 0, /* tp_str */
2223 PyObject_GenericGetAttr, /* tp_getattro */
2224 0, /* tp_setattro */
2225 0, /* tp_as_buffer */
2226 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2227 Py_TPFLAGS_BASETYPE, /* tp_flags */
2228 frozenset_doc, /* tp_doc */
2229 (traverseproc)set_traverse, /* tp_traverse */
2230 (inquiry)set_clear_internal, /* tp_clear */
2231 (richcmpfunc)set_richcompare, /* tp_richcompare */
2232 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2233 (getiterfunc)set_iter, /* tp_iter */
2234 0, /* tp_iternext */
2235 frozenset_methods, /* tp_methods */
2236 0, /* tp_members */
2237 0, /* tp_getset */
2238 0, /* tp_base */
2239 0, /* tp_dict */
2240 0, /* tp_descr_get */
2241 0, /* tp_descr_set */
2242 0, /* tp_dictoffset */
2243 0, /* tp_init */
2244 PyType_GenericAlloc, /* tp_alloc */
2245 frozenset_new, /* tp_new */
2246 PyObject_GC_Del, /* tp_free */
2247 };
2248
2249
2250 /***** C API functions *************************************************/
2251
2252 PyObject *
PySet_New(PyObject * iterable)2253 PySet_New(PyObject *iterable)
2254 {
2255 return make_new_set(&PySet_Type, iterable);
2256 }
2257
2258 PyObject *
PyFrozenSet_New(PyObject * iterable)2259 PyFrozenSet_New(PyObject *iterable)
2260 {
2261 return make_new_set(&PyFrozenSet_Type, iterable);
2262 }
2263
2264 Py_ssize_t
PySet_Size(PyObject * anyset)2265 PySet_Size(PyObject *anyset)
2266 {
2267 if (!PyAnySet_Check(anyset)) {
2268 PyErr_BadInternalCall();
2269 return -1;
2270 }
2271 return PySet_GET_SIZE(anyset);
2272 }
2273
2274 int
PySet_Clear(PyObject * set)2275 PySet_Clear(PyObject *set)
2276 {
2277 if (!PySet_Check(set)) {
2278 PyErr_BadInternalCall();
2279 return -1;
2280 }
2281 return set_clear_internal((PySetObject *)set);
2282 }
2283
2284 int
PySet_Contains(PyObject * anyset,PyObject * key)2285 PySet_Contains(PyObject *anyset, PyObject *key)
2286 {
2287 if (!PyAnySet_Check(anyset)) {
2288 PyErr_BadInternalCall();
2289 return -1;
2290 }
2291 return set_contains_key((PySetObject *)anyset, key);
2292 }
2293
2294 int
PySet_Discard(PyObject * set,PyObject * key)2295 PySet_Discard(PyObject *set, PyObject *key)
2296 {
2297 if (!PySet_Check(set)) {
2298 PyErr_BadInternalCall();
2299 return -1;
2300 }
2301 return set_discard_key((PySetObject *)set, key);
2302 }
2303
2304 int
PySet_Add(PyObject * anyset,PyObject * key)2305 PySet_Add(PyObject *anyset, PyObject *key)
2306 {
2307 if (!PySet_Check(anyset) &&
2308 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2309 PyErr_BadInternalCall();
2310 return -1;
2311 }
2312 return set_add_key((PySetObject *)anyset, key);
2313 }
2314
2315 int
PySet_ClearFreeList(void)2316 PySet_ClearFreeList(void)
2317 {
2318 return 0;
2319 }
2320
2321 void
PySet_Fini(void)2322 PySet_Fini(void)
2323 {
2324 Py_CLEAR(emptyfrozenset);
2325 }
2326
2327 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2328 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2329 {
2330 setentry *entry;
2331
2332 if (!PyAnySet_Check(set)) {
2333 PyErr_BadInternalCall();
2334 return -1;
2335 }
2336 if (set_next((PySetObject *)set, pos, &entry) == 0)
2337 return 0;
2338 *key = entry->key;
2339 *hash = entry->hash;
2340 return 1;
2341 }
2342
2343 PyObject *
PySet_Pop(PyObject * set)2344 PySet_Pop(PyObject *set)
2345 {
2346 if (!PySet_Check(set)) {
2347 PyErr_BadInternalCall();
2348 return NULL;
2349 }
2350 return set_pop((PySetObject *)set, NULL);
2351 }
2352
2353 int
_PySet_Update(PyObject * set,PyObject * iterable)2354 _PySet_Update(PyObject *set, PyObject *iterable)
2355 {
2356 if (!PySet_Check(set)) {
2357 PyErr_BadInternalCall();
2358 return -1;
2359 }
2360 return set_update_internal((PySetObject *)set, iterable);
2361 }
2362
2363 /* Exported for the gdb plugin's benefit. */
2364 PyObject *_PySet_Dummy = dummy;
2365
2366 #ifdef Py_DEBUG
2367
2368 /* Test code to be called with any three element set.
2369 Returns True and original set is restored. */
2370
2371 #define assertRaises(call_return_value, exception) \
2372 do { \
2373 assert(call_return_value); \
2374 assert(PyErr_ExceptionMatches(exception)); \
2375 PyErr_Clear(); \
2376 } while(0)
2377
2378 static PyObject *
test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2379 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored))
2380 {
2381 Py_ssize_t count;
2382 const char *s;
2383 Py_ssize_t i;
2384 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2385 PyObject *ob = (PyObject *)so;
2386 Py_hash_t hash;
2387 PyObject *str;
2388
2389 /* Verify preconditions */
2390 assert(PyAnySet_Check(ob));
2391 assert(PyAnySet_CheckExact(ob));
2392 assert(!PyFrozenSet_CheckExact(ob));
2393
2394 /* so.clear(); so |= set("abc"); */
2395 str = PyUnicode_FromString("abc");
2396 if (str == NULL)
2397 return NULL;
2398 set_clear_internal(so);
2399 if (set_update_internal(so, str)) {
2400 Py_DECREF(str);
2401 return NULL;
2402 }
2403 Py_DECREF(str);
2404
2405 /* Exercise type/size checks */
2406 assert(PySet_Size(ob) == 3);
2407 assert(PySet_GET_SIZE(ob) == 3);
2408
2409 /* Raise TypeError for non-iterable constructor arguments */
2410 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2411 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2412
2413 /* Raise TypeError for unhashable key */
2414 dup = PySet_New(ob);
2415 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2416 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2417 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2418
2419 /* Exercise successful pop, contains, add, and discard */
2420 elem = PySet_Pop(ob);
2421 assert(PySet_Contains(ob, elem) == 0);
2422 assert(PySet_GET_SIZE(ob) == 2);
2423 assert(PySet_Add(ob, elem) == 0);
2424 assert(PySet_Contains(ob, elem) == 1);
2425 assert(PySet_GET_SIZE(ob) == 3);
2426 assert(PySet_Discard(ob, elem) == 1);
2427 assert(PySet_GET_SIZE(ob) == 2);
2428 assert(PySet_Discard(ob, elem) == 0);
2429 assert(PySet_GET_SIZE(ob) == 2);
2430
2431 /* Exercise clear */
2432 dup2 = PySet_New(dup);
2433 assert(PySet_Clear(dup2) == 0);
2434 assert(PySet_Size(dup2) == 0);
2435 Py_DECREF(dup2);
2436
2437 /* Raise SystemError on clear or update of frozen set */
2438 f = PyFrozenSet_New(dup);
2439 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2440 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2441 assert(PySet_Add(f, elem) == 0);
2442 Py_INCREF(f);
2443 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2444 Py_DECREF(f);
2445 Py_DECREF(f);
2446
2447 /* Exercise direct iteration */
2448 i = 0, count = 0;
2449 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2450 s = PyUnicode_AsUTF8(x);
2451 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2452 count++;
2453 }
2454 assert(count == 3);
2455
2456 /* Exercise updates */
2457 dup2 = PySet_New(NULL);
2458 assert(_PySet_Update(dup2, dup) == 0);
2459 assert(PySet_Size(dup2) == 3);
2460 assert(_PySet_Update(dup2, dup) == 0);
2461 assert(PySet_Size(dup2) == 3);
2462 Py_DECREF(dup2);
2463
2464 /* Raise SystemError when self argument is not a set or frozenset. */
2465 t = PyTuple_New(0);
2466 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2467 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2468 Py_DECREF(t);
2469
2470 /* Raise SystemError when self argument is not a set. */
2471 f = PyFrozenSet_New(dup);
2472 assert(PySet_Size(f) == 3);
2473 assert(PyFrozenSet_CheckExact(f));
2474 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2475 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2476 Py_DECREF(f);
2477
2478 /* Raise KeyError when popping from an empty set */
2479 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2480 Py_DECREF(ob);
2481 assert(PySet_GET_SIZE(ob) == 0);
2482 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2483
2484 /* Restore the set from the copy using the PyNumber API */
2485 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2486 Py_DECREF(ob);
2487
2488 /* Verify constructors accept NULL arguments */
2489 f = PySet_New(NULL);
2490 assert(f != NULL);
2491 assert(PySet_GET_SIZE(f) == 0);
2492 Py_DECREF(f);
2493 f = PyFrozenSet_New(NULL);
2494 assert(f != NULL);
2495 assert(PyFrozenSet_CheckExact(f));
2496 assert(PySet_GET_SIZE(f) == 0);
2497 Py_DECREF(f);
2498
2499 Py_DECREF(elem);
2500 Py_DECREF(dup);
2501 Py_RETURN_TRUE;
2502 }
2503
2504 #undef assertRaises
2505
2506 #endif
2507
2508 /***** Dummy Struct *************************************************/
2509
2510 static PyObject *
dummy_repr(PyObject * op)2511 dummy_repr(PyObject *op)
2512 {
2513 return PyUnicode_FromString("<dummy key>");
2514 }
2515
2516 static void
dummy_dealloc(PyObject * ignore)2517 dummy_dealloc(PyObject* ignore)
2518 {
2519 Py_FatalError("deallocating <dummy key>");
2520 }
2521
2522 static PyTypeObject _PySetDummy_Type = {
2523 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2524 "<dummy key> type",
2525 0,
2526 0,
2527 dummy_dealloc, /*tp_dealloc*/ /*never called*/
2528 0, /*tp_vectorcall_offset*/
2529 0, /*tp_getattr*/
2530 0, /*tp_setattr*/
2531 0, /*tp_as_async*/
2532 dummy_repr, /*tp_repr*/
2533 0, /*tp_as_number*/
2534 0, /*tp_as_sequence*/
2535 0, /*tp_as_mapping*/
2536 0, /*tp_hash */
2537 0, /*tp_call */
2538 0, /*tp_str */
2539 0, /*tp_getattro */
2540 0, /*tp_setattro */
2541 0, /*tp_as_buffer */
2542 Py_TPFLAGS_DEFAULT, /*tp_flags */
2543 };
2544
2545 static PyObject _dummy_struct = {
2546 _PyObject_EXTRA_INIT
2547 2, &_PySetDummy_Type
2548 };
2549
2550