1
2 /* set object implementation
3 Written and maintained by Raymond D. Hettinger <python@rcn.com>
4 Derived from Lib/sets.py and Objects/dictobject.c.
5 */
6
7 #include "Python.h"
8 #include "structmember.h"
9
10 /* Set a key error with the specified argument, wrapping it in a
11 * tuple automatically so that tuple keys are not unpacked as the
12 * exception arguments. */
13 static void
set_key_error(PyObject * arg)14 set_key_error(PyObject *arg)
15 {
16 PyObject *tup;
17 tup = PyTuple_Pack(1, arg);
18 if (!tup)
19 return; /* caller will expect error to be set anyway */
20 PyErr_SetObject(PyExc_KeyError, tup);
21 Py_DECREF(tup);
22 }
23
24 /* This must be >= 1. */
25 #define PERTURB_SHIFT 5
26
27 /* Object used as dummy key to fill deleted entries */
28 static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
29
30 #ifdef Py_REF_DEBUG
31 PyObject *
_PySet_Dummy(void)32 _PySet_Dummy(void)
33 {
34 return dummy;
35 }
36 #endif
37
38 #define INIT_NONZERO_SET_SLOTS(so) do { \
39 (so)->table = (so)->smalltable; \
40 (so)->mask = PySet_MINSIZE - 1; \
41 (so)->hash = -1; \
42 } while(0)
43
44 #define EMPTY_TO_MINSIZE(so) do { \
45 memset((so)->smalltable, 0, sizeof((so)->smalltable)); \
46 (so)->used = (so)->fill = 0; \
47 INIT_NONZERO_SET_SLOTS(so); \
48 } while(0)
49
50 /* Reuse scheme to save calls to malloc, free, and memset */
51 #ifndef PySet_MAXFREELIST
52 #define PySet_MAXFREELIST 80
53 #endif
54 static PySetObject *free_list[PySet_MAXFREELIST];
55 static int numfree = 0;
56
57 /*
58 The basic lookup function used by all operations.
59 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
60 Open addressing is preferred over chaining since the link overhead for
61 chaining would be substantial (100% with typical malloc overhead).
62
63 The initial probe index is computed as hash mod the table size. Subsequent
64 probe indices are computed as explained in Objects/dictobject.c.
65
66 All arithmetic on hash should ignore overflow.
67
68 Unlike the dictionary implementation, the lookkey functions can return
69 NULL if the rich comparison returns an error.
70 */
71
72 static setentry *
set_lookkey(PySetObject * so,PyObject * key,register long hash)73 set_lookkey(PySetObject *so, PyObject *key, register long hash)
74 {
75 register Py_ssize_t i;
76 register size_t perturb;
77 register setentry *freeslot;
78 register size_t mask = so->mask;
79 setentry *table = so->table;
80 register setentry *entry;
81 register int cmp;
82 PyObject *startkey;
83
84 i = hash & mask;
85 entry = &table[i];
86 if (entry->key == NULL || entry->key == key)
87 return entry;
88
89 if (entry->key == dummy)
90 freeslot = entry;
91 else {
92 if (entry->hash == hash) {
93 startkey = entry->key;
94 Py_INCREF(startkey);
95 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
96 Py_DECREF(startkey);
97 if (cmp < 0)
98 return NULL;
99 if (table == so->table && entry->key == startkey) {
100 if (cmp > 0)
101 return entry;
102 }
103 else {
104 /* The compare did major nasty stuff to the
105 * set: start over.
106 */
107 return set_lookkey(so, key, hash);
108 }
109 }
110 freeslot = NULL;
111 }
112
113 /* In the loop, key == dummy is by far (factor of 100s) the
114 least likely outcome, so test for that last. */
115 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
116 i = (i << 2) + i + perturb + 1;
117 entry = &table[i & mask];
118 if (entry->key == NULL) {
119 if (freeslot != NULL)
120 entry = freeslot;
121 break;
122 }
123 if (entry->key == key)
124 break;
125 if (entry->hash == hash && entry->key != dummy) {
126 startkey = entry->key;
127 Py_INCREF(startkey);
128 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
129 Py_DECREF(startkey);
130 if (cmp < 0)
131 return NULL;
132 if (table == so->table && entry->key == startkey) {
133 if (cmp > 0)
134 break;
135 }
136 else {
137 /* The compare did major nasty stuff to the
138 * set: start over.
139 */
140 return set_lookkey(so, key, hash);
141 }
142 }
143 else if (entry->key == dummy && freeslot == NULL)
144 freeslot = entry;
145 }
146 return entry;
147 }
148
149 /*
150 * Hacked up version of set_lookkey which can assume keys are always strings;
151 * This means we can always use _PyString_Eq directly and not have to check to
152 * see if the comparison altered the table.
153 */
154 static setentry *
set_lookkey_string(PySetObject * so,PyObject * key,register long hash)155 set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
156 {
157 register Py_ssize_t i;
158 register size_t perturb;
159 register setentry *freeslot;
160 register size_t mask = so->mask;
161 setentry *table = so->table;
162 register setentry *entry;
163
164 /* Make sure this function doesn't have to handle non-string keys,
165 including subclasses of str; e.g., one reason to subclass
166 strings is to override __eq__, and for speed we don't cater to
167 that here. */
168 if (!PyString_CheckExact(key)) {
169 so->lookup = set_lookkey;
170 return set_lookkey(so, key, hash);
171 }
172 i = hash & mask;
173 entry = &table[i];
174 if (entry->key == NULL || entry->key == key)
175 return entry;
176 if (entry->key == dummy)
177 freeslot = entry;
178 else {
179 if (entry->hash == hash && _PyString_Eq(entry->key, key))
180 return entry;
181 freeslot = NULL;
182 }
183
184 /* In the loop, key == dummy is by far (factor of 100s) the
185 least likely outcome, so test for that last. */
186 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
187 i = (i << 2) + i + perturb + 1;
188 entry = &table[i & mask];
189 if (entry->key == NULL)
190 return freeslot == NULL ? entry : freeslot;
191 if (entry->key == key
192 || (entry->hash == hash
193 && entry->key != dummy
194 && _PyString_Eq(entry->key, key)))
195 return entry;
196 if (entry->key == dummy && freeslot == NULL)
197 freeslot = entry;
198 }
199 assert(0); /* NOT REACHED */
200 return 0;
201 }
202
203 /*
204 Internal routine to insert a new key into the table.
205 Used by the public insert routine.
206 Eats a reference to key.
207 */
208 static int
set_insert_key(register PySetObject * so,PyObject * key,long hash)209 set_insert_key(register PySetObject *so, PyObject *key, long hash)
210 {
211 register setentry *entry;
212
213 assert(so->lookup != NULL);
214 entry = so->lookup(so, key, hash);
215 if (entry == NULL)
216 return -1;
217 if (entry->key == NULL) {
218 /* UNUSED */
219 so->fill++;
220 entry->key = key;
221 entry->hash = hash;
222 so->used++;
223 } else if (entry->key == dummy) {
224 /* DUMMY */
225 entry->key = key;
226 entry->hash = hash;
227 so->used++;
228 Py_DECREF(dummy);
229 } else {
230 /* ACTIVE */
231 Py_DECREF(key);
232 }
233 return 0;
234 }
235
236 /*
237 Internal routine used by set_table_resize() to insert an item which is
238 known to be absent from the set. This routine also assumes that
239 the set contains no deleted entries. Besides the performance benefit,
240 using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
241 Note that no refcounts are changed by this routine; if needed, the caller
242 is responsible for incref'ing `key`.
243 */
244 static void
set_insert_clean(register PySetObject * so,PyObject * key,long hash)245 set_insert_clean(register PySetObject *so, PyObject *key, long hash)
246 {
247 register size_t i;
248 register size_t perturb;
249 register size_t mask = (size_t)so->mask;
250 setentry *table = so->table;
251 register setentry *entry;
252
253 i = hash & mask;
254 entry = &table[i];
255 for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
256 i = (i << 2) + i + perturb + 1;
257 entry = &table[i & mask];
258 }
259 so->fill++;
260 entry->key = key;
261 entry->hash = hash;
262 so->used++;
263 }
264
265 /*
266 Restructure the table by allocating a new table and reinserting all
267 keys again. When entries have been deleted, the new table may
268 actually be smaller than the old one.
269 */
270 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)271 set_table_resize(PySetObject *so, Py_ssize_t minused)
272 {
273 Py_ssize_t newsize;
274 setentry *oldtable, *newtable, *entry;
275 Py_ssize_t i;
276 int is_oldtable_malloced;
277 setentry small_copy[PySet_MINSIZE];
278
279 assert(minused >= 0);
280
281 /* Find the smallest table size > minused. */
282 for (newsize = PySet_MINSIZE;
283 newsize <= minused && newsize > 0;
284 newsize <<= 1)
285 ;
286 if (newsize <= 0) {
287 PyErr_NoMemory();
288 return -1;
289 }
290
291 /* Get space for a new table. */
292 oldtable = so->table;
293 assert(oldtable != NULL);
294 is_oldtable_malloced = oldtable != so->smalltable;
295
296 if (newsize == PySet_MINSIZE) {
297 /* A large table is shrinking, or we can't get any smaller. */
298 newtable = so->smalltable;
299 if (newtable == oldtable) {
300 if (so->fill == so->used) {
301 /* No dummies, so no point doing anything. */
302 return 0;
303 }
304 /* We're not going to resize it, but rebuild the
305 table anyway to purge old dummy entries.
306 Subtle: This is *necessary* if fill==size,
307 as set_lookkey needs at least one virgin slot to
308 terminate failing searches. If fill < size, it's
309 merely desirable, as dummies slow searches. */
310 assert(so->fill > so->used);
311 memcpy(small_copy, oldtable, sizeof(small_copy));
312 oldtable = small_copy;
313 }
314 }
315 else {
316 newtable = PyMem_NEW(setentry, newsize);
317 if (newtable == NULL) {
318 PyErr_NoMemory();
319 return -1;
320 }
321 }
322
323 /* Make the set empty, using the new table. */
324 assert(newtable != oldtable);
325 so->table = newtable;
326 so->mask = newsize - 1;
327 memset(newtable, 0, sizeof(setentry) * newsize);
328 so->used = 0;
329 i = so->fill;
330 so->fill = 0;
331
332 /* Copy the data over; this is refcount-neutral for active entries;
333 dummy entries aren't copied over, of course */
334 for (entry = oldtable; i > 0; entry++) {
335 if (entry->key == NULL) {
336 /* UNUSED */
337 ;
338 } else if (entry->key == dummy) {
339 /* DUMMY */
340 --i;
341 assert(entry->key == dummy);
342 Py_DECREF(entry->key);
343 } else {
344 /* ACTIVE */
345 --i;
346 set_insert_clean(so, entry->key, entry->hash);
347 }
348 }
349
350 if (is_oldtable_malloced)
351 PyMem_DEL(oldtable);
352 return 0;
353 }
354
355 /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
356
357 static int
set_add_entry(register PySetObject * so,setentry * entry)358 set_add_entry(register PySetObject *so, setentry *entry)
359 {
360 register Py_ssize_t n_used;
361 PyObject *key = entry->key;
362 long hash = entry->hash;
363
364 assert(so->fill <= so->mask); /* at least one empty slot */
365 n_used = so->used;
366 Py_INCREF(key);
367 if (set_insert_key(so, key, hash) == -1) {
368 Py_DECREF(key);
369 return -1;
370 }
371 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
372 return 0;
373 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
374 }
375
376 static int
set_add_key(register PySetObject * so,PyObject * key)377 set_add_key(register PySetObject *so, PyObject *key)
378 {
379 register long hash;
380 register Py_ssize_t n_used;
381
382 if (!PyString_CheckExact(key) ||
383 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
384 hash = PyObject_Hash(key);
385 if (hash == -1)
386 return -1;
387 }
388 assert(so->fill <= so->mask); /* at least one empty slot */
389 n_used = so->used;
390 Py_INCREF(key);
391 if (set_insert_key(so, key, hash) == -1) {
392 Py_DECREF(key);
393 return -1;
394 }
395 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
396 return 0;
397 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
398 }
399
400 #define DISCARD_NOTFOUND 0
401 #define DISCARD_FOUND 1
402
403 static int
set_discard_entry(PySetObject * so,setentry * oldentry)404 set_discard_entry(PySetObject *so, setentry *oldentry)
405 { register setentry *entry;
406 PyObject *old_key;
407
408 entry = (so->lookup)(so, oldentry->key, oldentry->hash);
409 if (entry == NULL)
410 return -1;
411 if (entry->key == NULL || entry->key == dummy)
412 return DISCARD_NOTFOUND;
413 old_key = entry->key;
414 Py_INCREF(dummy);
415 entry->key = dummy;
416 so->used--;
417 Py_DECREF(old_key);
418 return DISCARD_FOUND;
419 }
420
421 static int
set_discard_key(PySetObject * so,PyObject * key)422 set_discard_key(PySetObject *so, PyObject *key)
423 {
424 register long hash;
425 register setentry *entry;
426 PyObject *old_key;
427
428 assert (PyAnySet_Check(so));
429 if (!PyString_CheckExact(key) ||
430 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
431 hash = PyObject_Hash(key);
432 if (hash == -1)
433 return -1;
434 }
435 entry = (so->lookup)(so, key, hash);
436 if (entry == NULL)
437 return -1;
438 if (entry->key == NULL || entry->key == dummy)
439 return DISCARD_NOTFOUND;
440 old_key = entry->key;
441 Py_INCREF(dummy);
442 entry->key = dummy;
443 so->used--;
444 Py_DECREF(old_key);
445 return DISCARD_FOUND;
446 }
447
448 static int
set_clear_internal(PySetObject * so)449 set_clear_internal(PySetObject *so)
450 {
451 setentry *entry, *table;
452 int table_is_malloced;
453 Py_ssize_t fill;
454 setentry small_copy[PySet_MINSIZE];
455 #ifdef Py_DEBUG
456 Py_ssize_t i, n;
457 assert (PyAnySet_Check(so));
458
459 n = so->mask + 1;
460 i = 0;
461 #endif
462
463 table = so->table;
464 assert(table != NULL);
465 table_is_malloced = table != so->smalltable;
466
467 /* This is delicate. During the process of clearing the set,
468 * decrefs can cause the set to mutate. To avoid fatal confusion
469 * (voice of experience), we have to make the set empty before
470 * clearing the slots, and never refer to anything via so->ref while
471 * clearing.
472 */
473 fill = so->fill;
474 if (table_is_malloced)
475 EMPTY_TO_MINSIZE(so);
476
477 else if (fill > 0) {
478 /* It's a small table with something that needs to be cleared.
479 * Afraid the only safe way is to copy the set entries into
480 * another small table first.
481 */
482 memcpy(small_copy, table, sizeof(small_copy));
483 table = small_copy;
484 EMPTY_TO_MINSIZE(so);
485 }
486 /* else it's a small table that's already empty */
487
488 /* Now we can finally clear things. If C had refcounts, we could
489 * assert that the refcount on table is 1 now, i.e. that this function
490 * has unique access to it, so decref side-effects can't alter it.
491 */
492 for (entry = table; fill > 0; ++entry) {
493 #ifdef Py_DEBUG
494 assert(i < n);
495 ++i;
496 #endif
497 if (entry->key) {
498 --fill;
499 Py_DECREF(entry->key);
500 }
501 #ifdef Py_DEBUG
502 else
503 assert(entry->key == NULL);
504 #endif
505 }
506
507 if (table_is_malloced)
508 PyMem_DEL(table);
509 return 0;
510 }
511
512 /*
513 * Iterate over a set table. Use like so:
514 *
515 * Py_ssize_t pos;
516 * setentry *entry;
517 * pos = 0; # important! pos should not otherwise be changed by you
518 * while (set_next(yourset, &pos, &entry)) {
519 * Refer to borrowed reference in entry->key.
520 * }
521 *
522 * CAUTION: In general, it isn't safe to use set_next in a loop that
523 * mutates the table.
524 */
525 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)526 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
527 {
528 Py_ssize_t i;
529 Py_ssize_t mask;
530 register setentry *table;
531
532 assert (PyAnySet_Check(so));
533 i = *pos_ptr;
534 assert(i >= 0);
535 table = so->table;
536 mask = so->mask;
537 while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
538 i++;
539 *pos_ptr = i+1;
540 if (i > mask)
541 return 0;
542 assert(table[i].key != NULL);
543 *entry_ptr = &table[i];
544 return 1;
545 }
546
547 static void
set_dealloc(PySetObject * so)548 set_dealloc(PySetObject *so)
549 {
550 register setentry *entry;
551 Py_ssize_t fill = so->fill;
552 /* bpo-31095: UnTrack is needed before calling any callbacks */
553 PyObject_GC_UnTrack(so);
554 Py_TRASHCAN_SAFE_BEGIN(so)
555 if (so->weakreflist != NULL)
556 PyObject_ClearWeakRefs((PyObject *) so);
557
558 for (entry = so->table; fill > 0; entry++) {
559 if (entry->key) {
560 --fill;
561 Py_DECREF(entry->key);
562 }
563 }
564 if (so->table != so->smalltable)
565 PyMem_DEL(so->table);
566 if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so))
567 free_list[numfree++] = so;
568 else
569 Py_TYPE(so)->tp_free(so);
570 Py_TRASHCAN_SAFE_END(so)
571 }
572
573 static int
set_tp_print(PySetObject * so,FILE * fp,int flags)574 set_tp_print(PySetObject *so, FILE *fp, int flags)
575 {
576 setentry *entry;
577 Py_ssize_t pos=0;
578 char *emit = ""; /* No separator emitted on first pass */
579 char *separator = ", ";
580 int status = Py_ReprEnter((PyObject*)so);
581
582 if (status != 0) {
583 if (status < 0)
584 return status;
585 Py_BEGIN_ALLOW_THREADS
586 fprintf(fp, "%s(...)", Py_TYPE(so)->tp_name);
587 Py_END_ALLOW_THREADS
588 return 0;
589 }
590
591 Py_BEGIN_ALLOW_THREADS
592 fprintf(fp, "%s([", Py_TYPE(so)->tp_name);
593 Py_END_ALLOW_THREADS
594 while (set_next(so, &pos, &entry)) {
595 Py_BEGIN_ALLOW_THREADS
596 fputs(emit, fp);
597 Py_END_ALLOW_THREADS
598 emit = separator;
599 if (PyObject_Print(entry->key, fp, 0) != 0) {
600 Py_ReprLeave((PyObject*)so);
601 return -1;
602 }
603 }
604 Py_BEGIN_ALLOW_THREADS
605 fputs("])", fp);
606 Py_END_ALLOW_THREADS
607 Py_ReprLeave((PyObject*)so);
608 return 0;
609 }
610
611 static PyObject *
set_repr(PySetObject * so)612 set_repr(PySetObject *so)
613 {
614 PyObject *keys, *result=NULL, *listrepr;
615 int status = Py_ReprEnter((PyObject*)so);
616
617 if (status != 0) {
618 if (status < 0)
619 return NULL;
620 return PyString_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
621 }
622
623 keys = PySequence_List((PyObject *)so);
624 if (keys == NULL)
625 goto done;
626 listrepr = PyObject_Repr(keys);
627 Py_DECREF(keys);
628 if (listrepr == NULL)
629 goto done;
630
631 result = PyString_FromFormat("%s(%s)", Py_TYPE(so)->tp_name,
632 PyString_AS_STRING(listrepr));
633 Py_DECREF(listrepr);
634 done:
635 Py_ReprLeave((PyObject*)so);
636 return result;
637 }
638
639 static Py_ssize_t
set_len(PyObject * so)640 set_len(PyObject *so)
641 {
642 return ((PySetObject *)so)->used;
643 }
644
645 static int
set_merge(PySetObject * so,PyObject * otherset)646 set_merge(PySetObject *so, PyObject *otherset)
647 {
648 PySetObject *other;
649 PyObject *key;
650 long hash;
651 register Py_ssize_t i;
652 register setentry *entry;
653
654 assert (PyAnySet_Check(so));
655 assert (PyAnySet_Check(otherset));
656
657 other = (PySetObject*)otherset;
658 if (other == so || other->used == 0)
659 /* a.update(a) or a.update({}); nothing to do */
660 return 0;
661 /* Do one big resize at the start, rather than
662 * incrementally resizing as we insert new keys. Expect
663 * that there will be no (or few) overlapping keys.
664 */
665 if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
666 if (set_table_resize(so, (so->used + other->used)*2) != 0)
667 return -1;
668 }
669 for (i = 0; i <= other->mask; i++) {
670 entry = &other->table[i];
671 key = entry->key;
672 hash = entry->hash;
673 if (key != NULL &&
674 key != dummy) {
675 Py_INCREF(key);
676 if (set_insert_key(so, key, hash) == -1) {
677 Py_DECREF(key);
678 return -1;
679 }
680 }
681 }
682 return 0;
683 }
684
685 static int
set_contains_key(PySetObject * so,PyObject * key)686 set_contains_key(PySetObject *so, PyObject *key)
687 {
688 long hash;
689 setentry *entry;
690
691 if (!PyString_CheckExact(key) ||
692 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
693 hash = PyObject_Hash(key);
694 if (hash == -1)
695 return -1;
696 }
697 entry = (so->lookup)(so, key, hash);
698 if (entry == NULL)
699 return -1;
700 key = entry->key;
701 return key != NULL && key != dummy;
702 }
703
704 static int
set_contains_entry(PySetObject * so,setentry * entry)705 set_contains_entry(PySetObject *so, setentry *entry)
706 {
707 PyObject *key;
708 setentry *lu_entry;
709
710 lu_entry = (so->lookup)(so, entry->key, entry->hash);
711 if (lu_entry == NULL)
712 return -1;
713 key = lu_entry->key;
714 return key != NULL && key != dummy;
715 }
716
717 static PyObject *
set_pop(PySetObject * so)718 set_pop(PySetObject *so)
719 {
720 register Py_ssize_t i = 0;
721 register setentry *entry;
722 PyObject *key;
723
724 assert (PyAnySet_Check(so));
725 if (so->used == 0) {
726 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
727 return NULL;
728 }
729
730 /* Set entry to "the first" unused or dummy set entry. We abuse
731 * the hash field of slot 0 to hold a search finger:
732 * If slot 0 has a value, use slot 0.
733 * Else slot 0 is being used to hold a search finger,
734 * and we use its hash value as the first index to look.
735 */
736 entry = &so->table[0];
737 if (entry->key == NULL || entry->key == dummy) {
738 i = entry->hash;
739 /* The hash field may be a real hash value, or it may be a
740 * legit search finger, or it may be a once-legit search
741 * finger that's out of bounds now because it wrapped around
742 * or the table shrunk -- simply make sure it's in bounds now.
743 */
744 if (i > so->mask || i < 1)
745 i = 1; /* skip slot 0 */
746 while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
747 i++;
748 if (i > so->mask)
749 i = 1;
750 }
751 }
752 key = entry->key;
753 Py_INCREF(dummy);
754 entry->key = dummy;
755 so->used--;
756 so->table[0].hash = i + 1; /* next place to start */
757 return key;
758 }
759
760 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
761 Raises KeyError if the set is empty.");
762
763 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)764 set_traverse(PySetObject *so, visitproc visit, void *arg)
765 {
766 Py_ssize_t pos = 0;
767 setentry *entry;
768
769 while (set_next(so, &pos, &entry))
770 Py_VISIT(entry->key);
771 return 0;
772 }
773
774 static long
frozenset_hash(PyObject * self)775 frozenset_hash(PyObject *self)
776 {
777 PySetObject *so = (PySetObject *)self;
778 long h, hash = 1927868237L;
779 setentry *entry;
780 Py_ssize_t pos = 0;
781
782 if (so->hash != -1)
783 return so->hash;
784
785 hash *= PySet_GET_SIZE(self) + 1;
786 while (set_next(so, &pos, &entry)) {
787 /* Work to increase the bit dispersion for closely spaced hash
788 values. This is important because some use cases have many
789 combinations of a small number of elements with nearby
790 hashes so that many distinct combinations collapse to only
791 a handful of distinct hash values. */
792 h = entry->hash;
793 hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u;
794 }
795 hash = hash * 69069L + 907133923L;
796 if (hash == -1)
797 hash = 590923713L;
798 so->hash = hash;
799 return hash;
800 }
801
802 /***** Set iterator type ***********************************************/
803
804 typedef struct {
805 PyObject_HEAD
806 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
807 Py_ssize_t si_used;
808 Py_ssize_t si_pos;
809 Py_ssize_t len;
810 } setiterobject;
811
812 static void
setiter_dealloc(setiterobject * si)813 setiter_dealloc(setiterobject *si)
814 {
815 /* bpo-31095: UnTrack is needed before calling any callbacks */
816 _PyObject_GC_UNTRACK(si);
817 Py_XDECREF(si->si_set);
818 PyObject_GC_Del(si);
819 }
820
821 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)822 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
823 {
824 Py_VISIT(si->si_set);
825 return 0;
826 }
827
828 static PyObject *
setiter_len(setiterobject * si)829 setiter_len(setiterobject *si)
830 {
831 Py_ssize_t len = 0;
832 if (si->si_set != NULL && si->si_used == si->si_set->used)
833 len = si->len;
834 return PyInt_FromLong(len);
835 }
836
837 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
838
839 static PyMethodDef setiter_methods[] = {
840 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
841 {NULL, NULL} /* sentinel */
842 };
843
setiter_iternext(setiterobject * si)844 static PyObject *setiter_iternext(setiterobject *si)
845 {
846 PyObject *key;
847 register Py_ssize_t i, mask;
848 register setentry *entry;
849 PySetObject *so = si->si_set;
850
851 if (so == NULL)
852 return NULL;
853 assert (PyAnySet_Check(so));
854
855 if (si->si_used != so->used) {
856 PyErr_SetString(PyExc_RuntimeError,
857 "Set changed size during iteration");
858 si->si_used = -1; /* Make this state sticky */
859 return NULL;
860 }
861
862 i = si->si_pos;
863 assert(i>=0);
864 entry = so->table;
865 mask = so->mask;
866 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
867 i++;
868 si->si_pos = i+1;
869 if (i > mask)
870 goto fail;
871 si->len--;
872 key = entry[i].key;
873 Py_INCREF(key);
874 return key;
875
876 fail:
877 si->si_set = NULL;
878 Py_DECREF(so);
879 return NULL;
880 }
881
882 static PyTypeObject PySetIter_Type = {
883 PyVarObject_HEAD_INIT(&PyType_Type, 0)
884 "setiterator", /* tp_name */
885 sizeof(setiterobject), /* tp_basicsize */
886 0, /* tp_itemsize */
887 /* methods */
888 (destructor)setiter_dealloc, /* tp_dealloc */
889 0, /* tp_print */
890 0, /* tp_getattr */
891 0, /* tp_setattr */
892 0, /* tp_compare */
893 0, /* tp_repr */
894 0, /* tp_as_number */
895 0, /* tp_as_sequence */
896 0, /* tp_as_mapping */
897 0, /* tp_hash */
898 0, /* tp_call */
899 0, /* tp_str */
900 PyObject_GenericGetAttr, /* tp_getattro */
901 0, /* tp_setattro */
902 0, /* tp_as_buffer */
903 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
904 0, /* tp_doc */
905 (traverseproc)setiter_traverse, /* tp_traverse */
906 0, /* tp_clear */
907 0, /* tp_richcompare */
908 0, /* tp_weaklistoffset */
909 PyObject_SelfIter, /* tp_iter */
910 (iternextfunc)setiter_iternext, /* tp_iternext */
911 setiter_methods, /* tp_methods */
912 0,
913 };
914
915 static PyObject *
set_iter(PySetObject * so)916 set_iter(PySetObject *so)
917 {
918 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
919 if (si == NULL)
920 return NULL;
921 Py_INCREF(so);
922 si->si_set = so;
923 si->si_used = so->used;
924 si->si_pos = 0;
925 si->len = so->used;
926 _PyObject_GC_TRACK(si);
927 return (PyObject *)si;
928 }
929
930 static int
set_update_internal(PySetObject * so,PyObject * other)931 set_update_internal(PySetObject *so, PyObject *other)
932 {
933 PyObject *key, *it;
934
935 if (PyAnySet_Check(other))
936 return set_merge(so, other);
937
938 if (PyDict_CheckExact(other)) {
939 PyObject *value;
940 Py_ssize_t pos = 0;
941 long hash;
942 Py_ssize_t dictsize = PyDict_Size(other);
943
944 /* Do one big resize at the start, rather than
945 * incrementally resizing as we insert new keys. Expect
946 * that there will be no (or few) overlapping keys.
947 */
948 if (dictsize == -1)
949 return -1;
950 if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
951 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
952 return -1;
953 }
954 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
955 setentry an_entry;
956
957 an_entry.hash = hash;
958 an_entry.key = key;
959 if (set_add_entry(so, &an_entry) == -1)
960 return -1;
961 }
962 return 0;
963 }
964
965 it = PyObject_GetIter(other);
966 if (it == NULL)
967 return -1;
968
969 while ((key = PyIter_Next(it)) != NULL) {
970 if (set_add_key(so, key) == -1) {
971 Py_DECREF(it);
972 Py_DECREF(key);
973 return -1;
974 }
975 Py_DECREF(key);
976 }
977 Py_DECREF(it);
978 if (PyErr_Occurred())
979 return -1;
980 return 0;
981 }
982
983 static PyObject *
set_update(PySetObject * so,PyObject * args)984 set_update(PySetObject *so, PyObject *args)
985 {
986 Py_ssize_t i;
987
988 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
989 PyObject *other = PyTuple_GET_ITEM(args, i);
990 if (set_update_internal(so, other) == -1)
991 return NULL;
992 }
993 Py_RETURN_NONE;
994 }
995
996 PyDoc_STRVAR(update_doc,
997 "Update a set with the union of itself and others.");
998
999 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1000 make_new_set(PyTypeObject *type, PyObject *iterable)
1001 {
1002 register PySetObject *so = NULL;
1003
1004 if (dummy == NULL) { /* Auto-initialize dummy */
1005 dummy = PyString_FromString("<dummy key>");
1006 if (dummy == NULL)
1007 return NULL;
1008 }
1009
1010 /* create PySetObject structure */
1011 if (numfree &&
1012 (type == &PySet_Type || type == &PyFrozenSet_Type)) {
1013 so = free_list[--numfree];
1014 assert (so != NULL && PyAnySet_CheckExact(so));
1015 Py_TYPE(so) = type;
1016 _Py_NewReference((PyObject *)so);
1017 EMPTY_TO_MINSIZE(so);
1018 PyObject_GC_Track(so);
1019 } else {
1020 so = (PySetObject *)type->tp_alloc(type, 0);
1021 if (so == NULL)
1022 return NULL;
1023 /* tp_alloc has already zeroed the structure */
1024 assert(so->table == NULL && so->fill == 0 && so->used == 0);
1025 INIT_NONZERO_SET_SLOTS(so);
1026 }
1027
1028 so->lookup = set_lookkey_string;
1029 so->weakreflist = NULL;
1030
1031 if (iterable != NULL) {
1032 if (set_update_internal(so, iterable) == -1) {
1033 Py_DECREF(so);
1034 return NULL;
1035 }
1036 }
1037
1038 return (PyObject *)so;
1039 }
1040
1041 /* The empty frozenset is a singleton */
1042 static PyObject *emptyfrozenset = NULL;
1043
1044 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1045 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1046 {
1047 PyObject *iterable = NULL, *result;
1048
1049 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
1050 return NULL;
1051
1052 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1053 return NULL;
1054
1055 if (type != &PyFrozenSet_Type)
1056 return make_new_set(type, iterable);
1057
1058 if (iterable != NULL) {
1059 /* frozenset(f) is idempotent */
1060 if (PyFrozenSet_CheckExact(iterable)) {
1061 Py_INCREF(iterable);
1062 return iterable;
1063 }
1064 result = make_new_set(type, iterable);
1065 if (result == NULL || PySet_GET_SIZE(result))
1066 return result;
1067 Py_DECREF(result);
1068 }
1069 /* The empty frozenset is a singleton */
1070 if (emptyfrozenset == NULL)
1071 emptyfrozenset = make_new_set(type, NULL);
1072 Py_XINCREF(emptyfrozenset);
1073 return emptyfrozenset;
1074 }
1075
1076 void
PySet_Fini(void)1077 PySet_Fini(void)
1078 {
1079 PySetObject *so;
1080
1081 while (numfree) {
1082 numfree--;
1083 so = free_list[numfree];
1084 PyObject_GC_Del(so);
1085 }
1086 Py_CLEAR(dummy);
1087 Py_CLEAR(emptyfrozenset);
1088 }
1089
1090 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1091 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1092 {
1093 if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
1094 return NULL;
1095
1096 return make_new_set(type, NULL);
1097 }
1098
1099 /* set_swap_bodies() switches the contents of any two sets by moving their
1100 internal data pointers and, if needed, copying the internal smalltables.
1101 Semantically equivalent to:
1102
1103 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1104
1105 The function always succeeds and it leaves both objects in a stable state.
1106 Useful for creating temporary frozensets from sets for membership testing
1107 in __contains__(), discard(), and remove(). Also useful for operations
1108 that update in-place (by allowing an intermediate result to be swapped
1109 into one of the original inputs).
1110 */
1111
1112 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1113 set_swap_bodies(PySetObject *a, PySetObject *b)
1114 {
1115 Py_ssize_t t;
1116 setentry *u;
1117 setentry *(*f)(PySetObject *so, PyObject *key, long hash);
1118 setentry tab[PySet_MINSIZE];
1119 long h;
1120
1121 t = a->fill; a->fill = b->fill; b->fill = t;
1122 t = a->used; a->used = b->used; b->used = t;
1123 t = a->mask; a->mask = b->mask; b->mask = t;
1124
1125 u = a->table;
1126 if (a->table == a->smalltable)
1127 u = b->smalltable;
1128 a->table = b->table;
1129 if (b->table == b->smalltable)
1130 a->table = a->smalltable;
1131 b->table = u;
1132
1133 f = a->lookup; a->lookup = b->lookup; b->lookup = f;
1134
1135 if (a->table == a->smalltable || b->table == b->smalltable) {
1136 memcpy(tab, a->smalltable, sizeof(tab));
1137 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1138 memcpy(b->smalltable, tab, sizeof(tab));
1139 }
1140
1141 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1142 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1143 h = a->hash; a->hash = b->hash; b->hash = h;
1144 } else {
1145 a->hash = -1;
1146 b->hash = -1;
1147 }
1148 }
1149
1150 static PyObject *
set_copy(PySetObject * so)1151 set_copy(PySetObject *so)
1152 {
1153 return make_new_set(Py_TYPE(so), (PyObject *)so);
1154 }
1155
1156 static PyObject *
frozenset_copy(PySetObject * so)1157 frozenset_copy(PySetObject *so)
1158 {
1159 if (PyFrozenSet_CheckExact(so)) {
1160 Py_INCREF(so);
1161 return (PyObject *)so;
1162 }
1163 return set_copy(so);
1164 }
1165
1166 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1167
1168 static PyObject *
set_clear(PySetObject * so)1169 set_clear(PySetObject *so)
1170 {
1171 set_clear_internal(so);
1172 Py_RETURN_NONE;
1173 }
1174
1175 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1176
1177 static PyObject *
set_union(PySetObject * so,PyObject * args)1178 set_union(PySetObject *so, PyObject *args)
1179 {
1180 PySetObject *result;
1181 PyObject *other;
1182 Py_ssize_t i;
1183
1184 result = (PySetObject *)set_copy(so);
1185 if (result == NULL)
1186 return NULL;
1187
1188 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1189 other = PyTuple_GET_ITEM(args, i);
1190 if ((PyObject *)so == other)
1191 continue;
1192 if (set_update_internal(result, other) == -1) {
1193 Py_DECREF(result);
1194 return NULL;
1195 }
1196 }
1197 return (PyObject *)result;
1198 }
1199
1200 PyDoc_STRVAR(union_doc,
1201 "Return the union of sets as a new set.\n\
1202 \n\
1203 (i.e. all elements that are in either set.)");
1204
1205 static PyObject *
set_or(PySetObject * so,PyObject * other)1206 set_or(PySetObject *so, PyObject *other)
1207 {
1208 PySetObject *result;
1209
1210 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1211 Py_INCREF(Py_NotImplemented);
1212 return Py_NotImplemented;
1213 }
1214
1215 result = (PySetObject *)set_copy(so);
1216 if (result == NULL)
1217 return NULL;
1218 if ((PyObject *)so == other)
1219 return (PyObject *)result;
1220 if (set_update_internal(result, other) == -1) {
1221 Py_DECREF(result);
1222 return NULL;
1223 }
1224 return (PyObject *)result;
1225 }
1226
1227 static PyObject *
set_ior(PySetObject * so,PyObject * other)1228 set_ior(PySetObject *so, PyObject *other)
1229 {
1230 if (!PyAnySet_Check(other)) {
1231 Py_INCREF(Py_NotImplemented);
1232 return Py_NotImplemented;
1233 }
1234 if (set_update_internal(so, other) == -1)
1235 return NULL;
1236 Py_INCREF(so);
1237 return (PyObject *)so;
1238 }
1239
1240 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1241 set_intersection(PySetObject *so, PyObject *other)
1242 {
1243 PySetObject *result;
1244 PyObject *key, *it, *tmp;
1245
1246 if ((PyObject *)so == other)
1247 return set_copy(so);
1248
1249 result = (PySetObject *)make_new_set(Py_TYPE(so), NULL);
1250 if (result == NULL)
1251 return NULL;
1252
1253 if (PyAnySet_Check(other)) {
1254 Py_ssize_t pos = 0;
1255 setentry *entry;
1256
1257 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1258 tmp = (PyObject *)so;
1259 so = (PySetObject *)other;
1260 other = tmp;
1261 }
1262
1263 while (set_next((PySetObject *)other, &pos, &entry)) {
1264 int rv = set_contains_entry(so, entry);
1265 if (rv == -1) {
1266 Py_DECREF(result);
1267 return NULL;
1268 }
1269 if (rv) {
1270 if (set_add_entry(result, entry) == -1) {
1271 Py_DECREF(result);
1272 return NULL;
1273 }
1274 }
1275 }
1276 return (PyObject *)result;
1277 }
1278
1279 it = PyObject_GetIter(other);
1280 if (it == NULL) {
1281 Py_DECREF(result);
1282 return NULL;
1283 }
1284
1285 while ((key = PyIter_Next(it)) != NULL) {
1286 int rv;
1287 setentry entry;
1288 long hash = PyObject_Hash(key);
1289
1290 if (hash == -1) {
1291 Py_DECREF(it);
1292 Py_DECREF(result);
1293 Py_DECREF(key);
1294 return NULL;
1295 }
1296 entry.hash = hash;
1297 entry.key = key;
1298 rv = set_contains_entry(so, &entry);
1299 if (rv == -1) {
1300 Py_DECREF(it);
1301 Py_DECREF(result);
1302 Py_DECREF(key);
1303 return NULL;
1304 }
1305 if (rv) {
1306 if (set_add_entry(result, &entry) == -1) {
1307 Py_DECREF(it);
1308 Py_DECREF(result);
1309 Py_DECREF(key);
1310 return NULL;
1311 }
1312 }
1313 Py_DECREF(key);
1314 }
1315 Py_DECREF(it);
1316 if (PyErr_Occurred()) {
1317 Py_DECREF(result);
1318 return NULL;
1319 }
1320 return (PyObject *)result;
1321 }
1322
1323 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1324 set_intersection_multi(PySetObject *so, PyObject *args)
1325 {
1326 Py_ssize_t i;
1327 PyObject *result = (PyObject *)so;
1328
1329 if (PyTuple_GET_SIZE(args) == 0)
1330 return set_copy(so);
1331
1332 Py_INCREF(so);
1333 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1334 PyObject *other = PyTuple_GET_ITEM(args, i);
1335 PyObject *newresult = set_intersection((PySetObject *)result, other);
1336 if (newresult == NULL) {
1337 Py_DECREF(result);
1338 return NULL;
1339 }
1340 Py_DECREF(result);
1341 result = newresult;
1342 }
1343 return result;
1344 }
1345
1346 PyDoc_STRVAR(intersection_doc,
1347 "Return the intersection of two or more sets as a new set.\n\
1348 \n\
1349 (i.e. elements that are common to all of the sets.)");
1350
1351 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1352 set_intersection_update(PySetObject *so, PyObject *other)
1353 {
1354 PyObject *tmp;
1355
1356 tmp = set_intersection(so, other);
1357 if (tmp == NULL)
1358 return NULL;
1359 set_swap_bodies(so, (PySetObject *)tmp);
1360 Py_DECREF(tmp);
1361 Py_RETURN_NONE;
1362 }
1363
1364 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1365 set_intersection_update_multi(PySetObject *so, PyObject *args)
1366 {
1367 PyObject *tmp;
1368
1369 tmp = set_intersection_multi(so, args);
1370 if (tmp == NULL)
1371 return NULL;
1372 set_swap_bodies(so, (PySetObject *)tmp);
1373 Py_DECREF(tmp);
1374 Py_RETURN_NONE;
1375 }
1376
1377 PyDoc_STRVAR(intersection_update_doc,
1378 "Update a set with the intersection of itself and another.");
1379
1380 static PyObject *
set_and(PySetObject * so,PyObject * other)1381 set_and(PySetObject *so, PyObject *other)
1382 {
1383 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1384 Py_INCREF(Py_NotImplemented);
1385 return Py_NotImplemented;
1386 }
1387 return set_intersection(so, other);
1388 }
1389
1390 static PyObject *
set_iand(PySetObject * so,PyObject * other)1391 set_iand(PySetObject *so, PyObject *other)
1392 {
1393 PyObject *result;
1394
1395 if (!PyAnySet_Check(other)) {
1396 Py_INCREF(Py_NotImplemented);
1397 return Py_NotImplemented;
1398 }
1399 result = set_intersection_update(so, other);
1400 if (result == NULL)
1401 return NULL;
1402 Py_DECREF(result);
1403 Py_INCREF(so);
1404 return (PyObject *)so;
1405 }
1406
1407 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1408 set_isdisjoint(PySetObject *so, PyObject *other)
1409 {
1410 PyObject *key, *it, *tmp;
1411
1412 if ((PyObject *)so == other) {
1413 if (PySet_GET_SIZE(so) == 0)
1414 Py_RETURN_TRUE;
1415 else
1416 Py_RETURN_FALSE;
1417 }
1418
1419 if (PyAnySet_CheckExact(other)) {
1420 Py_ssize_t pos = 0;
1421 setentry *entry;
1422
1423 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1424 tmp = (PyObject *)so;
1425 so = (PySetObject *)other;
1426 other = tmp;
1427 }
1428 while (set_next((PySetObject *)other, &pos, &entry)) {
1429 int rv = set_contains_entry(so, entry);
1430 if (rv == -1)
1431 return NULL;
1432 if (rv)
1433 Py_RETURN_FALSE;
1434 }
1435 Py_RETURN_TRUE;
1436 }
1437
1438 it = PyObject_GetIter(other);
1439 if (it == NULL)
1440 return NULL;
1441
1442 while ((key = PyIter_Next(it)) != NULL) {
1443 int rv;
1444 setentry entry;
1445 long hash = PyObject_Hash(key);
1446
1447 if (hash == -1) {
1448 Py_DECREF(key);
1449 Py_DECREF(it);
1450 return NULL;
1451 }
1452 entry.hash = hash;
1453 entry.key = key;
1454 rv = set_contains_entry(so, &entry);
1455 Py_DECREF(key);
1456 if (rv == -1) {
1457 Py_DECREF(it);
1458 return NULL;
1459 }
1460 if (rv) {
1461 Py_DECREF(it);
1462 Py_RETURN_FALSE;
1463 }
1464 }
1465 Py_DECREF(it);
1466 if (PyErr_Occurred())
1467 return NULL;
1468 Py_RETURN_TRUE;
1469 }
1470
1471 PyDoc_STRVAR(isdisjoint_doc,
1472 "Return True if two sets have a null intersection.");
1473
1474 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1475 set_difference_update_internal(PySetObject *so, PyObject *other)
1476 {
1477 if ((PyObject *)so == other)
1478 return set_clear_internal(so);
1479
1480 if (PyAnySet_Check(other)) {
1481 setentry *entry;
1482 Py_ssize_t pos = 0;
1483
1484 while (set_next((PySetObject *)other, &pos, &entry))
1485 if (set_discard_entry(so, entry) == -1)
1486 return -1;
1487 } else {
1488 PyObject *key, *it;
1489 it = PyObject_GetIter(other);
1490 if (it == NULL)
1491 return -1;
1492
1493 while ((key = PyIter_Next(it)) != NULL) {
1494 if (set_discard_key(so, key) == -1) {
1495 Py_DECREF(it);
1496 Py_DECREF(key);
1497 return -1;
1498 }
1499 Py_DECREF(key);
1500 }
1501 Py_DECREF(it);
1502 if (PyErr_Occurred())
1503 return -1;
1504 }
1505 /* If more than 1/5 are dummies, then resize them away. */
1506 if ((so->fill - so->used) * 5 < so->mask)
1507 return 0;
1508 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1509 }
1510
1511 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1512 set_difference_update(PySetObject *so, PyObject *args)
1513 {
1514 Py_ssize_t i;
1515
1516 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1517 PyObject *other = PyTuple_GET_ITEM(args, i);
1518 if (set_difference_update_internal(so, other) == -1)
1519 return NULL;
1520 }
1521 Py_RETURN_NONE;
1522 }
1523
1524 PyDoc_STRVAR(difference_update_doc,
1525 "Remove all elements of another set from this set.");
1526
1527 static PyObject *
set_difference(PySetObject * so,PyObject * other)1528 set_difference(PySetObject *so, PyObject *other)
1529 {
1530 PyObject *result;
1531 setentry *entry;
1532 Py_ssize_t pos = 0;
1533
1534 if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) {
1535 result = set_copy(so);
1536 if (result == NULL)
1537 return NULL;
1538 if (set_difference_update_internal((PySetObject *)result, other) != -1)
1539 return result;
1540 Py_DECREF(result);
1541 return NULL;
1542 }
1543
1544 result = make_new_set(Py_TYPE(so), NULL);
1545 if (result == NULL)
1546 return NULL;
1547
1548 if (PyDict_CheckExact(other)) {
1549 while (set_next(so, &pos, &entry)) {
1550 setentry entrycopy;
1551 int rv;
1552 entrycopy.hash = entry->hash;
1553 entrycopy.key = entry->key;
1554 rv = _PyDict_Contains(other, entry->key, entry->hash);
1555 if (rv < 0) {
1556 Py_DECREF(result);
1557 return NULL;
1558 }
1559 if (!rv) {
1560 if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
1561 Py_DECREF(result);
1562 return NULL;
1563 }
1564 }
1565 }
1566 return result;
1567 }
1568
1569 while (set_next(so, &pos, &entry)) {
1570 int rv = set_contains_entry((PySetObject *)other, entry);
1571 if (rv == -1) {
1572 Py_DECREF(result);
1573 return NULL;
1574 }
1575 if (!rv) {
1576 if (set_add_entry((PySetObject *)result, entry) == -1) {
1577 Py_DECREF(result);
1578 return NULL;
1579 }
1580 }
1581 }
1582 return result;
1583 }
1584
1585 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1586 set_difference_multi(PySetObject *so, PyObject *args)
1587 {
1588 Py_ssize_t i;
1589 PyObject *result, *other;
1590
1591 if (PyTuple_GET_SIZE(args) == 0)
1592 return set_copy(so);
1593
1594 other = PyTuple_GET_ITEM(args, 0);
1595 result = set_difference(so, other);
1596 if (result == NULL)
1597 return NULL;
1598
1599 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1600 other = PyTuple_GET_ITEM(args, i);
1601 if (set_difference_update_internal((PySetObject *)result, other) == -1) {
1602 Py_DECREF(result);
1603 return NULL;
1604 }
1605 }
1606 return result;
1607 }
1608
1609 PyDoc_STRVAR(difference_doc,
1610 "Return the difference of two or more sets as a new set.\n\
1611 \n\
1612 (i.e. all elements that are in this set but not the others.)");
1613 static PyObject *
set_sub(PySetObject * so,PyObject * other)1614 set_sub(PySetObject *so, PyObject *other)
1615 {
1616 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1617 Py_INCREF(Py_NotImplemented);
1618 return Py_NotImplemented;
1619 }
1620 return set_difference(so, other);
1621 }
1622
1623 static PyObject *
set_isub(PySetObject * so,PyObject * other)1624 set_isub(PySetObject *so, PyObject *other)
1625 {
1626 if (!PyAnySet_Check(other)) {
1627 Py_INCREF(Py_NotImplemented);
1628 return Py_NotImplemented;
1629 }
1630 if (set_difference_update_internal(so, other) == -1)
1631 return NULL;
1632 Py_INCREF(so);
1633 return (PyObject *)so;
1634 }
1635
1636 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1637 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1638 {
1639 PySetObject *otherset;
1640 PyObject *key;
1641 Py_ssize_t pos = 0;
1642 setentry *entry;
1643
1644 if ((PyObject *)so == other)
1645 return set_clear(so);
1646
1647 if (PyDict_CheckExact(other)) {
1648 PyObject *value;
1649 int rv;
1650 long hash;
1651 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1652 setentry an_entry;
1653
1654 Py_INCREF(key);
1655 an_entry.hash = hash;
1656 an_entry.key = key;
1657
1658 rv = set_discard_entry(so, &an_entry);
1659 if (rv == -1) {
1660 Py_DECREF(key);
1661 return NULL;
1662 }
1663 if (rv == DISCARD_NOTFOUND) {
1664 if (set_add_entry(so, &an_entry) == -1) {
1665 Py_DECREF(key);
1666 return NULL;
1667 }
1668 }
1669 Py_DECREF(key);
1670 }
1671 Py_RETURN_NONE;
1672 }
1673
1674 if (PyAnySet_Check(other)) {
1675 Py_INCREF(other);
1676 otherset = (PySetObject *)other;
1677 } else {
1678 otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1679 if (otherset == NULL)
1680 return NULL;
1681 }
1682
1683 while (set_next(otherset, &pos, &entry)) {
1684 int rv = set_discard_entry(so, entry);
1685 if (rv == -1) {
1686 Py_DECREF(otherset);
1687 return NULL;
1688 }
1689 if (rv == DISCARD_NOTFOUND) {
1690 if (set_add_entry(so, entry) == -1) {
1691 Py_DECREF(otherset);
1692 return NULL;
1693 }
1694 }
1695 }
1696 Py_DECREF(otherset);
1697 Py_RETURN_NONE;
1698 }
1699
1700 PyDoc_STRVAR(symmetric_difference_update_doc,
1701 "Update a set with the symmetric difference of itself and another.");
1702
1703 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1704 set_symmetric_difference(PySetObject *so, PyObject *other)
1705 {
1706 PyObject *rv;
1707 PySetObject *otherset;
1708
1709 otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1710 if (otherset == NULL)
1711 return NULL;
1712 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1713 if (rv == NULL) {
1714 Py_DECREF(otherset);
1715 return NULL;
1716 }
1717 Py_DECREF(rv);
1718 return (PyObject *)otherset;
1719 }
1720
1721 PyDoc_STRVAR(symmetric_difference_doc,
1722 "Return the symmetric difference of two sets as a new set.\n\
1723 \n\
1724 (i.e. all elements that are in exactly one of the sets.)");
1725
1726 static PyObject *
set_xor(PySetObject * so,PyObject * other)1727 set_xor(PySetObject *so, PyObject *other)
1728 {
1729 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1730 Py_INCREF(Py_NotImplemented);
1731 return Py_NotImplemented;
1732 }
1733 return set_symmetric_difference(so, other);
1734 }
1735
1736 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1737 set_ixor(PySetObject *so, PyObject *other)
1738 {
1739 PyObject *result;
1740
1741 if (!PyAnySet_Check(other)) {
1742 Py_INCREF(Py_NotImplemented);
1743 return Py_NotImplemented;
1744 }
1745 result = set_symmetric_difference_update(so, other);
1746 if (result == NULL)
1747 return NULL;
1748 Py_DECREF(result);
1749 Py_INCREF(so);
1750 return (PyObject *)so;
1751 }
1752
1753 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1754 set_issubset(PySetObject *so, PyObject *other)
1755 {
1756 setentry *entry;
1757 Py_ssize_t pos = 0;
1758
1759 if (!PyAnySet_Check(other)) {
1760 PyObject *tmp, *result;
1761 tmp = make_new_set(&PySet_Type, other);
1762 if (tmp == NULL)
1763 return NULL;
1764 result = set_issubset(so, tmp);
1765 Py_DECREF(tmp);
1766 return result;
1767 }
1768 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1769 Py_RETURN_FALSE;
1770
1771 while (set_next(so, &pos, &entry)) {
1772 int rv = set_contains_entry((PySetObject *)other, entry);
1773 if (rv == -1)
1774 return NULL;
1775 if (!rv)
1776 Py_RETURN_FALSE;
1777 }
1778 Py_RETURN_TRUE;
1779 }
1780
1781 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1782
1783 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1784 set_issuperset(PySetObject *so, PyObject *other)
1785 {
1786 PyObject *tmp, *result;
1787
1788 if (!PyAnySet_Check(other)) {
1789 tmp = make_new_set(&PySet_Type, other);
1790 if (tmp == NULL)
1791 return NULL;
1792 result = set_issuperset(so, tmp);
1793 Py_DECREF(tmp);
1794 return result;
1795 }
1796 return set_issubset((PySetObject *)other, (PyObject *)so);
1797 }
1798
1799 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1800
1801 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1802 set_richcompare(PySetObject *v, PyObject *w, int op)
1803 {
1804 PyObject *r1;
1805 int r2;
1806
1807 if(!PyAnySet_Check(w)) {
1808 Py_INCREF(Py_NotImplemented);
1809 return Py_NotImplemented;
1810 }
1811 switch (op) {
1812 case Py_EQ:
1813 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1814 Py_RETURN_FALSE;
1815 if (v->hash != -1 &&
1816 ((PySetObject *)w)->hash != -1 &&
1817 v->hash != ((PySetObject *)w)->hash)
1818 Py_RETURN_FALSE;
1819 return set_issubset(v, w);
1820 case Py_NE:
1821 r1 = set_richcompare(v, w, Py_EQ);
1822 if (r1 == NULL)
1823 return NULL;
1824 r2 = PyObject_IsTrue(r1);
1825 Py_DECREF(r1);
1826 if (r2 < 0)
1827 return NULL;
1828 return PyBool_FromLong(!r2);
1829 case Py_LE:
1830 return set_issubset(v, w);
1831 case Py_GE:
1832 return set_issuperset(v, w);
1833 case Py_LT:
1834 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1835 Py_RETURN_FALSE;
1836 return set_issubset(v, w);
1837 case Py_GT:
1838 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1839 Py_RETURN_FALSE;
1840 return set_issuperset(v, w);
1841 }
1842 Py_INCREF(Py_NotImplemented);
1843 return Py_NotImplemented;
1844 }
1845
1846 static int
set_nocmp(PyObject * self,PyObject * other)1847 set_nocmp(PyObject *self, PyObject *other)
1848 {
1849 PyErr_SetString(PyExc_TypeError, "cannot compare sets using cmp()");
1850 return -1;
1851 }
1852
1853 static PyObject *
set_add(PySetObject * so,PyObject * key)1854 set_add(PySetObject *so, PyObject *key)
1855 {
1856 if (set_add_key(so, key) == -1)
1857 return NULL;
1858 Py_RETURN_NONE;
1859 }
1860
1861 PyDoc_STRVAR(add_doc,
1862 "Add an element to a set.\n\
1863 \n\
1864 This has no effect if the element is already present.");
1865
1866 static int
set_contains(PySetObject * so,PyObject * key)1867 set_contains(PySetObject *so, PyObject *key)
1868 {
1869 PyObject *tmpkey;
1870 int rv;
1871
1872 rv = set_contains_key(so, key);
1873 if (rv == -1) {
1874 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1875 return -1;
1876 PyErr_Clear();
1877 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1878 if (tmpkey == NULL)
1879 return -1;
1880 rv = set_contains_key(so, tmpkey);
1881 Py_DECREF(tmpkey);
1882 }
1883 return rv;
1884 }
1885
1886 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1887 set_direct_contains(PySetObject *so, PyObject *key)
1888 {
1889 long result;
1890
1891 result = set_contains(so, key);
1892 if (result == -1)
1893 return NULL;
1894 return PyBool_FromLong(result);
1895 }
1896
1897 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1898
1899 static PyObject *
set_remove(PySetObject * so,PyObject * key)1900 set_remove(PySetObject *so, PyObject *key)
1901 {
1902 PyObject *tmpkey;
1903 int rv;
1904
1905 rv = set_discard_key(so, key);
1906 if (rv == -1) {
1907 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1908 return NULL;
1909 PyErr_Clear();
1910 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1911 if (tmpkey == NULL)
1912 return NULL;
1913 rv = set_discard_key(so, tmpkey);
1914 Py_DECREF(tmpkey);
1915 if (rv == -1)
1916 return NULL;
1917 }
1918
1919 if (rv == DISCARD_NOTFOUND) {
1920 set_key_error(key);
1921 return NULL;
1922 }
1923 Py_RETURN_NONE;
1924 }
1925
1926 PyDoc_STRVAR(remove_doc,
1927 "Remove an element from a set; it must be a member.\n\
1928 \n\
1929 If the element is not a member, raise a KeyError.");
1930
1931 static PyObject *
set_discard(PySetObject * so,PyObject * key)1932 set_discard(PySetObject *so, PyObject *key)
1933 {
1934 PyObject *tmpkey;
1935 int rv;
1936
1937 rv = set_discard_key(so, key);
1938 if (rv == -1) {
1939 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1940 return NULL;
1941 PyErr_Clear();
1942 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1943 if (tmpkey == NULL)
1944 return NULL;
1945 rv = set_discard_key(so, tmpkey);
1946 Py_DECREF(tmpkey);
1947 if (rv == -1)
1948 return NULL;
1949 }
1950 Py_RETURN_NONE;
1951 }
1952
1953 PyDoc_STRVAR(discard_doc,
1954 "Remove an element from a set if it is a member.\n\
1955 \n\
1956 If the element is not a member, do nothing.");
1957
1958 static PyObject *
set_reduce(PySetObject * so)1959 set_reduce(PySetObject *so)
1960 {
1961 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1962
1963 keys = PySequence_List((PyObject *)so);
1964 if (keys == NULL)
1965 goto done;
1966 args = PyTuple_Pack(1, keys);
1967 if (args == NULL)
1968 goto done;
1969 dict = PyObject_GetAttrString((PyObject *)so, "__dict__");
1970 if (dict == NULL) {
1971 PyErr_Clear();
1972 dict = Py_None;
1973 Py_INCREF(dict);
1974 }
1975 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1976 done:
1977 Py_XDECREF(args);
1978 Py_XDECREF(keys);
1979 Py_XDECREF(dict);
1980 return result;
1981 }
1982
1983 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
1984
1985 static PyObject *
set_sizeof(PySetObject * so)1986 set_sizeof(PySetObject *so)
1987 {
1988 Py_ssize_t res;
1989
1990 res = _PyObject_SIZE(Py_TYPE(so));
1991 if (so->table != so->smalltable)
1992 res = res + (so->mask + 1) * sizeof(setentry);
1993 return PyInt_FromSsize_t(res);
1994 }
1995
1996 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1997 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1998 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1999 {
2000 PyObject *iterable = NULL;
2001
2002 if (!PyAnySet_Check(self))
2003 return -1;
2004 if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds))
2005 return -1;
2006 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2007 return -1;
2008 set_clear_internal(self);
2009 self->hash = -1;
2010 if (iterable == NULL)
2011 return 0;
2012 return set_update_internal(self, iterable);
2013 }
2014
2015 static PySequenceMethods set_as_sequence = {
2016 set_len, /* sq_length */
2017 0, /* sq_concat */
2018 0, /* sq_repeat */
2019 0, /* sq_item */
2020 0, /* sq_slice */
2021 0, /* sq_ass_item */
2022 0, /* sq_ass_slice */
2023 (objobjproc)set_contains, /* sq_contains */
2024 };
2025
2026 /* set object ********************************************************/
2027
2028 #ifdef Py_DEBUG
2029 static PyObject *test_c_api(PySetObject *so);
2030
2031 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2032 All is well if assertions don't fail.");
2033 #endif
2034
2035 static PyMethodDef set_methods[] = {
2036 {"add", (PyCFunction)set_add, METH_O,
2037 add_doc},
2038 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2039 clear_doc},
2040 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2041 contains_doc},
2042 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2043 copy_doc},
2044 {"discard", (PyCFunction)set_discard, METH_O,
2045 discard_doc},
2046 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2047 difference_doc},
2048 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2049 difference_update_doc},
2050 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2051 intersection_doc},
2052 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2053 intersection_update_doc},
2054 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2055 isdisjoint_doc},
2056 {"issubset", (PyCFunction)set_issubset, METH_O,
2057 issubset_doc},
2058 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2059 issuperset_doc},
2060 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2061 pop_doc},
2062 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2063 reduce_doc},
2064 {"remove", (PyCFunction)set_remove, METH_O,
2065 remove_doc},
2066 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2067 sizeof_doc},
2068 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2069 symmetric_difference_doc},
2070 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2071 symmetric_difference_update_doc},
2072 #ifdef Py_DEBUG
2073 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2074 test_c_api_doc},
2075 #endif
2076 {"union", (PyCFunction)set_union, METH_VARARGS,
2077 union_doc},
2078 {"update", (PyCFunction)set_update, METH_VARARGS,
2079 update_doc},
2080 {NULL, NULL} /* sentinel */
2081 };
2082
2083 static PyNumberMethods set_as_number = {
2084 0, /*nb_add*/
2085 (binaryfunc)set_sub, /*nb_subtract*/
2086 0, /*nb_multiply*/
2087 0, /*nb_divide*/
2088 0, /*nb_remainder*/
2089 0, /*nb_divmod*/
2090 0, /*nb_power*/
2091 0, /*nb_negative*/
2092 0, /*nb_positive*/
2093 0, /*nb_absolute*/
2094 0, /*nb_nonzero*/
2095 0, /*nb_invert*/
2096 0, /*nb_lshift*/
2097 0, /*nb_rshift*/
2098 (binaryfunc)set_and, /*nb_and*/
2099 (binaryfunc)set_xor, /*nb_xor*/
2100 (binaryfunc)set_or, /*nb_or*/
2101 0, /*nb_coerce*/
2102 0, /*nb_int*/
2103 0, /*nb_long*/
2104 0, /*nb_float*/
2105 0, /*nb_oct*/
2106 0, /*nb_hex*/
2107 0, /*nb_inplace_add*/
2108 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2109 0, /*nb_inplace_multiply*/
2110 0, /*nb_inplace_divide*/
2111 0, /*nb_inplace_remainder*/
2112 0, /*nb_inplace_power*/
2113 0, /*nb_inplace_lshift*/
2114 0, /*nb_inplace_rshift*/
2115 (binaryfunc)set_iand, /*nb_inplace_and*/
2116 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2117 (binaryfunc)set_ior, /*nb_inplace_or*/
2118 };
2119
2120 PyDoc_STRVAR(set_doc,
2121 "set() -> new empty set object\n\
2122 set(iterable) -> new set object\n\
2123 \n\
2124 Build an unordered collection of unique elements.");
2125
2126 PyTypeObject PySet_Type = {
2127 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2128 "set", /* tp_name */
2129 sizeof(PySetObject), /* tp_basicsize */
2130 0, /* tp_itemsize */
2131 /* methods */
2132 (destructor)set_dealloc, /* tp_dealloc */
2133 (printfunc)set_tp_print, /* tp_print */
2134 0, /* tp_getattr */
2135 0, /* tp_setattr */
2136 set_nocmp, /* tp_compare */
2137 (reprfunc)set_repr, /* tp_repr */
2138 &set_as_number, /* tp_as_number */
2139 &set_as_sequence, /* tp_as_sequence */
2140 0, /* tp_as_mapping */
2141 (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
2142 0, /* tp_call */
2143 0, /* tp_str */
2144 PyObject_GenericGetAttr, /* tp_getattro */
2145 0, /* tp_setattro */
2146 0, /* tp_as_buffer */
2147 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2148 Py_TPFLAGS_BASETYPE, /* tp_flags */
2149 set_doc, /* tp_doc */
2150 (traverseproc)set_traverse, /* tp_traverse */
2151 (inquiry)set_clear_internal, /* tp_clear */
2152 (richcmpfunc)set_richcompare, /* tp_richcompare */
2153 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2154 (getiterfunc)set_iter, /* tp_iter */
2155 0, /* tp_iternext */
2156 set_methods, /* tp_methods */
2157 0, /* tp_members */
2158 0, /* tp_getset */
2159 0, /* tp_base */
2160 0, /* tp_dict */
2161 0, /* tp_descr_get */
2162 0, /* tp_descr_set */
2163 0, /* tp_dictoffset */
2164 (initproc)set_init, /* tp_init */
2165 PyType_GenericAlloc, /* tp_alloc */
2166 set_new, /* tp_new */
2167 PyObject_GC_Del, /* tp_free */
2168 };
2169
2170 /* frozenset object ********************************************************/
2171
2172
2173 static PyMethodDef frozenset_methods[] = {
2174 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2175 contains_doc},
2176 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2177 copy_doc},
2178 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2179 difference_doc},
2180 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2181 intersection_doc},
2182 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2183 isdisjoint_doc},
2184 {"issubset", (PyCFunction)set_issubset, METH_O,
2185 issubset_doc},
2186 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2187 issuperset_doc},
2188 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2189 reduce_doc},
2190 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2191 sizeof_doc},
2192 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2193 symmetric_difference_doc},
2194 {"union", (PyCFunction)set_union, METH_VARARGS,
2195 union_doc},
2196 {NULL, NULL} /* sentinel */
2197 };
2198
2199 static PyNumberMethods frozenset_as_number = {
2200 0, /*nb_add*/
2201 (binaryfunc)set_sub, /*nb_subtract*/
2202 0, /*nb_multiply*/
2203 0, /*nb_divide*/
2204 0, /*nb_remainder*/
2205 0, /*nb_divmod*/
2206 0, /*nb_power*/
2207 0, /*nb_negative*/
2208 0, /*nb_positive*/
2209 0, /*nb_absolute*/
2210 0, /*nb_nonzero*/
2211 0, /*nb_invert*/
2212 0, /*nb_lshift*/
2213 0, /*nb_rshift*/
2214 (binaryfunc)set_and, /*nb_and*/
2215 (binaryfunc)set_xor, /*nb_xor*/
2216 (binaryfunc)set_or, /*nb_or*/
2217 };
2218
2219 PyDoc_STRVAR(frozenset_doc,
2220 "frozenset() -> empty frozenset object\n\
2221 frozenset(iterable) -> frozenset object\n\
2222 \n\
2223 Build an immutable unordered collection of unique elements.");
2224
2225 PyTypeObject PyFrozenSet_Type = {
2226 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2227 "frozenset", /* tp_name */
2228 sizeof(PySetObject), /* tp_basicsize */
2229 0, /* tp_itemsize */
2230 /* methods */
2231 (destructor)set_dealloc, /* tp_dealloc */
2232 (printfunc)set_tp_print, /* tp_print */
2233 0, /* tp_getattr */
2234 0, /* tp_setattr */
2235 set_nocmp, /* tp_compare */
2236 (reprfunc)set_repr, /* tp_repr */
2237 &frozenset_as_number, /* tp_as_number */
2238 &set_as_sequence, /* tp_as_sequence */
2239 0, /* tp_as_mapping */
2240 frozenset_hash, /* tp_hash */
2241 0, /* tp_call */
2242 0, /* tp_str */
2243 PyObject_GenericGetAttr, /* tp_getattro */
2244 0, /* tp_setattro */
2245 0, /* tp_as_buffer */
2246 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2247 Py_TPFLAGS_BASETYPE, /* tp_flags */
2248 frozenset_doc, /* tp_doc */
2249 (traverseproc)set_traverse, /* tp_traverse */
2250 (inquiry)set_clear_internal, /* tp_clear */
2251 (richcmpfunc)set_richcompare, /* tp_richcompare */
2252 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2253 (getiterfunc)set_iter, /* tp_iter */
2254 0, /* tp_iternext */
2255 frozenset_methods, /* tp_methods */
2256 0, /* tp_members */
2257 0, /* tp_getset */
2258 0, /* tp_base */
2259 0, /* tp_dict */
2260 0, /* tp_descr_get */
2261 0, /* tp_descr_set */
2262 0, /* tp_dictoffset */
2263 0, /* tp_init */
2264 PyType_GenericAlloc, /* tp_alloc */
2265 frozenset_new, /* tp_new */
2266 PyObject_GC_Del, /* tp_free */
2267 };
2268
2269
2270 /***** C API functions *************************************************/
2271
2272 PyObject *
PySet_New(PyObject * iterable)2273 PySet_New(PyObject *iterable)
2274 {
2275 return make_new_set(&PySet_Type, iterable);
2276 }
2277
2278 PyObject *
PyFrozenSet_New(PyObject * iterable)2279 PyFrozenSet_New(PyObject *iterable)
2280 {
2281 return make_new_set(&PyFrozenSet_Type, iterable);
2282 }
2283
2284 Py_ssize_t
PySet_Size(PyObject * anyset)2285 PySet_Size(PyObject *anyset)
2286 {
2287 if (!PyAnySet_Check(anyset)) {
2288 PyErr_BadInternalCall();
2289 return -1;
2290 }
2291 return PySet_GET_SIZE(anyset);
2292 }
2293
2294 int
PySet_Clear(PyObject * set)2295 PySet_Clear(PyObject *set)
2296 {
2297 if (!PySet_Check(set)) {
2298 PyErr_BadInternalCall();
2299 return -1;
2300 }
2301 return set_clear_internal((PySetObject *)set);
2302 }
2303
2304 int
PySet_Contains(PyObject * anyset,PyObject * key)2305 PySet_Contains(PyObject *anyset, PyObject *key)
2306 {
2307 if (!PyAnySet_Check(anyset)) {
2308 PyErr_BadInternalCall();
2309 return -1;
2310 }
2311 return set_contains_key((PySetObject *)anyset, key);
2312 }
2313
2314 int
PySet_Discard(PyObject * set,PyObject * key)2315 PySet_Discard(PyObject *set, PyObject *key)
2316 {
2317 if (!PySet_Check(set)) {
2318 PyErr_BadInternalCall();
2319 return -1;
2320 }
2321 return set_discard_key((PySetObject *)set, key);
2322 }
2323
2324 int
PySet_Add(PyObject * anyset,PyObject * key)2325 PySet_Add(PyObject *anyset, PyObject *key)
2326 {
2327 if (!PySet_Check(anyset) &&
2328 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2329 PyErr_BadInternalCall();
2330 return -1;
2331 }
2332 return set_add_key((PySetObject *)anyset, key);
2333 }
2334
2335 int
_PySet_Next(PyObject * set,Py_ssize_t * pos,PyObject ** key)2336 _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **key)
2337 {
2338 setentry *entry_ptr;
2339
2340 if (!PyAnySet_Check(set)) {
2341 PyErr_BadInternalCall();
2342 return -1;
2343 }
2344 if (set_next((PySetObject *)set, pos, &entry_ptr) == 0)
2345 return 0;
2346 *key = entry_ptr->key;
2347 return 1;
2348 }
2349
2350 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,long * hash)2351 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, long *hash)
2352 {
2353 setentry *entry;
2354
2355 if (!PyAnySet_Check(set)) {
2356 PyErr_BadInternalCall();
2357 return -1;
2358 }
2359 if (set_next((PySetObject *)set, pos, &entry) == 0)
2360 return 0;
2361 *key = entry->key;
2362 *hash = entry->hash;
2363 return 1;
2364 }
2365
2366 PyObject *
PySet_Pop(PyObject * set)2367 PySet_Pop(PyObject *set)
2368 {
2369 if (!PySet_Check(set)) {
2370 PyErr_BadInternalCall();
2371 return NULL;
2372 }
2373 return set_pop((PySetObject *)set);
2374 }
2375
2376 int
_PySet_Update(PyObject * set,PyObject * iterable)2377 _PySet_Update(PyObject *set, PyObject *iterable)
2378 {
2379 if (!PySet_Check(set)) {
2380 PyErr_BadInternalCall();
2381 return -1;
2382 }
2383 return set_update_internal((PySetObject *)set, iterable);
2384 }
2385
2386 #ifdef Py_DEBUG
2387
2388 /* Test code to be called with any three element set.
2389 Returns True and original set is restored. */
2390
2391 #define assertRaises(call_return_value, exception) \
2392 do { \
2393 assert(call_return_value); \
2394 assert(PyErr_ExceptionMatches(exception)); \
2395 PyErr_Clear(); \
2396 } while(0)
2397
2398 static PyObject *
test_c_api(PySetObject * so)2399 test_c_api(PySetObject *so)
2400 {
2401 Py_ssize_t count;
2402 char *s;
2403 Py_ssize_t i;
2404 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x;
2405 PyObject *ob = (PyObject *)so;
2406 PyObject *str;
2407
2408 /* Verify preconditions */
2409 assert(PyAnySet_Check(ob));
2410 assert(PyAnySet_CheckExact(ob));
2411 assert(!PyFrozenSet_CheckExact(ob));
2412
2413 /* so.clear(); so |= set("abc"); */
2414 str = PyString_FromString("abc");
2415 if (str == NULL)
2416 return NULL;
2417 set_clear_internal(so);
2418 if (set_update_internal(so, str) == -1) {
2419 Py_DECREF(str);
2420 return NULL;
2421 }
2422 Py_DECREF(str);
2423
2424 /* Exercise type/size checks */
2425 assert(PySet_Size(ob) == 3);
2426 assert(PySet_GET_SIZE(ob) == 3);
2427
2428 /* Raise TypeError for non-iterable constructor arguments */
2429 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2430 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2431
2432 /* Raise TypeError for unhashable key */
2433 dup = PySet_New(ob);
2434 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2435 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2436 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2437
2438 /* Exercise successful pop, contains, add, and discard */
2439 elem = PySet_Pop(ob);
2440 assert(PySet_Contains(ob, elem) == 0);
2441 assert(PySet_GET_SIZE(ob) == 2);
2442 assert(PySet_Add(ob, elem) == 0);
2443 assert(PySet_Contains(ob, elem) == 1);
2444 assert(PySet_GET_SIZE(ob) == 3);
2445 assert(PySet_Discard(ob, elem) == 1);
2446 assert(PySet_GET_SIZE(ob) == 2);
2447 assert(PySet_Discard(ob, elem) == 0);
2448 assert(PySet_GET_SIZE(ob) == 2);
2449
2450 /* Exercise clear */
2451 dup2 = PySet_New(dup);
2452 assert(PySet_Clear(dup2) == 0);
2453 assert(PySet_Size(dup2) == 0);
2454 Py_DECREF(dup2);
2455
2456 /* Raise SystemError on clear or update of frozen set */
2457 f = PyFrozenSet_New(dup);
2458 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2459 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2460 assert(PySet_Add(f, elem) == 0);
2461 Py_INCREF(f);
2462 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2463 Py_DECREF(f);
2464 Py_DECREF(f);
2465
2466 /* Exercise direct iteration */
2467 i = 0, count = 0;
2468 while (_PySet_Next((PyObject *)dup, &i, &x)) {
2469 s = PyString_AsString(x);
2470 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2471 count++;
2472 }
2473 assert(count == 3);
2474
2475 /* Exercise updates */
2476 dup2 = PySet_New(NULL);
2477 assert(_PySet_Update(dup2, dup) == 0);
2478 assert(PySet_Size(dup2) == 3);
2479 assert(_PySet_Update(dup2, dup) == 0);
2480 assert(PySet_Size(dup2) == 3);
2481 Py_DECREF(dup2);
2482
2483 /* Raise SystemError when self argument is not a set or frozenset. */
2484 t = PyTuple_New(0);
2485 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2486 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2487 Py_DECREF(t);
2488
2489 /* Raise SystemError when self argument is not a set. */
2490 f = PyFrozenSet_New(dup);
2491 assert(PySet_Size(f) == 3);
2492 assert(PyFrozenSet_CheckExact(f));
2493 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2494 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2495 Py_DECREF(f);
2496
2497 /* Raise KeyError when popping from an empty set */
2498 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2499 Py_DECREF(ob);
2500 assert(PySet_GET_SIZE(ob) == 0);
2501 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2502
2503 /* Restore the set from the copy using the PyNumber API */
2504 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2505 Py_DECREF(ob);
2506
2507 /* Verify constructors accept NULL arguments */
2508 f = PySet_New(NULL);
2509 assert(f != NULL);
2510 assert(PySet_GET_SIZE(f) == 0);
2511 Py_DECREF(f);
2512 f = PyFrozenSet_New(NULL);
2513 assert(f != NULL);
2514 assert(PyFrozenSet_CheckExact(f));
2515 assert(PySet_GET_SIZE(f) == 0);
2516 Py_DECREF(f);
2517
2518 Py_DECREF(elem);
2519 Py_DECREF(dup);
2520 Py_RETURN_TRUE;
2521 }
2522
2523 #undef assertRaises
2524
2525 #endif
2526