1
2 /* set object implementation
3 Written and maintained by Raymond D. Hettinger <python@rcn.com>
4 Derived from Lib/sets.py and Objects/dictobject.c.
5 */
6
7 #include "Python.h"
8 #include "structmember.h"
9
10 /* Set a key error with the specified argument, wrapping it in a
11 * tuple automatically so that tuple keys are not unpacked as the
12 * exception arguments. */
13 static void
set_key_error(PyObject * arg)14 set_key_error(PyObject *arg)
15 {
16 PyObject *tup;
17 tup = PyTuple_Pack(1, arg);
18 if (!tup)
19 return; /* caller will expect error to be set anyway */
20 PyErr_SetObject(PyExc_KeyError, tup);
21 Py_DECREF(tup);
22 }
23
24 /* This must be >= 1. */
25 #define PERTURB_SHIFT 5
26
27 /* Object used as dummy key to fill deleted entries */
28 static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
29
30 #ifdef Py_REF_DEBUG
31 PyObject *
_PySet_Dummy(void)32 _PySet_Dummy(void)
33 {
34 return dummy;
35 }
36 #endif
37
38 #define INIT_NONZERO_SET_SLOTS(so) do { \
39 (so)->table = (so)->smalltable; \
40 (so)->mask = PySet_MINSIZE - 1; \
41 (so)->hash = -1; \
42 } while(0)
43
44 #define EMPTY_TO_MINSIZE(so) do { \
45 memset((so)->smalltable, 0, sizeof((so)->smalltable)); \
46 (so)->used = (so)->fill = 0; \
47 INIT_NONZERO_SET_SLOTS(so); \
48 } while(0)
49
50 /* Reuse scheme to save calls to malloc, free, and memset */
51 #ifndef PySet_MAXFREELIST
52 #define PySet_MAXFREELIST 80
53 #endif
54 static PySetObject *free_list[PySet_MAXFREELIST];
55 static int numfree = 0;
56
57 /*
58 The basic lookup function used by all operations.
59 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
60 Open addressing is preferred over chaining since the link overhead for
61 chaining would be substantial (100% with typical malloc overhead).
62
63 The initial probe index is computed as hash mod the table size. Subsequent
64 probe indices are computed as explained in Objects/dictobject.c.
65
66 All arithmetic on hash should ignore overflow.
67
68 Unlike the dictionary implementation, the lookkey functions can return
69 NULL if the rich comparison returns an error.
70 */
71
72 static setentry *
set_lookkey(PySetObject * so,PyObject * key,register long hash)73 set_lookkey(PySetObject *so, PyObject *key, register long hash)
74 {
75 register Py_ssize_t i;
76 register size_t perturb;
77 register setentry *freeslot;
78 register size_t mask = so->mask;
79 setentry *table = so->table;
80 register setentry *entry;
81 register int cmp;
82 PyObject *startkey;
83
84 i = hash & mask;
85 entry = &table[i];
86 if (entry->key == NULL || entry->key == key)
87 return entry;
88
89 if (entry->key == dummy)
90 freeslot = entry;
91 else {
92 if (entry->hash == hash) {
93 startkey = entry->key;
94 Py_INCREF(startkey);
95 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
96 Py_DECREF(startkey);
97 if (cmp < 0)
98 return NULL;
99 if (table == so->table && entry->key == startkey) {
100 if (cmp > 0)
101 return entry;
102 }
103 else {
104 /* The compare did major nasty stuff to the
105 * set: start over.
106 */
107 return set_lookkey(so, key, hash);
108 }
109 }
110 freeslot = NULL;
111 }
112
113 /* In the loop, key == dummy is by far (factor of 100s) the
114 least likely outcome, so test for that last. */
115 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
116 i = (i << 2) + i + perturb + 1;
117 entry = &table[i & mask];
118 if (entry->key == NULL) {
119 if (freeslot != NULL)
120 entry = freeslot;
121 break;
122 }
123 if (entry->key == key)
124 break;
125 if (entry->hash == hash && entry->key != dummy) {
126 startkey = entry->key;
127 Py_INCREF(startkey);
128 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
129 Py_DECREF(startkey);
130 if (cmp < 0)
131 return NULL;
132 if (table == so->table && entry->key == startkey) {
133 if (cmp > 0)
134 break;
135 }
136 else {
137 /* The compare did major nasty stuff to the
138 * set: start over.
139 */
140 return set_lookkey(so, key, hash);
141 }
142 }
143 else if (entry->key == dummy && freeslot == NULL)
144 freeslot = entry;
145 }
146 return entry;
147 }
148
149 /*
150 * Hacked up version of set_lookkey which can assume keys are always strings;
151 * This means we can always use _PyString_Eq directly and not have to check to
152 * see if the comparison altered the table.
153 */
154 static setentry *
set_lookkey_string(PySetObject * so,PyObject * key,register long hash)155 set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
156 {
157 register Py_ssize_t i;
158 register size_t perturb;
159 register setentry *freeslot;
160 register size_t mask = so->mask;
161 setentry *table = so->table;
162 register setentry *entry;
163
164 /* Make sure this function doesn't have to handle non-string keys,
165 including subclasses of str; e.g., one reason to subclass
166 strings is to override __eq__, and for speed we don't cater to
167 that here. */
168 if (!PyString_CheckExact(key)) {
169 so->lookup = set_lookkey;
170 return set_lookkey(so, key, hash);
171 }
172 i = hash & mask;
173 entry = &table[i];
174 if (entry->key == NULL || entry->key == key)
175 return entry;
176 if (entry->key == dummy)
177 freeslot = entry;
178 else {
179 if (entry->hash == hash && _PyString_Eq(entry->key, key))
180 return entry;
181 freeslot = NULL;
182 }
183
184 /* In the loop, key == dummy is by far (factor of 100s) the
185 least likely outcome, so test for that last. */
186 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
187 i = (i << 2) + i + perturb + 1;
188 entry = &table[i & mask];
189 if (entry->key == NULL)
190 return freeslot == NULL ? entry : freeslot;
191 if (entry->key == key
192 || (entry->hash == hash
193 && entry->key != dummy
194 && _PyString_Eq(entry->key, key)))
195 return entry;
196 if (entry->key == dummy && freeslot == NULL)
197 freeslot = entry;
198 }
199 assert(0); /* NOT REACHED */
200 return 0;
201 }
202
203 /*
204 Internal routine to insert a new key into the table.
205 Used by the public insert routine.
206 Eats a reference to key.
207 */
208 static int
set_insert_key(register PySetObject * so,PyObject * key,long hash)209 set_insert_key(register PySetObject *so, PyObject *key, long hash)
210 {
211 register setentry *entry;
212
213 assert(so->lookup != NULL);
214 entry = so->lookup(so, key, hash);
215 if (entry == NULL)
216 return -1;
217 if (entry->key == NULL) {
218 /* UNUSED */
219 so->fill++;
220 entry->key = key;
221 entry->hash = hash;
222 so->used++;
223 } else if (entry->key == dummy) {
224 /* DUMMY */
225 entry->key = key;
226 entry->hash = hash;
227 so->used++;
228 Py_DECREF(dummy);
229 } else {
230 /* ACTIVE */
231 Py_DECREF(key);
232 }
233 return 0;
234 }
235
236 /*
237 Internal routine used by set_table_resize() to insert an item which is
238 known to be absent from the set. This routine also assumes that
239 the set contains no deleted entries. Besides the performance benefit,
240 using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
241 Note that no refcounts are changed by this routine; if needed, the caller
242 is responsible for incref'ing `key`.
243 */
244 static void
set_insert_clean(register PySetObject * so,PyObject * key,long hash)245 set_insert_clean(register PySetObject *so, PyObject *key, long hash)
246 {
247 register size_t i;
248 register size_t perturb;
249 register size_t mask = (size_t)so->mask;
250 setentry *table = so->table;
251 register setentry *entry;
252
253 i = hash & mask;
254 entry = &table[i];
255 for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
256 i = (i << 2) + i + perturb + 1;
257 entry = &table[i & mask];
258 }
259 so->fill++;
260 entry->key = key;
261 entry->hash = hash;
262 so->used++;
263 }
264
265 /*
266 Restructure the table by allocating a new table and reinserting all
267 keys again. When entries have been deleted, the new table may
268 actually be smaller than the old one.
269 */
270 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)271 set_table_resize(PySetObject *so, Py_ssize_t minused)
272 {
273 Py_ssize_t newsize;
274 setentry *oldtable, *newtable, *entry;
275 Py_ssize_t i;
276 int is_oldtable_malloced;
277 setentry small_copy[PySet_MINSIZE];
278
279 assert(minused >= 0);
280
281 /* Find the smallest table size > minused. */
282 for (newsize = PySet_MINSIZE;
283 newsize <= minused && newsize > 0;
284 newsize <<= 1)
285 ;
286 if (newsize <= 0) {
287 PyErr_NoMemory();
288 return -1;
289 }
290
291 /* Get space for a new table. */
292 oldtable = so->table;
293 assert(oldtable != NULL);
294 is_oldtable_malloced = oldtable != so->smalltable;
295
296 if (newsize == PySet_MINSIZE) {
297 /* A large table is shrinking, or we can't get any smaller. */
298 newtable = so->smalltable;
299 if (newtable == oldtable) {
300 if (so->fill == so->used) {
301 /* No dummies, so no point doing anything. */
302 return 0;
303 }
304 /* We're not going to resize it, but rebuild the
305 table anyway to purge old dummy entries.
306 Subtle: This is *necessary* if fill==size,
307 as set_lookkey needs at least one virgin slot to
308 terminate failing searches. If fill < size, it's
309 merely desirable, as dummies slow searches. */
310 assert(so->fill > so->used);
311 memcpy(small_copy, oldtable, sizeof(small_copy));
312 oldtable = small_copy;
313 }
314 }
315 else {
316 newtable = PyMem_NEW(setentry, newsize);
317 if (newtable == NULL) {
318 PyErr_NoMemory();
319 return -1;
320 }
321 }
322
323 /* Make the set empty, using the new table. */
324 assert(newtable != oldtable);
325 so->table = newtable;
326 so->mask = newsize - 1;
327 memset(newtable, 0, sizeof(setentry) * newsize);
328 so->used = 0;
329 i = so->fill;
330 so->fill = 0;
331
332 /* Copy the data over; this is refcount-neutral for active entries;
333 dummy entries aren't copied over, of course */
334 for (entry = oldtable; i > 0; entry++) {
335 if (entry->key == NULL) {
336 /* UNUSED */
337 ;
338 } else if (entry->key == dummy) {
339 /* DUMMY */
340 --i;
341 assert(entry->key == dummy);
342 Py_DECREF(entry->key);
343 } else {
344 /* ACTIVE */
345 --i;
346 set_insert_clean(so, entry->key, entry->hash);
347 }
348 }
349
350 if (is_oldtable_malloced)
351 PyMem_DEL(oldtable);
352 return 0;
353 }
354
355 /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
356
357 static int
set_add_entry(register PySetObject * so,setentry * entry)358 set_add_entry(register PySetObject *so, setentry *entry)
359 {
360 register Py_ssize_t n_used;
361 PyObject *key = entry->key;
362 long hash = entry->hash;
363
364 assert(so->fill <= so->mask); /* at least one empty slot */
365 n_used = so->used;
366 Py_INCREF(key);
367 if (set_insert_key(so, key, hash) == -1) {
368 Py_DECREF(key);
369 return -1;
370 }
371 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
372 return 0;
373 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
374 }
375
376 static int
set_add_key(register PySetObject * so,PyObject * key)377 set_add_key(register PySetObject *so, PyObject *key)
378 {
379 register long hash;
380 register Py_ssize_t n_used;
381
382 if (!PyString_CheckExact(key) ||
383 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
384 hash = PyObject_Hash(key);
385 if (hash == -1)
386 return -1;
387 }
388 assert(so->fill <= so->mask); /* at least one empty slot */
389 n_used = so->used;
390 Py_INCREF(key);
391 if (set_insert_key(so, key, hash) == -1) {
392 Py_DECREF(key);
393 return -1;
394 }
395 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
396 return 0;
397 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
398 }
399
400 #define DISCARD_NOTFOUND 0
401 #define DISCARD_FOUND 1
402
403 static int
set_discard_entry(PySetObject * so,setentry * oldentry)404 set_discard_entry(PySetObject *so, setentry *oldentry)
405 { register setentry *entry;
406 PyObject *old_key;
407
408 entry = (so->lookup)(so, oldentry->key, oldentry->hash);
409 if (entry == NULL)
410 return -1;
411 if (entry->key == NULL || entry->key == dummy)
412 return DISCARD_NOTFOUND;
413 old_key = entry->key;
414 Py_INCREF(dummy);
415 entry->key = dummy;
416 so->used--;
417 Py_DECREF(old_key);
418 return DISCARD_FOUND;
419 }
420
421 static int
set_discard_key(PySetObject * so,PyObject * key)422 set_discard_key(PySetObject *so, PyObject *key)
423 {
424 register long hash;
425 register setentry *entry;
426 PyObject *old_key;
427
428 assert (PyAnySet_Check(so));
429 if (!PyString_CheckExact(key) ||
430 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
431 hash = PyObject_Hash(key);
432 if (hash == -1)
433 return -1;
434 }
435 entry = (so->lookup)(so, key, hash);
436 if (entry == NULL)
437 return -1;
438 if (entry->key == NULL || entry->key == dummy)
439 return DISCARD_NOTFOUND;
440 old_key = entry->key;
441 Py_INCREF(dummy);
442 entry->key = dummy;
443 so->used--;
444 Py_DECREF(old_key);
445 return DISCARD_FOUND;
446 }
447
448 static int
set_clear_internal(PySetObject * so)449 set_clear_internal(PySetObject *so)
450 {
451 setentry *entry, *table;
452 int table_is_malloced;
453 Py_ssize_t fill;
454 setentry small_copy[PySet_MINSIZE];
455 #ifdef Py_DEBUG
456 Py_ssize_t i, n;
457 assert (PyAnySet_Check(so));
458
459 n = so->mask + 1;
460 i = 0;
461 #endif
462
463 table = so->table;
464 assert(table != NULL);
465 table_is_malloced = table != so->smalltable;
466
467 /* This is delicate. During the process of clearing the set,
468 * decrefs can cause the set to mutate. To avoid fatal confusion
469 * (voice of experience), we have to make the set empty before
470 * clearing the slots, and never refer to anything via so->ref while
471 * clearing.
472 */
473 fill = so->fill;
474 if (table_is_malloced)
475 EMPTY_TO_MINSIZE(so);
476
477 else if (fill > 0) {
478 /* It's a small table with something that needs to be cleared.
479 * Afraid the only safe way is to copy the set entries into
480 * another small table first.
481 */
482 memcpy(small_copy, table, sizeof(small_copy));
483 table = small_copy;
484 EMPTY_TO_MINSIZE(so);
485 }
486 /* else it's a small table that's already empty */
487
488 /* Now we can finally clear things. If C had refcounts, we could
489 * assert that the refcount on table is 1 now, i.e. that this function
490 * has unique access to it, so decref side-effects can't alter it.
491 */
492 for (entry = table; fill > 0; ++entry) {
493 #ifdef Py_DEBUG
494 assert(i < n);
495 ++i;
496 #endif
497 if (entry->key) {
498 --fill;
499 Py_DECREF(entry->key);
500 }
501 #ifdef Py_DEBUG
502 else
503 assert(entry->key == NULL);
504 #endif
505 }
506
507 if (table_is_malloced)
508 PyMem_DEL(table);
509 return 0;
510 }
511
512 /*
513 * Iterate over a set table. Use like so:
514 *
515 * Py_ssize_t pos;
516 * setentry *entry;
517 * pos = 0; # important! pos should not otherwise be changed by you
518 * while (set_next(yourset, &pos, &entry)) {
519 * Refer to borrowed reference in entry->key.
520 * }
521 *
522 * CAUTION: In general, it isn't safe to use set_next in a loop that
523 * mutates the table.
524 */
525 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)526 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
527 {
528 Py_ssize_t i;
529 Py_ssize_t mask;
530 register setentry *table;
531
532 assert (PyAnySet_Check(so));
533 i = *pos_ptr;
534 assert(i >= 0);
535 table = so->table;
536 mask = so->mask;
537 while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
538 i++;
539 *pos_ptr = i+1;
540 if (i > mask)
541 return 0;
542 assert(table[i].key != NULL);
543 *entry_ptr = &table[i];
544 return 1;
545 }
546
547 static void
set_dealloc(PySetObject * so)548 set_dealloc(PySetObject *so)
549 {
550 register setentry *entry;
551 Py_ssize_t fill = so->fill;
552 PyObject_GC_UnTrack(so);
553 Py_TRASHCAN_SAFE_BEGIN(so)
554 if (so->weakreflist != NULL)
555 PyObject_ClearWeakRefs((PyObject *) so);
556
557 for (entry = so->table; fill > 0; entry++) {
558 if (entry->key) {
559 --fill;
560 Py_DECREF(entry->key);
561 }
562 }
563 if (so->table != so->smalltable)
564 PyMem_DEL(so->table);
565 if (numfree < PySet_MAXFREELIST && PyAnySet_CheckExact(so))
566 free_list[numfree++] = so;
567 else
568 Py_TYPE(so)->tp_free(so);
569 Py_TRASHCAN_SAFE_END(so)
570 }
571
572 static int
set_tp_print(PySetObject * so,FILE * fp,int flags)573 set_tp_print(PySetObject *so, FILE *fp, int flags)
574 {
575 setentry *entry;
576 Py_ssize_t pos=0;
577 char *emit = ""; /* No separator emitted on first pass */
578 char *separator = ", ";
579 int status = Py_ReprEnter((PyObject*)so);
580
581 if (status != 0) {
582 if (status < 0)
583 return status;
584 Py_BEGIN_ALLOW_THREADS
585 fprintf(fp, "%s(...)", so->ob_type->tp_name);
586 Py_END_ALLOW_THREADS
587 return 0;
588 }
589
590 Py_BEGIN_ALLOW_THREADS
591 fprintf(fp, "%s([", so->ob_type->tp_name);
592 Py_END_ALLOW_THREADS
593 while (set_next(so, &pos, &entry)) {
594 Py_BEGIN_ALLOW_THREADS
595 fputs(emit, fp);
596 Py_END_ALLOW_THREADS
597 emit = separator;
598 if (PyObject_Print(entry->key, fp, 0) != 0) {
599 Py_ReprLeave((PyObject*)so);
600 return -1;
601 }
602 }
603 Py_BEGIN_ALLOW_THREADS
604 fputs("])", fp);
605 Py_END_ALLOW_THREADS
606 Py_ReprLeave((PyObject*)so);
607 return 0;
608 }
609
610 static PyObject *
set_repr(PySetObject * so)611 set_repr(PySetObject *so)
612 {
613 PyObject *keys, *result=NULL, *listrepr;
614 int status = Py_ReprEnter((PyObject*)so);
615
616 if (status != 0) {
617 if (status < 0)
618 return NULL;
619 return PyString_FromFormat("%s(...)", so->ob_type->tp_name);
620 }
621
622 keys = PySequence_List((PyObject *)so);
623 if (keys == NULL)
624 goto done;
625 listrepr = PyObject_Repr(keys);
626 Py_DECREF(keys);
627 if (listrepr == NULL)
628 goto done;
629
630 result = PyString_FromFormat("%s(%s)", so->ob_type->tp_name,
631 PyString_AS_STRING(listrepr));
632 Py_DECREF(listrepr);
633 done:
634 Py_ReprLeave((PyObject*)so);
635 return result;
636 }
637
638 static Py_ssize_t
set_len(PyObject * so)639 set_len(PyObject *so)
640 {
641 return ((PySetObject *)so)->used;
642 }
643
644 static int
set_merge(PySetObject * so,PyObject * otherset)645 set_merge(PySetObject *so, PyObject *otherset)
646 {
647 PySetObject *other;
648 PyObject *key;
649 long hash;
650 register Py_ssize_t i;
651 register setentry *entry;
652
653 assert (PyAnySet_Check(so));
654 assert (PyAnySet_Check(otherset));
655
656 other = (PySetObject*)otherset;
657 if (other == so || other->used == 0)
658 /* a.update(a) or a.update({}); nothing to do */
659 return 0;
660 /* Do one big resize at the start, rather than
661 * incrementally resizing as we insert new keys. Expect
662 * that there will be no (or few) overlapping keys.
663 */
664 if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
665 if (set_table_resize(so, (so->used + other->used)*2) != 0)
666 return -1;
667 }
668 for (i = 0; i <= other->mask; i++) {
669 entry = &other->table[i];
670 key = entry->key;
671 hash = entry->hash;
672 if (key != NULL &&
673 key != dummy) {
674 Py_INCREF(key);
675 if (set_insert_key(so, key, hash) == -1) {
676 Py_DECREF(key);
677 return -1;
678 }
679 }
680 }
681 return 0;
682 }
683
684 static int
set_contains_key(PySetObject * so,PyObject * key)685 set_contains_key(PySetObject *so, PyObject *key)
686 {
687 long hash;
688 setentry *entry;
689
690 if (!PyString_CheckExact(key) ||
691 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
692 hash = PyObject_Hash(key);
693 if (hash == -1)
694 return -1;
695 }
696 entry = (so->lookup)(so, key, hash);
697 if (entry == NULL)
698 return -1;
699 key = entry->key;
700 return key != NULL && key != dummy;
701 }
702
703 static int
set_contains_entry(PySetObject * so,setentry * entry)704 set_contains_entry(PySetObject *so, setentry *entry)
705 {
706 PyObject *key;
707 setentry *lu_entry;
708
709 lu_entry = (so->lookup)(so, entry->key, entry->hash);
710 if (lu_entry == NULL)
711 return -1;
712 key = lu_entry->key;
713 return key != NULL && key != dummy;
714 }
715
716 static PyObject *
set_pop(PySetObject * so)717 set_pop(PySetObject *so)
718 {
719 register Py_ssize_t i = 0;
720 register setentry *entry;
721 PyObject *key;
722
723 assert (PyAnySet_Check(so));
724 if (so->used == 0) {
725 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
726 return NULL;
727 }
728
729 /* Set entry to "the first" unused or dummy set entry. We abuse
730 * the hash field of slot 0 to hold a search finger:
731 * If slot 0 has a value, use slot 0.
732 * Else slot 0 is being used to hold a search finger,
733 * and we use its hash value as the first index to look.
734 */
735 entry = &so->table[0];
736 if (entry->key == NULL || entry->key == dummy) {
737 i = entry->hash;
738 /* The hash field may be a real hash value, or it may be a
739 * legit search finger, or it may be a once-legit search
740 * finger that's out of bounds now because it wrapped around
741 * or the table shrunk -- simply make sure it's in bounds now.
742 */
743 if (i > so->mask || i < 1)
744 i = 1; /* skip slot 0 */
745 while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
746 i++;
747 if (i > so->mask)
748 i = 1;
749 }
750 }
751 key = entry->key;
752 Py_INCREF(dummy);
753 entry->key = dummy;
754 so->used--;
755 so->table[0].hash = i + 1; /* next place to start */
756 return key;
757 }
758
759 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
760 Raises KeyError if the set is empty.");
761
762 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)763 set_traverse(PySetObject *so, visitproc visit, void *arg)
764 {
765 Py_ssize_t pos = 0;
766 setentry *entry;
767
768 while (set_next(so, &pos, &entry))
769 Py_VISIT(entry->key);
770 return 0;
771 }
772
773 static long
frozenset_hash(PyObject * self)774 frozenset_hash(PyObject *self)
775 {
776 PySetObject *so = (PySetObject *)self;
777 long h, hash = 1927868237L;
778 setentry *entry;
779 Py_ssize_t pos = 0;
780
781 if (so->hash != -1)
782 return so->hash;
783
784 hash *= PySet_GET_SIZE(self) + 1;
785 while (set_next(so, &pos, &entry)) {
786 /* Work to increase the bit dispersion for closely spaced hash
787 values. This is important because some use cases have many
788 combinations of a small number of elements with nearby
789 hashes so that many distinct combinations collapse to only
790 a handful of distinct hash values. */
791 h = entry->hash;
792 hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u;
793 }
794 hash = hash * 69069L + 907133923L;
795 if (hash == -1)
796 hash = 590923713L;
797 so->hash = hash;
798 return hash;
799 }
800
801 /***** Set iterator type ***********************************************/
802
803 typedef struct {
804 PyObject_HEAD
805 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
806 Py_ssize_t si_used;
807 Py_ssize_t si_pos;
808 Py_ssize_t len;
809 } setiterobject;
810
811 static void
setiter_dealloc(setiterobject * si)812 setiter_dealloc(setiterobject *si)
813 {
814 Py_XDECREF(si->si_set);
815 PyObject_GC_Del(si);
816 }
817
818 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)819 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
820 {
821 Py_VISIT(si->si_set);
822 return 0;
823 }
824
825 static PyObject *
setiter_len(setiterobject * si)826 setiter_len(setiterobject *si)
827 {
828 Py_ssize_t len = 0;
829 if (si->si_set != NULL && si->si_used == si->si_set->used)
830 len = si->len;
831 return PyInt_FromLong(len);
832 }
833
834 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
835
836 static PyMethodDef setiter_methods[] = {
837 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
838 {NULL, NULL} /* sentinel */
839 };
840
setiter_iternext(setiterobject * si)841 static PyObject *setiter_iternext(setiterobject *si)
842 {
843 PyObject *key;
844 register Py_ssize_t i, mask;
845 register setentry *entry;
846 PySetObject *so = si->si_set;
847
848 if (so == NULL)
849 return NULL;
850 assert (PyAnySet_Check(so));
851
852 if (si->si_used != so->used) {
853 PyErr_SetString(PyExc_RuntimeError,
854 "Set changed size during iteration");
855 si->si_used = -1; /* Make this state sticky */
856 return NULL;
857 }
858
859 i = si->si_pos;
860 assert(i>=0);
861 entry = so->table;
862 mask = so->mask;
863 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
864 i++;
865 si->si_pos = i+1;
866 if (i > mask)
867 goto fail;
868 si->len--;
869 key = entry[i].key;
870 Py_INCREF(key);
871 return key;
872
873 fail:
874 si->si_set = NULL;
875 Py_DECREF(so);
876 return NULL;
877 }
878
879 static PyTypeObject PySetIter_Type = {
880 PyVarObject_HEAD_INIT(&PyType_Type, 0)
881 "setiterator", /* tp_name */
882 sizeof(setiterobject), /* tp_basicsize */
883 0, /* tp_itemsize */
884 /* methods */
885 (destructor)setiter_dealloc, /* tp_dealloc */
886 0, /* tp_print */
887 0, /* tp_getattr */
888 0, /* tp_setattr */
889 0, /* tp_compare */
890 0, /* tp_repr */
891 0, /* tp_as_number */
892 0, /* tp_as_sequence */
893 0, /* tp_as_mapping */
894 0, /* tp_hash */
895 0, /* tp_call */
896 0, /* tp_str */
897 PyObject_GenericGetAttr, /* tp_getattro */
898 0, /* tp_setattro */
899 0, /* tp_as_buffer */
900 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
901 0, /* tp_doc */
902 (traverseproc)setiter_traverse, /* tp_traverse */
903 0, /* tp_clear */
904 0, /* tp_richcompare */
905 0, /* tp_weaklistoffset */
906 PyObject_SelfIter, /* tp_iter */
907 (iternextfunc)setiter_iternext, /* tp_iternext */
908 setiter_methods, /* tp_methods */
909 0,
910 };
911
912 static PyObject *
set_iter(PySetObject * so)913 set_iter(PySetObject *so)
914 {
915 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
916 if (si == NULL)
917 return NULL;
918 Py_INCREF(so);
919 si->si_set = so;
920 si->si_used = so->used;
921 si->si_pos = 0;
922 si->len = so->used;
923 _PyObject_GC_TRACK(si);
924 return (PyObject *)si;
925 }
926
927 static int
set_update_internal(PySetObject * so,PyObject * other)928 set_update_internal(PySetObject *so, PyObject *other)
929 {
930 PyObject *key, *it;
931
932 if (PyAnySet_Check(other))
933 return set_merge(so, other);
934
935 if (PyDict_CheckExact(other)) {
936 PyObject *value;
937 Py_ssize_t pos = 0;
938 long hash;
939 Py_ssize_t dictsize = PyDict_Size(other);
940
941 /* Do one big resize at the start, rather than
942 * incrementally resizing as we insert new keys. Expect
943 * that there will be no (or few) overlapping keys.
944 */
945 if (dictsize == -1)
946 return -1;
947 if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
948 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
949 return -1;
950 }
951 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
952 setentry an_entry;
953
954 an_entry.hash = hash;
955 an_entry.key = key;
956 if (set_add_entry(so, &an_entry) == -1)
957 return -1;
958 }
959 return 0;
960 }
961
962 it = PyObject_GetIter(other);
963 if (it == NULL)
964 return -1;
965
966 while ((key = PyIter_Next(it)) != NULL) {
967 if (set_add_key(so, key) == -1) {
968 Py_DECREF(it);
969 Py_DECREF(key);
970 return -1;
971 }
972 Py_DECREF(key);
973 }
974 Py_DECREF(it);
975 if (PyErr_Occurred())
976 return -1;
977 return 0;
978 }
979
980 static PyObject *
set_update(PySetObject * so,PyObject * args)981 set_update(PySetObject *so, PyObject *args)
982 {
983 Py_ssize_t i;
984
985 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
986 PyObject *other = PyTuple_GET_ITEM(args, i);
987 if (set_update_internal(so, other) == -1)
988 return NULL;
989 }
990 Py_RETURN_NONE;
991 }
992
993 PyDoc_STRVAR(update_doc,
994 "Update a set with the union of itself and others.");
995
996 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)997 make_new_set(PyTypeObject *type, PyObject *iterable)
998 {
999 register PySetObject *so = NULL;
1000
1001 if (dummy == NULL) { /* Auto-initialize dummy */
1002 dummy = PyString_FromString("<dummy key>");
1003 if (dummy == NULL)
1004 return NULL;
1005 }
1006
1007 /* create PySetObject structure */
1008 if (numfree &&
1009 (type == &PySet_Type || type == &PyFrozenSet_Type)) {
1010 so = free_list[--numfree];
1011 assert (so != NULL && PyAnySet_CheckExact(so));
1012 Py_TYPE(so) = type;
1013 _Py_NewReference((PyObject *)so);
1014 EMPTY_TO_MINSIZE(so);
1015 PyObject_GC_Track(so);
1016 } else {
1017 so = (PySetObject *)type->tp_alloc(type, 0);
1018 if (so == NULL)
1019 return NULL;
1020 /* tp_alloc has already zeroed the structure */
1021 assert(so->table == NULL && so->fill == 0 && so->used == 0);
1022 INIT_NONZERO_SET_SLOTS(so);
1023 }
1024
1025 so->lookup = set_lookkey_string;
1026 so->weakreflist = NULL;
1027
1028 if (iterable != NULL) {
1029 if (set_update_internal(so, iterable) == -1) {
1030 Py_DECREF(so);
1031 return NULL;
1032 }
1033 }
1034
1035 return (PyObject *)so;
1036 }
1037
1038 /* The empty frozenset is a singleton */
1039 static PyObject *emptyfrozenset = NULL;
1040
1041 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1042 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1043 {
1044 PyObject *iterable = NULL, *result;
1045
1046 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
1047 return NULL;
1048
1049 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1050 return NULL;
1051
1052 if (type != &PyFrozenSet_Type)
1053 return make_new_set(type, iterable);
1054
1055 if (iterable != NULL) {
1056 /* frozenset(f) is idempotent */
1057 if (PyFrozenSet_CheckExact(iterable)) {
1058 Py_INCREF(iterable);
1059 return iterable;
1060 }
1061 result = make_new_set(type, iterable);
1062 if (result == NULL || PySet_GET_SIZE(result))
1063 return result;
1064 Py_DECREF(result);
1065 }
1066 /* The empty frozenset is a singleton */
1067 if (emptyfrozenset == NULL)
1068 emptyfrozenset = make_new_set(type, NULL);
1069 Py_XINCREF(emptyfrozenset);
1070 return emptyfrozenset;
1071 }
1072
1073 void
PySet_Fini(void)1074 PySet_Fini(void)
1075 {
1076 PySetObject *so;
1077
1078 while (numfree) {
1079 numfree--;
1080 so = free_list[numfree];
1081 PyObject_GC_Del(so);
1082 }
1083 Py_CLEAR(dummy);
1084 Py_CLEAR(emptyfrozenset);
1085 }
1086
1087 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1088 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1089 {
1090 if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
1091 return NULL;
1092
1093 return make_new_set(type, NULL);
1094 }
1095
1096 /* set_swap_bodies() switches the contents of any two sets by moving their
1097 internal data pointers and, if needed, copying the internal smalltables.
1098 Semantically equivalent to:
1099
1100 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1101
1102 The function always succeeds and it leaves both objects in a stable state.
1103 Useful for creating temporary frozensets from sets for membership testing
1104 in __contains__(), discard(), and remove(). Also useful for operations
1105 that update in-place (by allowing an intermediate result to be swapped
1106 into one of the original inputs).
1107 */
1108
1109 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1110 set_swap_bodies(PySetObject *a, PySetObject *b)
1111 {
1112 Py_ssize_t t;
1113 setentry *u;
1114 setentry *(*f)(PySetObject *so, PyObject *key, long hash);
1115 setentry tab[PySet_MINSIZE];
1116 long h;
1117
1118 t = a->fill; a->fill = b->fill; b->fill = t;
1119 t = a->used; a->used = b->used; b->used = t;
1120 t = a->mask; a->mask = b->mask; b->mask = t;
1121
1122 u = a->table;
1123 if (a->table == a->smalltable)
1124 u = b->smalltable;
1125 a->table = b->table;
1126 if (b->table == b->smalltable)
1127 a->table = a->smalltable;
1128 b->table = u;
1129
1130 f = a->lookup; a->lookup = b->lookup; b->lookup = f;
1131
1132 if (a->table == a->smalltable || b->table == b->smalltable) {
1133 memcpy(tab, a->smalltable, sizeof(tab));
1134 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1135 memcpy(b->smalltable, tab, sizeof(tab));
1136 }
1137
1138 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1139 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1140 h = a->hash; a->hash = b->hash; b->hash = h;
1141 } else {
1142 a->hash = -1;
1143 b->hash = -1;
1144 }
1145 }
1146
1147 static PyObject *
set_copy(PySetObject * so)1148 set_copy(PySetObject *so)
1149 {
1150 return make_new_set(Py_TYPE(so), (PyObject *)so);
1151 }
1152
1153 static PyObject *
frozenset_copy(PySetObject * so)1154 frozenset_copy(PySetObject *so)
1155 {
1156 if (PyFrozenSet_CheckExact(so)) {
1157 Py_INCREF(so);
1158 return (PyObject *)so;
1159 }
1160 return set_copy(so);
1161 }
1162
1163 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1164
1165 static PyObject *
set_clear(PySetObject * so)1166 set_clear(PySetObject *so)
1167 {
1168 set_clear_internal(so);
1169 Py_RETURN_NONE;
1170 }
1171
1172 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1173
1174 static PyObject *
set_union(PySetObject * so,PyObject * args)1175 set_union(PySetObject *so, PyObject *args)
1176 {
1177 PySetObject *result;
1178 PyObject *other;
1179 Py_ssize_t i;
1180
1181 result = (PySetObject *)set_copy(so);
1182 if (result == NULL)
1183 return NULL;
1184
1185 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1186 other = PyTuple_GET_ITEM(args, i);
1187 if ((PyObject *)so == other)
1188 continue;
1189 if (set_update_internal(result, other) == -1) {
1190 Py_DECREF(result);
1191 return NULL;
1192 }
1193 }
1194 return (PyObject *)result;
1195 }
1196
1197 PyDoc_STRVAR(union_doc,
1198 "Return the union of sets as a new set.\n\
1199 \n\
1200 (i.e. all elements that are in either set.)");
1201
1202 static PyObject *
set_or(PySetObject * so,PyObject * other)1203 set_or(PySetObject *so, PyObject *other)
1204 {
1205 PySetObject *result;
1206
1207 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1208 Py_INCREF(Py_NotImplemented);
1209 return Py_NotImplemented;
1210 }
1211
1212 result = (PySetObject *)set_copy(so);
1213 if (result == NULL)
1214 return NULL;
1215 if ((PyObject *)so == other)
1216 return (PyObject *)result;
1217 if (set_update_internal(result, other) == -1) {
1218 Py_DECREF(result);
1219 return NULL;
1220 }
1221 return (PyObject *)result;
1222 }
1223
1224 static PyObject *
set_ior(PySetObject * so,PyObject * other)1225 set_ior(PySetObject *so, PyObject *other)
1226 {
1227 if (!PyAnySet_Check(other)) {
1228 Py_INCREF(Py_NotImplemented);
1229 return Py_NotImplemented;
1230 }
1231 if (set_update_internal(so, other) == -1)
1232 return NULL;
1233 Py_INCREF(so);
1234 return (PyObject *)so;
1235 }
1236
1237 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1238 set_intersection(PySetObject *so, PyObject *other)
1239 {
1240 PySetObject *result;
1241 PyObject *key, *it, *tmp;
1242
1243 if ((PyObject *)so == other)
1244 return set_copy(so);
1245
1246 result = (PySetObject *)make_new_set(Py_TYPE(so), NULL);
1247 if (result == NULL)
1248 return NULL;
1249
1250 if (PyAnySet_Check(other)) {
1251 Py_ssize_t pos = 0;
1252 setentry *entry;
1253
1254 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1255 tmp = (PyObject *)so;
1256 so = (PySetObject *)other;
1257 other = tmp;
1258 }
1259
1260 while (set_next((PySetObject *)other, &pos, &entry)) {
1261 int rv = set_contains_entry(so, entry);
1262 if (rv == -1) {
1263 Py_DECREF(result);
1264 return NULL;
1265 }
1266 if (rv) {
1267 if (set_add_entry(result, entry) == -1) {
1268 Py_DECREF(result);
1269 return NULL;
1270 }
1271 }
1272 }
1273 return (PyObject *)result;
1274 }
1275
1276 it = PyObject_GetIter(other);
1277 if (it == NULL) {
1278 Py_DECREF(result);
1279 return NULL;
1280 }
1281
1282 while ((key = PyIter_Next(it)) != NULL) {
1283 int rv;
1284 setentry entry;
1285 long hash = PyObject_Hash(key);
1286
1287 if (hash == -1) {
1288 Py_DECREF(it);
1289 Py_DECREF(result);
1290 Py_DECREF(key);
1291 return NULL;
1292 }
1293 entry.hash = hash;
1294 entry.key = key;
1295 rv = set_contains_entry(so, &entry);
1296 if (rv == -1) {
1297 Py_DECREF(it);
1298 Py_DECREF(result);
1299 Py_DECREF(key);
1300 return NULL;
1301 }
1302 if (rv) {
1303 if (set_add_entry(result, &entry) == -1) {
1304 Py_DECREF(it);
1305 Py_DECREF(result);
1306 Py_DECREF(key);
1307 return NULL;
1308 }
1309 }
1310 Py_DECREF(key);
1311 }
1312 Py_DECREF(it);
1313 if (PyErr_Occurred()) {
1314 Py_DECREF(result);
1315 return NULL;
1316 }
1317 return (PyObject *)result;
1318 }
1319
1320 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1321 set_intersection_multi(PySetObject *so, PyObject *args)
1322 {
1323 Py_ssize_t i;
1324 PyObject *result = (PyObject *)so;
1325
1326 if (PyTuple_GET_SIZE(args) == 0)
1327 return set_copy(so);
1328
1329 Py_INCREF(so);
1330 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1331 PyObject *other = PyTuple_GET_ITEM(args, i);
1332 PyObject *newresult = set_intersection((PySetObject *)result, other);
1333 if (newresult == NULL) {
1334 Py_DECREF(result);
1335 return NULL;
1336 }
1337 Py_DECREF(result);
1338 result = newresult;
1339 }
1340 return result;
1341 }
1342
1343 PyDoc_STRVAR(intersection_doc,
1344 "Return the intersection of two or more sets as a new set.\n\
1345 \n\
1346 (i.e. elements that are common to all of the sets.)");
1347
1348 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1349 set_intersection_update(PySetObject *so, PyObject *other)
1350 {
1351 PyObject *tmp;
1352
1353 tmp = set_intersection(so, other);
1354 if (tmp == NULL)
1355 return NULL;
1356 set_swap_bodies(so, (PySetObject *)tmp);
1357 Py_DECREF(tmp);
1358 Py_RETURN_NONE;
1359 }
1360
1361 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1362 set_intersection_update_multi(PySetObject *so, PyObject *args)
1363 {
1364 PyObject *tmp;
1365
1366 tmp = set_intersection_multi(so, args);
1367 if (tmp == NULL)
1368 return NULL;
1369 set_swap_bodies(so, (PySetObject *)tmp);
1370 Py_DECREF(tmp);
1371 Py_RETURN_NONE;
1372 }
1373
1374 PyDoc_STRVAR(intersection_update_doc,
1375 "Update a set with the intersection of itself and another.");
1376
1377 static PyObject *
set_and(PySetObject * so,PyObject * other)1378 set_and(PySetObject *so, PyObject *other)
1379 {
1380 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1381 Py_INCREF(Py_NotImplemented);
1382 return Py_NotImplemented;
1383 }
1384 return set_intersection(so, other);
1385 }
1386
1387 static PyObject *
set_iand(PySetObject * so,PyObject * other)1388 set_iand(PySetObject *so, PyObject *other)
1389 {
1390 PyObject *result;
1391
1392 if (!PyAnySet_Check(other)) {
1393 Py_INCREF(Py_NotImplemented);
1394 return Py_NotImplemented;
1395 }
1396 result = set_intersection_update(so, other);
1397 if (result == NULL)
1398 return NULL;
1399 Py_DECREF(result);
1400 Py_INCREF(so);
1401 return (PyObject *)so;
1402 }
1403
1404 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1405 set_isdisjoint(PySetObject *so, PyObject *other)
1406 {
1407 PyObject *key, *it, *tmp;
1408
1409 if ((PyObject *)so == other) {
1410 if (PySet_GET_SIZE(so) == 0)
1411 Py_RETURN_TRUE;
1412 else
1413 Py_RETURN_FALSE;
1414 }
1415
1416 if (PyAnySet_CheckExact(other)) {
1417 Py_ssize_t pos = 0;
1418 setentry *entry;
1419
1420 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1421 tmp = (PyObject *)so;
1422 so = (PySetObject *)other;
1423 other = tmp;
1424 }
1425 while (set_next((PySetObject *)other, &pos, &entry)) {
1426 int rv = set_contains_entry(so, entry);
1427 if (rv == -1)
1428 return NULL;
1429 if (rv)
1430 Py_RETURN_FALSE;
1431 }
1432 Py_RETURN_TRUE;
1433 }
1434
1435 it = PyObject_GetIter(other);
1436 if (it == NULL)
1437 return NULL;
1438
1439 while ((key = PyIter_Next(it)) != NULL) {
1440 int rv;
1441 setentry entry;
1442 long hash = PyObject_Hash(key);
1443
1444 if (hash == -1) {
1445 Py_DECREF(key);
1446 Py_DECREF(it);
1447 return NULL;
1448 }
1449 entry.hash = hash;
1450 entry.key = key;
1451 rv = set_contains_entry(so, &entry);
1452 Py_DECREF(key);
1453 if (rv == -1) {
1454 Py_DECREF(it);
1455 return NULL;
1456 }
1457 if (rv) {
1458 Py_DECREF(it);
1459 Py_RETURN_FALSE;
1460 }
1461 }
1462 Py_DECREF(it);
1463 if (PyErr_Occurred())
1464 return NULL;
1465 Py_RETURN_TRUE;
1466 }
1467
1468 PyDoc_STRVAR(isdisjoint_doc,
1469 "Return True if two sets have a null intersection.");
1470
1471 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1472 set_difference_update_internal(PySetObject *so, PyObject *other)
1473 {
1474 if ((PyObject *)so == other)
1475 return set_clear_internal(so);
1476
1477 if (PyAnySet_Check(other)) {
1478 setentry *entry;
1479 Py_ssize_t pos = 0;
1480
1481 while (set_next((PySetObject *)other, &pos, &entry))
1482 if (set_discard_entry(so, entry) == -1)
1483 return -1;
1484 } else {
1485 PyObject *key, *it;
1486 it = PyObject_GetIter(other);
1487 if (it == NULL)
1488 return -1;
1489
1490 while ((key = PyIter_Next(it)) != NULL) {
1491 if (set_discard_key(so, key) == -1) {
1492 Py_DECREF(it);
1493 Py_DECREF(key);
1494 return -1;
1495 }
1496 Py_DECREF(key);
1497 }
1498 Py_DECREF(it);
1499 if (PyErr_Occurred())
1500 return -1;
1501 }
1502 /* If more than 1/5 are dummies, then resize them away. */
1503 if ((so->fill - so->used) * 5 < so->mask)
1504 return 0;
1505 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1506 }
1507
1508 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1509 set_difference_update(PySetObject *so, PyObject *args)
1510 {
1511 Py_ssize_t i;
1512
1513 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1514 PyObject *other = PyTuple_GET_ITEM(args, i);
1515 if (set_difference_update_internal(so, other) == -1)
1516 return NULL;
1517 }
1518 Py_RETURN_NONE;
1519 }
1520
1521 PyDoc_STRVAR(difference_update_doc,
1522 "Remove all elements of another set from this set.");
1523
1524 static PyObject *
set_difference(PySetObject * so,PyObject * other)1525 set_difference(PySetObject *so, PyObject *other)
1526 {
1527 PyObject *result;
1528 setentry *entry;
1529 Py_ssize_t pos = 0;
1530
1531 if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) {
1532 result = set_copy(so);
1533 if (result == NULL)
1534 return NULL;
1535 if (set_difference_update_internal((PySetObject *)result, other) != -1)
1536 return result;
1537 Py_DECREF(result);
1538 return NULL;
1539 }
1540
1541 result = make_new_set(Py_TYPE(so), NULL);
1542 if (result == NULL)
1543 return NULL;
1544
1545 if (PyDict_CheckExact(other)) {
1546 while (set_next(so, &pos, &entry)) {
1547 setentry entrycopy;
1548 int rv;
1549 entrycopy.hash = entry->hash;
1550 entrycopy.key = entry->key;
1551 rv = _PyDict_Contains(other, entry->key, entry->hash);
1552 if (rv < 0) {
1553 Py_DECREF(result);
1554 return NULL;
1555 }
1556 if (!rv) {
1557 if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
1558 Py_DECREF(result);
1559 return NULL;
1560 }
1561 }
1562 }
1563 return result;
1564 }
1565
1566 while (set_next(so, &pos, &entry)) {
1567 int rv = set_contains_entry((PySetObject *)other, entry);
1568 if (rv == -1) {
1569 Py_DECREF(result);
1570 return NULL;
1571 }
1572 if (!rv) {
1573 if (set_add_entry((PySetObject *)result, entry) == -1) {
1574 Py_DECREF(result);
1575 return NULL;
1576 }
1577 }
1578 }
1579 return result;
1580 }
1581
1582 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1583 set_difference_multi(PySetObject *so, PyObject *args)
1584 {
1585 Py_ssize_t i;
1586 PyObject *result, *other;
1587
1588 if (PyTuple_GET_SIZE(args) == 0)
1589 return set_copy(so);
1590
1591 other = PyTuple_GET_ITEM(args, 0);
1592 result = set_difference(so, other);
1593 if (result == NULL)
1594 return NULL;
1595
1596 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1597 other = PyTuple_GET_ITEM(args, i);
1598 if (set_difference_update_internal((PySetObject *)result, other) == -1) {
1599 Py_DECREF(result);
1600 return NULL;
1601 }
1602 }
1603 return result;
1604 }
1605
1606 PyDoc_STRVAR(difference_doc,
1607 "Return the difference of two or more sets as a new set.\n\
1608 \n\
1609 (i.e. all elements that are in this set but not the others.)");
1610 static PyObject *
set_sub(PySetObject * so,PyObject * other)1611 set_sub(PySetObject *so, PyObject *other)
1612 {
1613 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1614 Py_INCREF(Py_NotImplemented);
1615 return Py_NotImplemented;
1616 }
1617 return set_difference(so, other);
1618 }
1619
1620 static PyObject *
set_isub(PySetObject * so,PyObject * other)1621 set_isub(PySetObject *so, PyObject *other)
1622 {
1623 if (!PyAnySet_Check(other)) {
1624 Py_INCREF(Py_NotImplemented);
1625 return Py_NotImplemented;
1626 }
1627 if (set_difference_update_internal(so, other) == -1)
1628 return NULL;
1629 Py_INCREF(so);
1630 return (PyObject *)so;
1631 }
1632
1633 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1634 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1635 {
1636 PySetObject *otherset;
1637 PyObject *key;
1638 Py_ssize_t pos = 0;
1639 setentry *entry;
1640
1641 if ((PyObject *)so == other)
1642 return set_clear(so);
1643
1644 if (PyDict_CheckExact(other)) {
1645 PyObject *value;
1646 int rv;
1647 long hash;
1648 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1649 setentry an_entry;
1650
1651 Py_INCREF(key);
1652 an_entry.hash = hash;
1653 an_entry.key = key;
1654
1655 rv = set_discard_entry(so, &an_entry);
1656 if (rv == -1) {
1657 Py_DECREF(key);
1658 return NULL;
1659 }
1660 if (rv == DISCARD_NOTFOUND) {
1661 if (set_add_entry(so, &an_entry) == -1) {
1662 Py_DECREF(key);
1663 return NULL;
1664 }
1665 }
1666 Py_DECREF(key);
1667 }
1668 Py_RETURN_NONE;
1669 }
1670
1671 if (PyAnySet_Check(other)) {
1672 Py_INCREF(other);
1673 otherset = (PySetObject *)other;
1674 } else {
1675 otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1676 if (otherset == NULL)
1677 return NULL;
1678 }
1679
1680 while (set_next(otherset, &pos, &entry)) {
1681 int rv = set_discard_entry(so, entry);
1682 if (rv == -1) {
1683 Py_DECREF(otherset);
1684 return NULL;
1685 }
1686 if (rv == DISCARD_NOTFOUND) {
1687 if (set_add_entry(so, entry) == -1) {
1688 Py_DECREF(otherset);
1689 return NULL;
1690 }
1691 }
1692 }
1693 Py_DECREF(otherset);
1694 Py_RETURN_NONE;
1695 }
1696
1697 PyDoc_STRVAR(symmetric_difference_update_doc,
1698 "Update a set with the symmetric difference of itself and another.");
1699
1700 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1701 set_symmetric_difference(PySetObject *so, PyObject *other)
1702 {
1703 PyObject *rv;
1704 PySetObject *otherset;
1705
1706 otherset = (PySetObject *)make_new_set(Py_TYPE(so), other);
1707 if (otherset == NULL)
1708 return NULL;
1709 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1710 if (rv == NULL)
1711 return NULL;
1712 Py_DECREF(rv);
1713 return (PyObject *)otherset;
1714 }
1715
1716 PyDoc_STRVAR(symmetric_difference_doc,
1717 "Return the symmetric difference of two sets as a new set.\n\
1718 \n\
1719 (i.e. all elements that are in exactly one of the sets.)");
1720
1721 static PyObject *
set_xor(PySetObject * so,PyObject * other)1722 set_xor(PySetObject *so, PyObject *other)
1723 {
1724 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1725 Py_INCREF(Py_NotImplemented);
1726 return Py_NotImplemented;
1727 }
1728 return set_symmetric_difference(so, other);
1729 }
1730
1731 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1732 set_ixor(PySetObject *so, PyObject *other)
1733 {
1734 PyObject *result;
1735
1736 if (!PyAnySet_Check(other)) {
1737 Py_INCREF(Py_NotImplemented);
1738 return Py_NotImplemented;
1739 }
1740 result = set_symmetric_difference_update(so, other);
1741 if (result == NULL)
1742 return NULL;
1743 Py_DECREF(result);
1744 Py_INCREF(so);
1745 return (PyObject *)so;
1746 }
1747
1748 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1749 set_issubset(PySetObject *so, PyObject *other)
1750 {
1751 setentry *entry;
1752 Py_ssize_t pos = 0;
1753
1754 if (!PyAnySet_Check(other)) {
1755 PyObject *tmp, *result;
1756 tmp = make_new_set(&PySet_Type, other);
1757 if (tmp == NULL)
1758 return NULL;
1759 result = set_issubset(so, tmp);
1760 Py_DECREF(tmp);
1761 return result;
1762 }
1763 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1764 Py_RETURN_FALSE;
1765
1766 while (set_next(so, &pos, &entry)) {
1767 int rv = set_contains_entry((PySetObject *)other, entry);
1768 if (rv == -1)
1769 return NULL;
1770 if (!rv)
1771 Py_RETURN_FALSE;
1772 }
1773 Py_RETURN_TRUE;
1774 }
1775
1776 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1777
1778 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1779 set_issuperset(PySetObject *so, PyObject *other)
1780 {
1781 PyObject *tmp, *result;
1782
1783 if (!PyAnySet_Check(other)) {
1784 tmp = make_new_set(&PySet_Type, other);
1785 if (tmp == NULL)
1786 return NULL;
1787 result = set_issuperset(so, tmp);
1788 Py_DECREF(tmp);
1789 return result;
1790 }
1791 return set_issubset((PySetObject *)other, (PyObject *)so);
1792 }
1793
1794 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1795
1796 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1797 set_richcompare(PySetObject *v, PyObject *w, int op)
1798 {
1799 PyObject *r1;
1800 int r2;
1801
1802 if(!PyAnySet_Check(w)) {
1803 Py_INCREF(Py_NotImplemented);
1804 return Py_NotImplemented;
1805 }
1806 switch (op) {
1807 case Py_EQ:
1808 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1809 Py_RETURN_FALSE;
1810 if (v->hash != -1 &&
1811 ((PySetObject *)w)->hash != -1 &&
1812 v->hash != ((PySetObject *)w)->hash)
1813 Py_RETURN_FALSE;
1814 return set_issubset(v, w);
1815 case Py_NE:
1816 r1 = set_richcompare(v, w, Py_EQ);
1817 if (r1 == NULL)
1818 return NULL;
1819 r2 = PyObject_IsTrue(r1);
1820 Py_DECREF(r1);
1821 if (r2 < 0)
1822 return NULL;
1823 return PyBool_FromLong(!r2);
1824 case Py_LE:
1825 return set_issubset(v, w);
1826 case Py_GE:
1827 return set_issuperset(v, w);
1828 case Py_LT:
1829 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1830 Py_RETURN_FALSE;
1831 return set_issubset(v, w);
1832 case Py_GT:
1833 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1834 Py_RETURN_FALSE;
1835 return set_issuperset(v, w);
1836 }
1837 Py_INCREF(Py_NotImplemented);
1838 return Py_NotImplemented;
1839 }
1840
1841 static int
set_nocmp(PyObject * self,PyObject * other)1842 set_nocmp(PyObject *self, PyObject *other)
1843 {
1844 PyErr_SetString(PyExc_TypeError, "cannot compare sets using cmp()");
1845 return -1;
1846 }
1847
1848 static PyObject *
set_add(PySetObject * so,PyObject * key)1849 set_add(PySetObject *so, PyObject *key)
1850 {
1851 if (set_add_key(so, key) == -1)
1852 return NULL;
1853 Py_RETURN_NONE;
1854 }
1855
1856 PyDoc_STRVAR(add_doc,
1857 "Add an element to a set.\n\
1858 \n\
1859 This has no effect if the element is already present.");
1860
1861 static int
set_contains(PySetObject * so,PyObject * key)1862 set_contains(PySetObject *so, PyObject *key)
1863 {
1864 PyObject *tmpkey;
1865 int rv;
1866
1867 rv = set_contains_key(so, key);
1868 if (rv == -1) {
1869 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1870 return -1;
1871 PyErr_Clear();
1872 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1873 if (tmpkey == NULL)
1874 return -1;
1875 rv = set_contains_key(so, tmpkey);
1876 Py_DECREF(tmpkey);
1877 }
1878 return rv;
1879 }
1880
1881 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1882 set_direct_contains(PySetObject *so, PyObject *key)
1883 {
1884 long result;
1885
1886 result = set_contains(so, key);
1887 if (result == -1)
1888 return NULL;
1889 return PyBool_FromLong(result);
1890 }
1891
1892 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1893
1894 static PyObject *
set_remove(PySetObject * so,PyObject * key)1895 set_remove(PySetObject *so, PyObject *key)
1896 {
1897 PyObject *tmpkey;
1898 int rv;
1899
1900 rv = set_discard_key(so, key);
1901 if (rv == -1) {
1902 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1903 return NULL;
1904 PyErr_Clear();
1905 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1906 if (tmpkey == NULL)
1907 return NULL;
1908 rv = set_discard_key(so, tmpkey);
1909 Py_DECREF(tmpkey);
1910 if (rv == -1)
1911 return NULL;
1912 }
1913
1914 if (rv == DISCARD_NOTFOUND) {
1915 set_key_error(key);
1916 return NULL;
1917 }
1918 Py_RETURN_NONE;
1919 }
1920
1921 PyDoc_STRVAR(remove_doc,
1922 "Remove an element from a set; it must be a member.\n\
1923 \n\
1924 If the element is not a member, raise a KeyError.");
1925
1926 static PyObject *
set_discard(PySetObject * so,PyObject * key)1927 set_discard(PySetObject *so, PyObject *key)
1928 {
1929 PyObject *tmpkey;
1930 int rv;
1931
1932 rv = set_discard_key(so, key);
1933 if (rv == -1) {
1934 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1935 return NULL;
1936 PyErr_Clear();
1937 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1938 if (tmpkey == NULL)
1939 return NULL;
1940 rv = set_discard_key(so, tmpkey);
1941 Py_DECREF(tmpkey);
1942 if (rv == -1)
1943 return NULL;
1944 }
1945 Py_RETURN_NONE;
1946 }
1947
1948 PyDoc_STRVAR(discard_doc,
1949 "Remove an element from a set if it is a member.\n\
1950 \n\
1951 If the element is not a member, do nothing.");
1952
1953 static PyObject *
set_reduce(PySetObject * so)1954 set_reduce(PySetObject *so)
1955 {
1956 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1957
1958 keys = PySequence_List((PyObject *)so);
1959 if (keys == NULL)
1960 goto done;
1961 args = PyTuple_Pack(1, keys);
1962 if (args == NULL)
1963 goto done;
1964 dict = PyObject_GetAttrString((PyObject *)so, "__dict__");
1965 if (dict == NULL) {
1966 PyErr_Clear();
1967 dict = Py_None;
1968 Py_INCREF(dict);
1969 }
1970 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1971 done:
1972 Py_XDECREF(args);
1973 Py_XDECREF(keys);
1974 Py_XDECREF(dict);
1975 return result;
1976 }
1977
1978 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
1979
1980 static PyObject *
set_sizeof(PySetObject * so)1981 set_sizeof(PySetObject *so)
1982 {
1983 Py_ssize_t res;
1984
1985 res = _PyObject_SIZE(Py_TYPE(so));
1986 if (so->table != so->smalltable)
1987 res = res + (so->mask + 1) * sizeof(setentry);
1988 return PyInt_FromSsize_t(res);
1989 }
1990
1991 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
1992 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)1993 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1994 {
1995 PyObject *iterable = NULL;
1996
1997 if (!PyAnySet_Check(self))
1998 return -1;
1999 if (PySet_Check(self) && !_PyArg_NoKeywords("set()", kwds))
2000 return -1;
2001 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2002 return -1;
2003 set_clear_internal(self);
2004 self->hash = -1;
2005 if (iterable == NULL)
2006 return 0;
2007 return set_update_internal(self, iterable);
2008 }
2009
2010 static PySequenceMethods set_as_sequence = {
2011 set_len, /* sq_length */
2012 0, /* sq_concat */
2013 0, /* sq_repeat */
2014 0, /* sq_item */
2015 0, /* sq_slice */
2016 0, /* sq_ass_item */
2017 0, /* sq_ass_slice */
2018 (objobjproc)set_contains, /* sq_contains */
2019 };
2020
2021 /* set object ********************************************************/
2022
2023 #ifdef Py_DEBUG
2024 static PyObject *test_c_api(PySetObject *so);
2025
2026 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2027 All is well if assertions don't fail.");
2028 #endif
2029
2030 static PyMethodDef set_methods[] = {
2031 {"add", (PyCFunction)set_add, METH_O,
2032 add_doc},
2033 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2034 clear_doc},
2035 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2036 contains_doc},
2037 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2038 copy_doc},
2039 {"discard", (PyCFunction)set_discard, METH_O,
2040 discard_doc},
2041 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2042 difference_doc},
2043 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2044 difference_update_doc},
2045 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2046 intersection_doc},
2047 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2048 intersection_update_doc},
2049 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2050 isdisjoint_doc},
2051 {"issubset", (PyCFunction)set_issubset, METH_O,
2052 issubset_doc},
2053 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2054 issuperset_doc},
2055 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2056 pop_doc},
2057 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2058 reduce_doc},
2059 {"remove", (PyCFunction)set_remove, METH_O,
2060 remove_doc},
2061 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2062 sizeof_doc},
2063 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2064 symmetric_difference_doc},
2065 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2066 symmetric_difference_update_doc},
2067 #ifdef Py_DEBUG
2068 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2069 test_c_api_doc},
2070 #endif
2071 {"union", (PyCFunction)set_union, METH_VARARGS,
2072 union_doc},
2073 {"update", (PyCFunction)set_update, METH_VARARGS,
2074 update_doc},
2075 {NULL, NULL} /* sentinel */
2076 };
2077
2078 static PyNumberMethods set_as_number = {
2079 0, /*nb_add*/
2080 (binaryfunc)set_sub, /*nb_subtract*/
2081 0, /*nb_multiply*/
2082 0, /*nb_divide*/
2083 0, /*nb_remainder*/
2084 0, /*nb_divmod*/
2085 0, /*nb_power*/
2086 0, /*nb_negative*/
2087 0, /*nb_positive*/
2088 0, /*nb_absolute*/
2089 0, /*nb_nonzero*/
2090 0, /*nb_invert*/
2091 0, /*nb_lshift*/
2092 0, /*nb_rshift*/
2093 (binaryfunc)set_and, /*nb_and*/
2094 (binaryfunc)set_xor, /*nb_xor*/
2095 (binaryfunc)set_or, /*nb_or*/
2096 0, /*nb_coerce*/
2097 0, /*nb_int*/
2098 0, /*nb_long*/
2099 0, /*nb_float*/
2100 0, /*nb_oct*/
2101 0, /*nb_hex*/
2102 0, /*nb_inplace_add*/
2103 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2104 0, /*nb_inplace_multiply*/
2105 0, /*nb_inplace_divide*/
2106 0, /*nb_inplace_remainder*/
2107 0, /*nb_inplace_power*/
2108 0, /*nb_inplace_lshift*/
2109 0, /*nb_inplace_rshift*/
2110 (binaryfunc)set_iand, /*nb_inplace_and*/
2111 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2112 (binaryfunc)set_ior, /*nb_inplace_or*/
2113 };
2114
2115 PyDoc_STRVAR(set_doc,
2116 "set() -> new empty set object\n\
2117 set(iterable) -> new set object\n\
2118 \n\
2119 Build an unordered collection of unique elements.");
2120
2121 PyTypeObject PySet_Type = {
2122 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2123 "set", /* tp_name */
2124 sizeof(PySetObject), /* tp_basicsize */
2125 0, /* tp_itemsize */
2126 /* methods */
2127 (destructor)set_dealloc, /* tp_dealloc */
2128 (printfunc)set_tp_print, /* tp_print */
2129 0, /* tp_getattr */
2130 0, /* tp_setattr */
2131 set_nocmp, /* tp_compare */
2132 (reprfunc)set_repr, /* tp_repr */
2133 &set_as_number, /* tp_as_number */
2134 &set_as_sequence, /* tp_as_sequence */
2135 0, /* tp_as_mapping */
2136 (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
2137 0, /* tp_call */
2138 0, /* tp_str */
2139 PyObject_GenericGetAttr, /* tp_getattro */
2140 0, /* tp_setattro */
2141 0, /* tp_as_buffer */
2142 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2143 Py_TPFLAGS_BASETYPE, /* tp_flags */
2144 set_doc, /* tp_doc */
2145 (traverseproc)set_traverse, /* tp_traverse */
2146 (inquiry)set_clear_internal, /* tp_clear */
2147 (richcmpfunc)set_richcompare, /* tp_richcompare */
2148 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2149 (getiterfunc)set_iter, /* tp_iter */
2150 0, /* tp_iternext */
2151 set_methods, /* tp_methods */
2152 0, /* tp_members */
2153 0, /* tp_getset */
2154 0, /* tp_base */
2155 0, /* tp_dict */
2156 0, /* tp_descr_get */
2157 0, /* tp_descr_set */
2158 0, /* tp_dictoffset */
2159 (initproc)set_init, /* tp_init */
2160 PyType_GenericAlloc, /* tp_alloc */
2161 set_new, /* tp_new */
2162 PyObject_GC_Del, /* tp_free */
2163 };
2164
2165 /* frozenset object ********************************************************/
2166
2167
2168 static PyMethodDef frozenset_methods[] = {
2169 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2170 contains_doc},
2171 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2172 copy_doc},
2173 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2174 difference_doc},
2175 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2176 intersection_doc},
2177 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2178 isdisjoint_doc},
2179 {"issubset", (PyCFunction)set_issubset, METH_O,
2180 issubset_doc},
2181 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2182 issuperset_doc},
2183 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2184 reduce_doc},
2185 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2186 sizeof_doc},
2187 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2188 symmetric_difference_doc},
2189 {"union", (PyCFunction)set_union, METH_VARARGS,
2190 union_doc},
2191 {NULL, NULL} /* sentinel */
2192 };
2193
2194 static PyNumberMethods frozenset_as_number = {
2195 0, /*nb_add*/
2196 (binaryfunc)set_sub, /*nb_subtract*/
2197 0, /*nb_multiply*/
2198 0, /*nb_divide*/
2199 0, /*nb_remainder*/
2200 0, /*nb_divmod*/
2201 0, /*nb_power*/
2202 0, /*nb_negative*/
2203 0, /*nb_positive*/
2204 0, /*nb_absolute*/
2205 0, /*nb_nonzero*/
2206 0, /*nb_invert*/
2207 0, /*nb_lshift*/
2208 0, /*nb_rshift*/
2209 (binaryfunc)set_and, /*nb_and*/
2210 (binaryfunc)set_xor, /*nb_xor*/
2211 (binaryfunc)set_or, /*nb_or*/
2212 };
2213
2214 PyDoc_STRVAR(frozenset_doc,
2215 "frozenset() -> empty frozenset object\n\
2216 frozenset(iterable) -> frozenset object\n\
2217 \n\
2218 Build an immutable unordered collection of unique elements.");
2219
2220 PyTypeObject PyFrozenSet_Type = {
2221 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2222 "frozenset", /* tp_name */
2223 sizeof(PySetObject), /* tp_basicsize */
2224 0, /* tp_itemsize */
2225 /* methods */
2226 (destructor)set_dealloc, /* tp_dealloc */
2227 (printfunc)set_tp_print, /* tp_print */
2228 0, /* tp_getattr */
2229 0, /* tp_setattr */
2230 set_nocmp, /* tp_compare */
2231 (reprfunc)set_repr, /* tp_repr */
2232 &frozenset_as_number, /* tp_as_number */
2233 &set_as_sequence, /* tp_as_sequence */
2234 0, /* tp_as_mapping */
2235 frozenset_hash, /* tp_hash */
2236 0, /* tp_call */
2237 0, /* tp_str */
2238 PyObject_GenericGetAttr, /* tp_getattro */
2239 0, /* tp_setattro */
2240 0, /* tp_as_buffer */
2241 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2242 Py_TPFLAGS_BASETYPE, /* tp_flags */
2243 frozenset_doc, /* tp_doc */
2244 (traverseproc)set_traverse, /* tp_traverse */
2245 (inquiry)set_clear_internal, /* tp_clear */
2246 (richcmpfunc)set_richcompare, /* tp_richcompare */
2247 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2248 (getiterfunc)set_iter, /* tp_iter */
2249 0, /* tp_iternext */
2250 frozenset_methods, /* tp_methods */
2251 0, /* tp_members */
2252 0, /* tp_getset */
2253 0, /* tp_base */
2254 0, /* tp_dict */
2255 0, /* tp_descr_get */
2256 0, /* tp_descr_set */
2257 0, /* tp_dictoffset */
2258 0, /* tp_init */
2259 PyType_GenericAlloc, /* tp_alloc */
2260 frozenset_new, /* tp_new */
2261 PyObject_GC_Del, /* tp_free */
2262 };
2263
2264
2265 /***** C API functions *************************************************/
2266
2267 PyObject *
PySet_New(PyObject * iterable)2268 PySet_New(PyObject *iterable)
2269 {
2270 return make_new_set(&PySet_Type, iterable);
2271 }
2272
2273 PyObject *
PyFrozenSet_New(PyObject * iterable)2274 PyFrozenSet_New(PyObject *iterable)
2275 {
2276 return make_new_set(&PyFrozenSet_Type, iterable);
2277 }
2278
2279 Py_ssize_t
PySet_Size(PyObject * anyset)2280 PySet_Size(PyObject *anyset)
2281 {
2282 if (!PyAnySet_Check(anyset)) {
2283 PyErr_BadInternalCall();
2284 return -1;
2285 }
2286 return PySet_GET_SIZE(anyset);
2287 }
2288
2289 int
PySet_Clear(PyObject * set)2290 PySet_Clear(PyObject *set)
2291 {
2292 if (!PySet_Check(set)) {
2293 PyErr_BadInternalCall();
2294 return -1;
2295 }
2296 return set_clear_internal((PySetObject *)set);
2297 }
2298
2299 int
PySet_Contains(PyObject * anyset,PyObject * key)2300 PySet_Contains(PyObject *anyset, PyObject *key)
2301 {
2302 if (!PyAnySet_Check(anyset)) {
2303 PyErr_BadInternalCall();
2304 return -1;
2305 }
2306 return set_contains_key((PySetObject *)anyset, key);
2307 }
2308
2309 int
PySet_Discard(PyObject * set,PyObject * key)2310 PySet_Discard(PyObject *set, PyObject *key)
2311 {
2312 if (!PySet_Check(set)) {
2313 PyErr_BadInternalCall();
2314 return -1;
2315 }
2316 return set_discard_key((PySetObject *)set, key);
2317 }
2318
2319 int
PySet_Add(PyObject * anyset,PyObject * key)2320 PySet_Add(PyObject *anyset, PyObject *key)
2321 {
2322 if (!PySet_Check(anyset) &&
2323 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2324 PyErr_BadInternalCall();
2325 return -1;
2326 }
2327 return set_add_key((PySetObject *)anyset, key);
2328 }
2329
2330 int
_PySet_Next(PyObject * set,Py_ssize_t * pos,PyObject ** key)2331 _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **key)
2332 {
2333 setentry *entry_ptr;
2334
2335 if (!PyAnySet_Check(set)) {
2336 PyErr_BadInternalCall();
2337 return -1;
2338 }
2339 if (set_next((PySetObject *)set, pos, &entry_ptr) == 0)
2340 return 0;
2341 *key = entry_ptr->key;
2342 return 1;
2343 }
2344
2345 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,long * hash)2346 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, long *hash)
2347 {
2348 setentry *entry;
2349
2350 if (!PyAnySet_Check(set)) {
2351 PyErr_BadInternalCall();
2352 return -1;
2353 }
2354 if (set_next((PySetObject *)set, pos, &entry) == 0)
2355 return 0;
2356 *key = entry->key;
2357 *hash = entry->hash;
2358 return 1;
2359 }
2360
2361 PyObject *
PySet_Pop(PyObject * set)2362 PySet_Pop(PyObject *set)
2363 {
2364 if (!PySet_Check(set)) {
2365 PyErr_BadInternalCall();
2366 return NULL;
2367 }
2368 return set_pop((PySetObject *)set);
2369 }
2370
2371 int
_PySet_Update(PyObject * set,PyObject * iterable)2372 _PySet_Update(PyObject *set, PyObject *iterable)
2373 {
2374 if (!PySet_Check(set)) {
2375 PyErr_BadInternalCall();
2376 return -1;
2377 }
2378 return set_update_internal((PySetObject *)set, iterable);
2379 }
2380
2381 #ifdef Py_DEBUG
2382
2383 /* Test code to be called with any three element set.
2384 Returns True and original set is restored. */
2385
2386 #define assertRaises(call_return_value, exception) \
2387 do { \
2388 assert(call_return_value); \
2389 assert(PyErr_ExceptionMatches(exception)); \
2390 PyErr_Clear(); \
2391 } while(0)
2392
2393 static PyObject *
test_c_api(PySetObject * so)2394 test_c_api(PySetObject *so)
2395 {
2396 Py_ssize_t count;
2397 char *s;
2398 Py_ssize_t i;
2399 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x;
2400 PyObject *ob = (PyObject *)so;
2401 PyObject *str;
2402
2403 /* Verify preconditions */
2404 assert(PyAnySet_Check(ob));
2405 assert(PyAnySet_CheckExact(ob));
2406 assert(!PyFrozenSet_CheckExact(ob));
2407
2408 /* so.clear(); so |= set("abc"); */
2409 str = PyString_FromString("abc");
2410 if (str == NULL)
2411 return NULL;
2412 set_clear_internal(so);
2413 if (set_update_internal(so, str) == -1) {
2414 Py_DECREF(str);
2415 return NULL;
2416 }
2417 Py_DECREF(str);
2418
2419 /* Exercise type/size checks */
2420 assert(PySet_Size(ob) == 3);
2421 assert(PySet_GET_SIZE(ob) == 3);
2422
2423 /* Raise TypeError for non-iterable constructor arguments */
2424 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2425 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2426
2427 /* Raise TypeError for unhashable key */
2428 dup = PySet_New(ob);
2429 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2430 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2431 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2432
2433 /* Exercise successful pop, contains, add, and discard */
2434 elem = PySet_Pop(ob);
2435 assert(PySet_Contains(ob, elem) == 0);
2436 assert(PySet_GET_SIZE(ob) == 2);
2437 assert(PySet_Add(ob, elem) == 0);
2438 assert(PySet_Contains(ob, elem) == 1);
2439 assert(PySet_GET_SIZE(ob) == 3);
2440 assert(PySet_Discard(ob, elem) == 1);
2441 assert(PySet_GET_SIZE(ob) == 2);
2442 assert(PySet_Discard(ob, elem) == 0);
2443 assert(PySet_GET_SIZE(ob) == 2);
2444
2445 /* Exercise clear */
2446 dup2 = PySet_New(dup);
2447 assert(PySet_Clear(dup2) == 0);
2448 assert(PySet_Size(dup2) == 0);
2449 Py_DECREF(dup2);
2450
2451 /* Raise SystemError on clear or update of frozen set */
2452 f = PyFrozenSet_New(dup);
2453 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2454 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2455 assert(PySet_Add(f, elem) == 0);
2456 Py_INCREF(f);
2457 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2458 Py_DECREF(f);
2459 Py_DECREF(f);
2460
2461 /* Exercise direct iteration */
2462 i = 0, count = 0;
2463 while (_PySet_Next((PyObject *)dup, &i, &x)) {
2464 s = PyString_AsString(x);
2465 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2466 count++;
2467 }
2468 assert(count == 3);
2469
2470 /* Exercise updates */
2471 dup2 = PySet_New(NULL);
2472 assert(_PySet_Update(dup2, dup) == 0);
2473 assert(PySet_Size(dup2) == 3);
2474 assert(_PySet_Update(dup2, dup) == 0);
2475 assert(PySet_Size(dup2) == 3);
2476 Py_DECREF(dup2);
2477
2478 /* Raise SystemError when self argument is not a set or frozenset. */
2479 t = PyTuple_New(0);
2480 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2481 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2482 Py_DECREF(t);
2483
2484 /* Raise SystemError when self argument is not a set. */
2485 f = PyFrozenSet_New(dup);
2486 assert(PySet_Size(f) == 3);
2487 assert(PyFrozenSet_CheckExact(f));
2488 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2489 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2490 Py_DECREF(f);
2491
2492 /* Raise KeyError when popping from an empty set */
2493 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2494 Py_DECREF(ob);
2495 assert(PySet_GET_SIZE(ob) == 0);
2496 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2497
2498 /* Restore the set from the copy using the PyNumber API */
2499 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2500 Py_DECREF(ob);
2501
2502 /* Verify constructors accept NULL arguments */
2503 f = PySet_New(NULL);
2504 assert(f != NULL);
2505 assert(PySet_GET_SIZE(f) == 0);
2506 Py_DECREF(f);
2507 f = PyFrozenSet_New(NULL);
2508 assert(f != NULL);
2509 assert(PyFrozenSet_CheckExact(f));
2510 assert(PySet_GET_SIZE(f) == 0);
2511 Py_DECREF(f);
2512
2513 Py_DECREF(elem);
2514 Py_DECREF(dup);
2515 Py_RETURN_TRUE;
2516 }
2517
2518 #undef assertRaises
2519
2520 #endif
2521