1
2 /* set object implementation
3
4 Written and maintained by Raymond D. Hettinger <python@rcn.com>
5 Derived from Lib/sets.py and Objects/dictobject.c.
6
7 The basic lookup function used by all operations.
8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10 The initial probe index is computed as hash mod the table size.
11 Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13 To improve cache locality, each probe inspects a series of consecutive
14 nearby entries before moving on to probes elsewhere in memory. This leaves
15 us with a hybrid of linear probing and randomized probing. The linear probing
16 reduces the cost of hash collisions because consecutive memory accesses
17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18 we then use more of the upper bits from the hash value and apply a simple
19 linear congruential random number genearator. This helps break-up long
20 chains of collisions.
21
22 All arithmetic on hash should ignore overflow.
23
24 Unlike the dictionary implementation, the lookkey function can return
25 NULL if the rich comparison returns an error.
26
27 Use cases for sets differ considerably from dictionaries where looked-up
28 keys are more likely to be present. In contrast, sets are primarily
29 about membership testing where the presence of an element is not known in
30 advance. Accordingly, the set implementation needs to optimize for both
31 the found and not-found case.
32 */
33
34 #include "Python.h"
35 #include "internal/pystate.h"
36 #include "structmember.h"
37
38 /* Object used as dummy key to fill deleted entries */
39 static PyObject _dummy_struct;
40
41 #define dummy (&_dummy_struct)
42
43
44 /* ======================================================================== */
45 /* ======= Begin logic for probing the hash table ========================= */
46
47 /* Set this to zero to turn-off linear probing */
48 #ifndef LINEAR_PROBES
49 #define LINEAR_PROBES 9
50 #endif
51
52 /* This must be >= 1 */
53 #define PERTURB_SHIFT 5
54
55 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
57 {
58 setentry *table;
59 setentry *entry;
60 size_t perturb;
61 size_t mask = so->mask;
62 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
63 size_t j;
64 int cmp;
65
66 entry = &so->table[i];
67 if (entry->key == NULL)
68 return entry;
69
70 perturb = hash;
71
72 while (1) {
73 if (entry->hash == hash) {
74 PyObject *startkey = entry->key;
75 /* startkey cannot be a dummy because the dummy hash field is -1 */
76 assert(startkey != dummy);
77 if (startkey == key)
78 return entry;
79 if (PyUnicode_CheckExact(startkey)
80 && PyUnicode_CheckExact(key)
81 && _PyUnicode_EQ(startkey, key))
82 return entry;
83 table = so->table;
84 Py_INCREF(startkey);
85 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
86 Py_DECREF(startkey);
87 if (cmp < 0) /* unlikely */
88 return NULL;
89 if (table != so->table || entry->key != startkey) /* unlikely */
90 return set_lookkey(so, key, hash);
91 if (cmp > 0) /* likely */
92 return entry;
93 mask = so->mask; /* help avoid a register spill */
94 }
95
96 if (i + LINEAR_PROBES <= mask) {
97 for (j = 0 ; j < LINEAR_PROBES ; j++) {
98 entry++;
99 if (entry->hash == 0 && entry->key == NULL)
100 return entry;
101 if (entry->hash == hash) {
102 PyObject *startkey = entry->key;
103 assert(startkey != dummy);
104 if (startkey == key)
105 return entry;
106 if (PyUnicode_CheckExact(startkey)
107 && PyUnicode_CheckExact(key)
108 && _PyUnicode_EQ(startkey, key))
109 return entry;
110 table = so->table;
111 Py_INCREF(startkey);
112 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
113 Py_DECREF(startkey);
114 if (cmp < 0)
115 return NULL;
116 if (table != so->table || entry->key != startkey)
117 return set_lookkey(so, key, hash);
118 if (cmp > 0)
119 return entry;
120 mask = so->mask;
121 }
122 }
123 }
124
125 perturb >>= PERTURB_SHIFT;
126 i = (i * 5 + 1 + perturb) & mask;
127
128 entry = &so->table[i];
129 if (entry->key == NULL)
130 return entry;
131 }
132 }
133
134 static int set_table_resize(PySetObject *, Py_ssize_t);
135
136 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)137 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
138 {
139 setentry *table;
140 setentry *freeslot;
141 setentry *entry;
142 size_t perturb;
143 size_t mask;
144 size_t i; /* Unsigned for defined overflow behavior */
145 size_t j;
146 int cmp;
147
148 /* Pre-increment is necessary to prevent arbitrary code in the rich
149 comparison from deallocating the key just before the insertion. */
150 Py_INCREF(key);
151
152 restart:
153
154 mask = so->mask;
155 i = (size_t)hash & mask;
156
157 entry = &so->table[i];
158 if (entry->key == NULL)
159 goto found_unused;
160
161 freeslot = NULL;
162 perturb = hash;
163
164 while (1) {
165 if (entry->hash == hash) {
166 PyObject *startkey = entry->key;
167 /* startkey cannot be a dummy because the dummy hash field is -1 */
168 assert(startkey != dummy);
169 if (startkey == key)
170 goto found_active;
171 if (PyUnicode_CheckExact(startkey)
172 && PyUnicode_CheckExact(key)
173 && _PyUnicode_EQ(startkey, key))
174 goto found_active;
175 table = so->table;
176 Py_INCREF(startkey);
177 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
178 Py_DECREF(startkey);
179 if (cmp > 0) /* likely */
180 goto found_active;
181 if (cmp < 0)
182 goto comparison_error;
183 /* Continuing the search from the current entry only makes
184 sense if the table and entry are unchanged; otherwise,
185 we have to restart from the beginning */
186 if (table != so->table || entry->key != startkey)
187 goto restart;
188 mask = so->mask; /* help avoid a register spill */
189 }
190 else if (entry->hash == -1)
191 freeslot = entry;
192
193 if (i + LINEAR_PROBES <= mask) {
194 for (j = 0 ; j < LINEAR_PROBES ; j++) {
195 entry++;
196 if (entry->hash == 0 && entry->key == NULL)
197 goto found_unused_or_dummy;
198 if (entry->hash == hash) {
199 PyObject *startkey = entry->key;
200 assert(startkey != dummy);
201 if (startkey == key)
202 goto found_active;
203 if (PyUnicode_CheckExact(startkey)
204 && PyUnicode_CheckExact(key)
205 && _PyUnicode_EQ(startkey, key))
206 goto found_active;
207 table = so->table;
208 Py_INCREF(startkey);
209 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
210 Py_DECREF(startkey);
211 if (cmp > 0)
212 goto found_active;
213 if (cmp < 0)
214 goto comparison_error;
215 if (table != so->table || entry->key != startkey)
216 goto restart;
217 mask = so->mask;
218 }
219 else if (entry->hash == -1)
220 freeslot = entry;
221 }
222 }
223
224 perturb >>= PERTURB_SHIFT;
225 i = (i * 5 + 1 + perturb) & mask;
226
227 entry = &so->table[i];
228 if (entry->key == NULL)
229 goto found_unused_or_dummy;
230 }
231
232 found_unused_or_dummy:
233 if (freeslot == NULL)
234 goto found_unused;
235 so->used++;
236 freeslot->key = key;
237 freeslot->hash = hash;
238 return 0;
239
240 found_unused:
241 so->fill++;
242 so->used++;
243 entry->key = key;
244 entry->hash = hash;
245 if ((size_t)so->fill*5 < mask*3)
246 return 0;
247 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
248
249 found_active:
250 Py_DECREF(key);
251 return 0;
252
253 comparison_error:
254 Py_DECREF(key);
255 return -1;
256 }
257
258 /*
259 Internal routine used by set_table_resize() to insert an item which is
260 known to be absent from the set. This routine also assumes that
261 the set contains no deleted entries. Besides the performance benefit,
262 there is also safety benefit since using set_add_entry() risks making
263 a callback in the middle of a set_table_resize(), see issue 1456209.
264 The caller is responsible for updating the key's reference count and
265 the setobject's fill and used fields.
266 */
267 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)268 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
269 {
270 setentry *entry;
271 size_t perturb = hash;
272 size_t i = (size_t)hash & mask;
273 size_t j;
274
275 while (1) {
276 entry = &table[i];
277 if (entry->key == NULL)
278 goto found_null;
279 if (i + LINEAR_PROBES <= mask) {
280 for (j = 0; j < LINEAR_PROBES; j++) {
281 entry++;
282 if (entry->key == NULL)
283 goto found_null;
284 }
285 }
286 perturb >>= PERTURB_SHIFT;
287 i = (i * 5 + 1 + perturb) & mask;
288 }
289 found_null:
290 entry->key = key;
291 entry->hash = hash;
292 }
293
294 /* ======== End logic for probing the hash table ========================== */
295 /* ======================================================================== */
296
297 /*
298 Restructure the table by allocating a new table and reinserting all
299 keys again. When entries have been deleted, the new table may
300 actually be smaller than the old one.
301 */
302 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)303 set_table_resize(PySetObject *so, Py_ssize_t minused)
304 {
305 setentry *oldtable, *newtable, *entry;
306 Py_ssize_t oldmask = so->mask;
307 size_t newmask;
308 int is_oldtable_malloced;
309 setentry small_copy[PySet_MINSIZE];
310
311 assert(minused >= 0);
312
313 /* Find the smallest table size > minused. */
314 /* XXX speed-up with intrinsics */
315 size_t newsize = PySet_MINSIZE;
316 while (newsize <= (size_t)minused) {
317 newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1.
318 }
319
320 /* Get space for a new table. */
321 oldtable = so->table;
322 assert(oldtable != NULL);
323 is_oldtable_malloced = oldtable != so->smalltable;
324
325 if (newsize == PySet_MINSIZE) {
326 /* A large table is shrinking, or we can't get any smaller. */
327 newtable = so->smalltable;
328 if (newtable == oldtable) {
329 if (so->fill == so->used) {
330 /* No dummies, so no point doing anything. */
331 return 0;
332 }
333 /* We're not going to resize it, but rebuild the
334 table anyway to purge old dummy entries.
335 Subtle: This is *necessary* if fill==size,
336 as set_lookkey needs at least one virgin slot to
337 terminate failing searches. If fill < size, it's
338 merely desirable, as dummies slow searches. */
339 assert(so->fill > so->used);
340 memcpy(small_copy, oldtable, sizeof(small_copy));
341 oldtable = small_copy;
342 }
343 }
344 else {
345 newtable = PyMem_NEW(setentry, newsize);
346 if (newtable == NULL) {
347 PyErr_NoMemory();
348 return -1;
349 }
350 }
351
352 /* Make the set empty, using the new table. */
353 assert(newtable != oldtable);
354 memset(newtable, 0, sizeof(setentry) * newsize);
355 so->mask = newsize - 1;
356 so->table = newtable;
357
358 /* Copy the data over; this is refcount-neutral for active entries;
359 dummy entries aren't copied over, of course */
360 newmask = (size_t)so->mask;
361 if (so->fill == so->used) {
362 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
363 if (entry->key != NULL) {
364 set_insert_clean(newtable, newmask, entry->key, entry->hash);
365 }
366 }
367 } else {
368 so->fill = so->used;
369 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
370 if (entry->key != NULL && entry->key != dummy) {
371 set_insert_clean(newtable, newmask, entry->key, entry->hash);
372 }
373 }
374 }
375
376 if (is_oldtable_malloced)
377 PyMem_DEL(oldtable);
378 return 0;
379 }
380
381 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)382 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
383 {
384 setentry *entry;
385
386 entry = set_lookkey(so, key, hash);
387 if (entry != NULL)
388 return entry->key != NULL;
389 return -1;
390 }
391
392 #define DISCARD_NOTFOUND 0
393 #define DISCARD_FOUND 1
394
395 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)396 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
397 {
398 setentry *entry;
399 PyObject *old_key;
400
401 entry = set_lookkey(so, key, hash);
402 if (entry == NULL)
403 return -1;
404 if (entry->key == NULL)
405 return DISCARD_NOTFOUND;
406 old_key = entry->key;
407 entry->key = dummy;
408 entry->hash = -1;
409 so->used--;
410 Py_DECREF(old_key);
411 return DISCARD_FOUND;
412 }
413
414 static int
set_add_key(PySetObject * so,PyObject * key)415 set_add_key(PySetObject *so, PyObject *key)
416 {
417 Py_hash_t hash;
418
419 if (!PyUnicode_CheckExact(key) ||
420 (hash = ((PyASCIIObject *) key)->hash) == -1) {
421 hash = PyObject_Hash(key);
422 if (hash == -1)
423 return -1;
424 }
425 return set_add_entry(so, key, hash);
426 }
427
428 static int
set_contains_key(PySetObject * so,PyObject * key)429 set_contains_key(PySetObject *so, PyObject *key)
430 {
431 Py_hash_t hash;
432
433 if (!PyUnicode_CheckExact(key) ||
434 (hash = ((PyASCIIObject *) key)->hash) == -1) {
435 hash = PyObject_Hash(key);
436 if (hash == -1)
437 return -1;
438 }
439 return set_contains_entry(so, key, hash);
440 }
441
442 static int
set_discard_key(PySetObject * so,PyObject * key)443 set_discard_key(PySetObject *so, PyObject *key)
444 {
445 Py_hash_t hash;
446
447 if (!PyUnicode_CheckExact(key) ||
448 (hash = ((PyASCIIObject *) key)->hash) == -1) {
449 hash = PyObject_Hash(key);
450 if (hash == -1)
451 return -1;
452 }
453 return set_discard_entry(so, key, hash);
454 }
455
456 static void
set_empty_to_minsize(PySetObject * so)457 set_empty_to_minsize(PySetObject *so)
458 {
459 memset(so->smalltable, 0, sizeof(so->smalltable));
460 so->fill = 0;
461 so->used = 0;
462 so->mask = PySet_MINSIZE - 1;
463 so->table = so->smalltable;
464 so->hash = -1;
465 }
466
467 static int
set_clear_internal(PySetObject * so)468 set_clear_internal(PySetObject *so)
469 {
470 setentry *entry;
471 setentry *table = so->table;
472 Py_ssize_t fill = so->fill;
473 Py_ssize_t used = so->used;
474 int table_is_malloced = table != so->smalltable;
475 setentry small_copy[PySet_MINSIZE];
476
477 assert (PyAnySet_Check(so));
478 assert(table != NULL);
479
480 /* This is delicate. During the process of clearing the set,
481 * decrefs can cause the set to mutate. To avoid fatal confusion
482 * (voice of experience), we have to make the set empty before
483 * clearing the slots, and never refer to anything via so->ref while
484 * clearing.
485 */
486 if (table_is_malloced)
487 set_empty_to_minsize(so);
488
489 else if (fill > 0) {
490 /* It's a small table with something that needs to be cleared.
491 * Afraid the only safe way is to copy the set entries into
492 * another small table first.
493 */
494 memcpy(small_copy, table, sizeof(small_copy));
495 table = small_copy;
496 set_empty_to_minsize(so);
497 }
498 /* else it's a small table that's already empty */
499
500 /* Now we can finally clear things. If C had refcounts, we could
501 * assert that the refcount on table is 1 now, i.e. that this function
502 * has unique access to it, so decref side-effects can't alter it.
503 */
504 for (entry = table; used > 0; entry++) {
505 if (entry->key && entry->key != dummy) {
506 used--;
507 Py_DECREF(entry->key);
508 }
509 }
510
511 if (table_is_malloced)
512 PyMem_DEL(table);
513 return 0;
514 }
515
516 /*
517 * Iterate over a set table. Use like so:
518 *
519 * Py_ssize_t pos;
520 * setentry *entry;
521 * pos = 0; # important! pos should not otherwise be changed by you
522 * while (set_next(yourset, &pos, &entry)) {
523 * Refer to borrowed reference in entry->key.
524 * }
525 *
526 * CAUTION: In general, it isn't safe to use set_next in a loop that
527 * mutates the table.
528 */
529 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)530 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
531 {
532 Py_ssize_t i;
533 Py_ssize_t mask;
534 setentry *entry;
535
536 assert (PyAnySet_Check(so));
537 i = *pos_ptr;
538 assert(i >= 0);
539 mask = so->mask;
540 entry = &so->table[i];
541 while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
542 i++;
543 entry++;
544 }
545 *pos_ptr = i+1;
546 if (i > mask)
547 return 0;
548 assert(entry != NULL);
549 *entry_ptr = entry;
550 return 1;
551 }
552
553 static void
set_dealloc(PySetObject * so)554 set_dealloc(PySetObject *so)
555 {
556 setentry *entry;
557 Py_ssize_t used = so->used;
558
559 /* bpo-31095: UnTrack is needed before calling any callbacks */
560 PyObject_GC_UnTrack(so);
561 Py_TRASHCAN_SAFE_BEGIN(so)
562 if (so->weakreflist != NULL)
563 PyObject_ClearWeakRefs((PyObject *) so);
564
565 for (entry = so->table; used > 0; entry++) {
566 if (entry->key && entry->key != dummy) {
567 used--;
568 Py_DECREF(entry->key);
569 }
570 }
571 if (so->table != so->smalltable)
572 PyMem_DEL(so->table);
573 Py_TYPE(so)->tp_free(so);
574 Py_TRASHCAN_SAFE_END(so)
575 }
576
577 static PyObject *
set_repr(PySetObject * so)578 set_repr(PySetObject *so)
579 {
580 PyObject *result=NULL, *keys, *listrepr, *tmp;
581 int status = Py_ReprEnter((PyObject*)so);
582
583 if (status != 0) {
584 if (status < 0)
585 return NULL;
586 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
587 }
588
589 /* shortcut for the empty set */
590 if (!so->used) {
591 Py_ReprLeave((PyObject*)so);
592 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
593 }
594
595 keys = PySequence_List((PyObject *)so);
596 if (keys == NULL)
597 goto done;
598
599 /* repr(keys)[1:-1] */
600 listrepr = PyObject_Repr(keys);
601 Py_DECREF(keys);
602 if (listrepr == NULL)
603 goto done;
604 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
605 Py_DECREF(listrepr);
606 if (tmp == NULL)
607 goto done;
608 listrepr = tmp;
609
610 if (Py_TYPE(so) != &PySet_Type)
611 result = PyUnicode_FromFormat("%s({%U})",
612 Py_TYPE(so)->tp_name,
613 listrepr);
614 else
615 result = PyUnicode_FromFormat("{%U}", listrepr);
616 Py_DECREF(listrepr);
617 done:
618 Py_ReprLeave((PyObject*)so);
619 return result;
620 }
621
622 static Py_ssize_t
set_len(PyObject * so)623 set_len(PyObject *so)
624 {
625 return ((PySetObject *)so)->used;
626 }
627
628 static int
set_merge(PySetObject * so,PyObject * otherset)629 set_merge(PySetObject *so, PyObject *otherset)
630 {
631 PySetObject *other;
632 PyObject *key;
633 Py_ssize_t i;
634 setentry *so_entry;
635 setentry *other_entry;
636
637 assert (PyAnySet_Check(so));
638 assert (PyAnySet_Check(otherset));
639
640 other = (PySetObject*)otherset;
641 if (other == so || other->used == 0)
642 /* a.update(a) or a.update(set()); nothing to do */
643 return 0;
644 /* Do one big resize at the start, rather than
645 * incrementally resizing as we insert new keys. Expect
646 * that there will be no (or few) overlapping keys.
647 */
648 if ((so->fill + other->used)*5 >= so->mask*3) {
649 if (set_table_resize(so, (so->used + other->used)*2) != 0)
650 return -1;
651 }
652 so_entry = so->table;
653 other_entry = other->table;
654
655 /* If our table is empty, and both tables have the same size, and
656 there are no dummies to eliminate, then just copy the pointers. */
657 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
658 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
659 key = other_entry->key;
660 if (key != NULL) {
661 assert(so_entry->key == NULL);
662 Py_INCREF(key);
663 so_entry->key = key;
664 so_entry->hash = other_entry->hash;
665 }
666 }
667 so->fill = other->fill;
668 so->used = other->used;
669 return 0;
670 }
671
672 /* If our table is empty, we can use set_insert_clean() */
673 if (so->fill == 0) {
674 setentry *newtable = so->table;
675 size_t newmask = (size_t)so->mask;
676 so->fill = other->used;
677 so->used = other->used;
678 for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
679 key = other_entry->key;
680 if (key != NULL && key != dummy) {
681 Py_INCREF(key);
682 set_insert_clean(newtable, newmask, key, other_entry->hash);
683 }
684 }
685 return 0;
686 }
687
688 /* We can't assure there are no duplicates, so do normal insertions */
689 for (i = 0; i <= other->mask; i++) {
690 other_entry = &other->table[i];
691 key = other_entry->key;
692 if (key != NULL && key != dummy) {
693 if (set_add_entry(so, key, other_entry->hash))
694 return -1;
695 }
696 }
697 return 0;
698 }
699
700 static PyObject *
set_pop(PySetObject * so)701 set_pop(PySetObject *so)
702 {
703 /* Make sure the search finger is in bounds */
704 Py_ssize_t i = so->finger & so->mask;
705 setentry *entry;
706 PyObject *key;
707
708 assert (PyAnySet_Check(so));
709 if (so->used == 0) {
710 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
711 return NULL;
712 }
713
714 while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
715 i++;
716 if (i > so->mask)
717 i = 0;
718 }
719 key = entry->key;
720 entry->key = dummy;
721 entry->hash = -1;
722 so->used--;
723 so->finger = i + 1; /* next place to start */
724 return key;
725 }
726
727 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
728 Raises KeyError if the set is empty.");
729
730 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)731 set_traverse(PySetObject *so, visitproc visit, void *arg)
732 {
733 Py_ssize_t pos = 0;
734 setentry *entry;
735
736 while (set_next(so, &pos, &entry))
737 Py_VISIT(entry->key);
738 return 0;
739 }
740
741 /* Work to increase the bit dispersion for closely spaced hash values.
742 This is important because some use cases have many combinations of a
743 small number of elements with nearby hashes so that many distinct
744 combinations collapse to only a handful of distinct hash values. */
745
746 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)747 _shuffle_bits(Py_uhash_t h)
748 {
749 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
750 }
751
752 /* Most of the constants in this hash algorithm are randomly chosen
753 large primes with "interesting bit patterns" and that passed tests
754 for good collision statistics on a variety of problematic datasets
755 including powersets and graph structures (such as David Eppstein's
756 graph recipes in Lib/test/test_set.py) */
757
758 static Py_hash_t
frozenset_hash(PyObject * self)759 frozenset_hash(PyObject *self)
760 {
761 PySetObject *so = (PySetObject *)self;
762 Py_uhash_t hash = 0;
763 setentry *entry;
764
765 if (so->hash != -1)
766 return so->hash;
767
768 /* Xor-in shuffled bits from every entry's hash field because xor is
769 commutative and a frozenset hash should be independent of order.
770
771 For speed, include null entries and dummy entries and then
772 subtract out their effect afterwards so that the final hash
773 depends only on active entries. This allows the code to be
774 vectorized by the compiler and it saves the unpredictable
775 branches that would arise when trying to exclude null and dummy
776 entries on every iteration. */
777
778 for (entry = so->table; entry <= &so->table[so->mask]; entry++)
779 hash ^= _shuffle_bits(entry->hash);
780
781 /* Remove the effect of an odd number of NULL entries */
782 if ((so->mask + 1 - so->fill) & 1)
783 hash ^= _shuffle_bits(0);
784
785 /* Remove the effect of an odd number of dummy entries */
786 if ((so->fill - so->used) & 1)
787 hash ^= _shuffle_bits(-1);
788
789 /* Factor in the number of active entries */
790 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
791
792 /* Disperse patterns arising in nested frozensets */
793 hash ^= (hash >> 11) ^ (hash >> 25);
794 hash = hash * 69069U + 907133923UL;
795
796 /* -1 is reserved as an error code */
797 if (hash == (Py_uhash_t)-1)
798 hash = 590923713UL;
799
800 so->hash = hash;
801 return hash;
802 }
803
804 /***** Set iterator type ***********************************************/
805
806 typedef struct {
807 PyObject_HEAD
808 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
809 Py_ssize_t si_used;
810 Py_ssize_t si_pos;
811 Py_ssize_t len;
812 } setiterobject;
813
814 static void
setiter_dealloc(setiterobject * si)815 setiter_dealloc(setiterobject *si)
816 {
817 /* bpo-31095: UnTrack is needed before calling any callbacks */
818 _PyObject_GC_UNTRACK(si);
819 Py_XDECREF(si->si_set);
820 PyObject_GC_Del(si);
821 }
822
823 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)824 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
825 {
826 Py_VISIT(si->si_set);
827 return 0;
828 }
829
830 static PyObject *
setiter_len(setiterobject * si)831 setiter_len(setiterobject *si)
832 {
833 Py_ssize_t len = 0;
834 if (si->si_set != NULL && si->si_used == si->si_set->used)
835 len = si->len;
836 return PyLong_FromSsize_t(len);
837 }
838
839 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
840
841 static PyObject *setiter_iternext(setiterobject *si);
842
843 static PyObject *
setiter_reduce(setiterobject * si)844 setiter_reduce(setiterobject *si)
845 {
846 PyObject *list;
847 setiterobject tmp;
848
849 list = PyList_New(0);
850 if (!list)
851 return NULL;
852
853 /* copy the iterator state */
854 tmp = *si;
855 Py_XINCREF(tmp.si_set);
856
857 /* iterate the temporary into a list */
858 for(;;) {
859 PyObject *element = setiter_iternext(&tmp);
860 if (element) {
861 if (PyList_Append(list, element)) {
862 Py_DECREF(element);
863 Py_DECREF(list);
864 Py_XDECREF(tmp.si_set);
865 return NULL;
866 }
867 Py_DECREF(element);
868 } else
869 break;
870 }
871 Py_XDECREF(tmp.si_set);
872 /* check for error */
873 if (tmp.si_set != NULL) {
874 /* we have an error */
875 Py_DECREF(list);
876 return NULL;
877 }
878 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), list);
879 }
880
881 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
882
883 static PyMethodDef setiter_methods[] = {
884 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
885 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
886 {NULL, NULL} /* sentinel */
887 };
888
setiter_iternext(setiterobject * si)889 static PyObject *setiter_iternext(setiterobject *si)
890 {
891 PyObject *key;
892 Py_ssize_t i, mask;
893 setentry *entry;
894 PySetObject *so = si->si_set;
895
896 if (so == NULL)
897 return NULL;
898 assert (PyAnySet_Check(so));
899
900 if (si->si_used != so->used) {
901 PyErr_SetString(PyExc_RuntimeError,
902 "Set changed size during iteration");
903 si->si_used = -1; /* Make this state sticky */
904 return NULL;
905 }
906
907 i = si->si_pos;
908 assert(i>=0);
909 entry = so->table;
910 mask = so->mask;
911 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
912 i++;
913 si->si_pos = i+1;
914 if (i > mask)
915 goto fail;
916 si->len--;
917 key = entry[i].key;
918 Py_INCREF(key);
919 return key;
920
921 fail:
922 si->si_set = NULL;
923 Py_DECREF(so);
924 return NULL;
925 }
926
927 PyTypeObject PySetIter_Type = {
928 PyVarObject_HEAD_INIT(&PyType_Type, 0)
929 "set_iterator", /* tp_name */
930 sizeof(setiterobject), /* tp_basicsize */
931 0, /* tp_itemsize */
932 /* methods */
933 (destructor)setiter_dealloc, /* tp_dealloc */
934 0, /* tp_print */
935 0, /* tp_getattr */
936 0, /* tp_setattr */
937 0, /* tp_reserved */
938 0, /* tp_repr */
939 0, /* tp_as_number */
940 0, /* tp_as_sequence */
941 0, /* tp_as_mapping */
942 0, /* tp_hash */
943 0, /* tp_call */
944 0, /* tp_str */
945 PyObject_GenericGetAttr, /* tp_getattro */
946 0, /* tp_setattro */
947 0, /* tp_as_buffer */
948 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
949 0, /* tp_doc */
950 (traverseproc)setiter_traverse, /* tp_traverse */
951 0, /* tp_clear */
952 0, /* tp_richcompare */
953 0, /* tp_weaklistoffset */
954 PyObject_SelfIter, /* tp_iter */
955 (iternextfunc)setiter_iternext, /* tp_iternext */
956 setiter_methods, /* tp_methods */
957 0,
958 };
959
960 static PyObject *
set_iter(PySetObject * so)961 set_iter(PySetObject *so)
962 {
963 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
964 if (si == NULL)
965 return NULL;
966 Py_INCREF(so);
967 si->si_set = so;
968 si->si_used = so->used;
969 si->si_pos = 0;
970 si->len = so->used;
971 _PyObject_GC_TRACK(si);
972 return (PyObject *)si;
973 }
974
975 static int
set_update_internal(PySetObject * so,PyObject * other)976 set_update_internal(PySetObject *so, PyObject *other)
977 {
978 PyObject *key, *it;
979
980 if (PyAnySet_Check(other))
981 return set_merge(so, other);
982
983 if (PyDict_CheckExact(other)) {
984 PyObject *value;
985 Py_ssize_t pos = 0;
986 Py_hash_t hash;
987 Py_ssize_t dictsize = PyDict_GET_SIZE(other);
988
989 /* Do one big resize at the start, rather than
990 * incrementally resizing as we insert new keys. Expect
991 * that there will be no (or few) overlapping keys.
992 */
993 if (dictsize < 0)
994 return -1;
995 if ((so->fill + dictsize)*5 >= so->mask*3) {
996 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
997 return -1;
998 }
999 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1000 if (set_add_entry(so, key, hash))
1001 return -1;
1002 }
1003 return 0;
1004 }
1005
1006 it = PyObject_GetIter(other);
1007 if (it == NULL)
1008 return -1;
1009
1010 while ((key = PyIter_Next(it)) != NULL) {
1011 if (set_add_key(so, key)) {
1012 Py_DECREF(it);
1013 Py_DECREF(key);
1014 return -1;
1015 }
1016 Py_DECREF(key);
1017 }
1018 Py_DECREF(it);
1019 if (PyErr_Occurred())
1020 return -1;
1021 return 0;
1022 }
1023
1024 static PyObject *
set_update(PySetObject * so,PyObject * args)1025 set_update(PySetObject *so, PyObject *args)
1026 {
1027 Py_ssize_t i;
1028
1029 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1030 PyObject *other = PyTuple_GET_ITEM(args, i);
1031 if (set_update_internal(so, other))
1032 return NULL;
1033 }
1034 Py_RETURN_NONE;
1035 }
1036
1037 PyDoc_STRVAR(update_doc,
1038 "Update a set with the union of itself and others.");
1039
1040 /* XXX Todo:
1041 If aligned memory allocations become available, make the
1042 set object 64 byte aligned so that most of the fields
1043 can be retrieved or updated in a single cache line.
1044 */
1045
1046 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1047 make_new_set(PyTypeObject *type, PyObject *iterable)
1048 {
1049 PySetObject *so;
1050
1051 so = (PySetObject *)type->tp_alloc(type, 0);
1052 if (so == NULL)
1053 return NULL;
1054
1055 so->fill = 0;
1056 so->used = 0;
1057 so->mask = PySet_MINSIZE - 1;
1058 so->table = so->smalltable;
1059 so->hash = -1;
1060 so->finger = 0;
1061 so->weakreflist = NULL;
1062
1063 if (iterable != NULL) {
1064 if (set_update_internal(so, iterable)) {
1065 Py_DECREF(so);
1066 return NULL;
1067 }
1068 }
1069
1070 return (PyObject *)so;
1071 }
1072
1073 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)1074 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
1075 {
1076 if (type != &PySet_Type && type != &PyFrozenSet_Type) {
1077 if (PyType_IsSubtype(type, &PySet_Type))
1078 type = &PySet_Type;
1079 else
1080 type = &PyFrozenSet_Type;
1081 }
1082 return make_new_set(type, iterable);
1083 }
1084
1085 /* The empty frozenset is a singleton */
1086 static PyObject *emptyfrozenset = NULL;
1087
1088 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1089 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1090 {
1091 PyObject *iterable = NULL, *result;
1092
1093 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds))
1094 return NULL;
1095
1096 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1097 return NULL;
1098
1099 if (type != &PyFrozenSet_Type)
1100 return make_new_set(type, iterable);
1101
1102 if (iterable != NULL) {
1103 /* frozenset(f) is idempotent */
1104 if (PyFrozenSet_CheckExact(iterable)) {
1105 Py_INCREF(iterable);
1106 return iterable;
1107 }
1108 result = make_new_set(type, iterable);
1109 if (result == NULL || PySet_GET_SIZE(result))
1110 return result;
1111 Py_DECREF(result);
1112 }
1113 /* The empty frozenset is a singleton */
1114 if (emptyfrozenset == NULL)
1115 emptyfrozenset = make_new_set(type, NULL);
1116 Py_XINCREF(emptyfrozenset);
1117 return emptyfrozenset;
1118 }
1119
1120 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1121 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1122 {
1123 return make_new_set(type, NULL);
1124 }
1125
1126 /* set_swap_bodies() switches the contents of any two sets by moving their
1127 internal data pointers and, if needed, copying the internal smalltables.
1128 Semantically equivalent to:
1129
1130 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1131
1132 The function always succeeds and it leaves both objects in a stable state.
1133 Useful for operations that update in-place (by allowing an intermediate
1134 result to be swapped into one of the original inputs).
1135 */
1136
1137 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1138 set_swap_bodies(PySetObject *a, PySetObject *b)
1139 {
1140 Py_ssize_t t;
1141 setentry *u;
1142 setentry tab[PySet_MINSIZE];
1143 Py_hash_t h;
1144
1145 t = a->fill; a->fill = b->fill; b->fill = t;
1146 t = a->used; a->used = b->used; b->used = t;
1147 t = a->mask; a->mask = b->mask; b->mask = t;
1148
1149 u = a->table;
1150 if (a->table == a->smalltable)
1151 u = b->smalltable;
1152 a->table = b->table;
1153 if (b->table == b->smalltable)
1154 a->table = a->smalltable;
1155 b->table = u;
1156
1157 if (a->table == a->smalltable || b->table == b->smalltable) {
1158 memcpy(tab, a->smalltable, sizeof(tab));
1159 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1160 memcpy(b->smalltable, tab, sizeof(tab));
1161 }
1162
1163 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1164 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1165 h = a->hash; a->hash = b->hash; b->hash = h;
1166 } else {
1167 a->hash = -1;
1168 b->hash = -1;
1169 }
1170 }
1171
1172 static PyObject *
set_copy(PySetObject * so)1173 set_copy(PySetObject *so)
1174 {
1175 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1176 }
1177
1178 static PyObject *
frozenset_copy(PySetObject * so)1179 frozenset_copy(PySetObject *so)
1180 {
1181 if (PyFrozenSet_CheckExact(so)) {
1182 Py_INCREF(so);
1183 return (PyObject *)so;
1184 }
1185 return set_copy(so);
1186 }
1187
1188 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1189
1190 static PyObject *
set_clear(PySetObject * so)1191 set_clear(PySetObject *so)
1192 {
1193 set_clear_internal(so);
1194 Py_RETURN_NONE;
1195 }
1196
1197 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1198
1199 static PyObject *
set_union(PySetObject * so,PyObject * args)1200 set_union(PySetObject *so, PyObject *args)
1201 {
1202 PySetObject *result;
1203 PyObject *other;
1204 Py_ssize_t i;
1205
1206 result = (PySetObject *)set_copy(so);
1207 if (result == NULL)
1208 return NULL;
1209
1210 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1211 other = PyTuple_GET_ITEM(args, i);
1212 if ((PyObject *)so == other)
1213 continue;
1214 if (set_update_internal(result, other)) {
1215 Py_DECREF(result);
1216 return NULL;
1217 }
1218 }
1219 return (PyObject *)result;
1220 }
1221
1222 PyDoc_STRVAR(union_doc,
1223 "Return the union of sets as a new set.\n\
1224 \n\
1225 (i.e. all elements that are in either set.)");
1226
1227 static PyObject *
set_or(PySetObject * so,PyObject * other)1228 set_or(PySetObject *so, PyObject *other)
1229 {
1230 PySetObject *result;
1231
1232 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1233 Py_RETURN_NOTIMPLEMENTED;
1234
1235 result = (PySetObject *)set_copy(so);
1236 if (result == NULL)
1237 return NULL;
1238 if ((PyObject *)so == other)
1239 return (PyObject *)result;
1240 if (set_update_internal(result, other)) {
1241 Py_DECREF(result);
1242 return NULL;
1243 }
1244 return (PyObject *)result;
1245 }
1246
1247 static PyObject *
set_ior(PySetObject * so,PyObject * other)1248 set_ior(PySetObject *so, PyObject *other)
1249 {
1250 if (!PyAnySet_Check(other))
1251 Py_RETURN_NOTIMPLEMENTED;
1252
1253 if (set_update_internal(so, other))
1254 return NULL;
1255 Py_INCREF(so);
1256 return (PyObject *)so;
1257 }
1258
1259 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1260 set_intersection(PySetObject *so, PyObject *other)
1261 {
1262 PySetObject *result;
1263 PyObject *key, *it, *tmp;
1264 Py_hash_t hash;
1265 int rv;
1266
1267 if ((PyObject *)so == other)
1268 return set_copy(so);
1269
1270 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1271 if (result == NULL)
1272 return NULL;
1273
1274 if (PyAnySet_Check(other)) {
1275 Py_ssize_t pos = 0;
1276 setentry *entry;
1277
1278 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1279 tmp = (PyObject *)so;
1280 so = (PySetObject *)other;
1281 other = tmp;
1282 }
1283
1284 while (set_next((PySetObject *)other, &pos, &entry)) {
1285 key = entry->key;
1286 hash = entry->hash;
1287 rv = set_contains_entry(so, key, hash);
1288 if (rv < 0) {
1289 Py_DECREF(result);
1290 return NULL;
1291 }
1292 if (rv) {
1293 if (set_add_entry(result, key, hash)) {
1294 Py_DECREF(result);
1295 return NULL;
1296 }
1297 }
1298 }
1299 return (PyObject *)result;
1300 }
1301
1302 it = PyObject_GetIter(other);
1303 if (it == NULL) {
1304 Py_DECREF(result);
1305 return NULL;
1306 }
1307
1308 while ((key = PyIter_Next(it)) != NULL) {
1309 hash = PyObject_Hash(key);
1310 if (hash == -1)
1311 goto error;
1312 rv = set_contains_entry(so, key, hash);
1313 if (rv < 0)
1314 goto error;
1315 if (rv) {
1316 if (set_add_entry(result, key, hash))
1317 goto error;
1318 }
1319 Py_DECREF(key);
1320 }
1321 Py_DECREF(it);
1322 if (PyErr_Occurred()) {
1323 Py_DECREF(result);
1324 return NULL;
1325 }
1326 return (PyObject *)result;
1327 error:
1328 Py_DECREF(it);
1329 Py_DECREF(result);
1330 Py_DECREF(key);
1331 return NULL;
1332 }
1333
1334 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1335 set_intersection_multi(PySetObject *so, PyObject *args)
1336 {
1337 Py_ssize_t i;
1338 PyObject *result = (PyObject *)so;
1339
1340 if (PyTuple_GET_SIZE(args) == 0)
1341 return set_copy(so);
1342
1343 Py_INCREF(so);
1344 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1345 PyObject *other = PyTuple_GET_ITEM(args, i);
1346 PyObject *newresult = set_intersection((PySetObject *)result, other);
1347 if (newresult == NULL) {
1348 Py_DECREF(result);
1349 return NULL;
1350 }
1351 Py_DECREF(result);
1352 result = newresult;
1353 }
1354 return result;
1355 }
1356
1357 PyDoc_STRVAR(intersection_doc,
1358 "Return the intersection of two sets as a new set.\n\
1359 \n\
1360 (i.e. all elements that are in both sets.)");
1361
1362 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1363 set_intersection_update(PySetObject *so, PyObject *other)
1364 {
1365 PyObject *tmp;
1366
1367 tmp = set_intersection(so, other);
1368 if (tmp == NULL)
1369 return NULL;
1370 set_swap_bodies(so, (PySetObject *)tmp);
1371 Py_DECREF(tmp);
1372 Py_RETURN_NONE;
1373 }
1374
1375 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1376 set_intersection_update_multi(PySetObject *so, PyObject *args)
1377 {
1378 PyObject *tmp;
1379
1380 tmp = set_intersection_multi(so, args);
1381 if (tmp == NULL)
1382 return NULL;
1383 set_swap_bodies(so, (PySetObject *)tmp);
1384 Py_DECREF(tmp);
1385 Py_RETURN_NONE;
1386 }
1387
1388 PyDoc_STRVAR(intersection_update_doc,
1389 "Update a set with the intersection of itself and another.");
1390
1391 static PyObject *
set_and(PySetObject * so,PyObject * other)1392 set_and(PySetObject *so, PyObject *other)
1393 {
1394 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1395 Py_RETURN_NOTIMPLEMENTED;
1396 return set_intersection(so, other);
1397 }
1398
1399 static PyObject *
set_iand(PySetObject * so,PyObject * other)1400 set_iand(PySetObject *so, PyObject *other)
1401 {
1402 PyObject *result;
1403
1404 if (!PyAnySet_Check(other))
1405 Py_RETURN_NOTIMPLEMENTED;
1406 result = set_intersection_update(so, other);
1407 if (result == NULL)
1408 return NULL;
1409 Py_DECREF(result);
1410 Py_INCREF(so);
1411 return (PyObject *)so;
1412 }
1413
1414 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1415 set_isdisjoint(PySetObject *so, PyObject *other)
1416 {
1417 PyObject *key, *it, *tmp;
1418 int rv;
1419
1420 if ((PyObject *)so == other) {
1421 if (PySet_GET_SIZE(so) == 0)
1422 Py_RETURN_TRUE;
1423 else
1424 Py_RETURN_FALSE;
1425 }
1426
1427 if (PyAnySet_CheckExact(other)) {
1428 Py_ssize_t pos = 0;
1429 setentry *entry;
1430
1431 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1432 tmp = (PyObject *)so;
1433 so = (PySetObject *)other;
1434 other = tmp;
1435 }
1436 while (set_next((PySetObject *)other, &pos, &entry)) {
1437 rv = set_contains_entry(so, entry->key, entry->hash);
1438 if (rv < 0)
1439 return NULL;
1440 if (rv)
1441 Py_RETURN_FALSE;
1442 }
1443 Py_RETURN_TRUE;
1444 }
1445
1446 it = PyObject_GetIter(other);
1447 if (it == NULL)
1448 return NULL;
1449
1450 while ((key = PyIter_Next(it)) != NULL) {
1451 Py_hash_t hash = PyObject_Hash(key);
1452
1453 if (hash == -1) {
1454 Py_DECREF(key);
1455 Py_DECREF(it);
1456 return NULL;
1457 }
1458 rv = set_contains_entry(so, key, hash);
1459 Py_DECREF(key);
1460 if (rv < 0) {
1461 Py_DECREF(it);
1462 return NULL;
1463 }
1464 if (rv) {
1465 Py_DECREF(it);
1466 Py_RETURN_FALSE;
1467 }
1468 }
1469 Py_DECREF(it);
1470 if (PyErr_Occurred())
1471 return NULL;
1472 Py_RETURN_TRUE;
1473 }
1474
1475 PyDoc_STRVAR(isdisjoint_doc,
1476 "Return True if two sets have a null intersection.");
1477
1478 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1479 set_difference_update_internal(PySetObject *so, PyObject *other)
1480 {
1481 if (PySet_GET_SIZE(so) == 0) {
1482 return 0;
1483 }
1484
1485 if ((PyObject *)so == other)
1486 return set_clear_internal(so);
1487
1488 if (PyAnySet_Check(other)) {
1489 setentry *entry;
1490 Py_ssize_t pos = 0;
1491
1492 while (set_next((PySetObject *)other, &pos, &entry))
1493 if (set_discard_entry(so, entry->key, entry->hash) < 0)
1494 return -1;
1495 } else {
1496 PyObject *key, *it;
1497 it = PyObject_GetIter(other);
1498 if (it == NULL)
1499 return -1;
1500
1501 while ((key = PyIter_Next(it)) != NULL) {
1502 if (set_discard_key(so, key) < 0) {
1503 Py_DECREF(it);
1504 Py_DECREF(key);
1505 return -1;
1506 }
1507 Py_DECREF(key);
1508 }
1509 Py_DECREF(it);
1510 if (PyErr_Occurred())
1511 return -1;
1512 }
1513 /* If more than 1/4th are dummies, then resize them away. */
1514 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1515 return 0;
1516 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1517 }
1518
1519 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1520 set_difference_update(PySetObject *so, PyObject *args)
1521 {
1522 Py_ssize_t i;
1523
1524 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1525 PyObject *other = PyTuple_GET_ITEM(args, i);
1526 if (set_difference_update_internal(so, other))
1527 return NULL;
1528 }
1529 Py_RETURN_NONE;
1530 }
1531
1532 PyDoc_STRVAR(difference_update_doc,
1533 "Remove all elements of another set from this set.");
1534
1535 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1536 set_copy_and_difference(PySetObject *so, PyObject *other)
1537 {
1538 PyObject *result;
1539
1540 result = set_copy(so);
1541 if (result == NULL)
1542 return NULL;
1543 if (set_difference_update_internal((PySetObject *) result, other) == 0)
1544 return result;
1545 Py_DECREF(result);
1546 return NULL;
1547 }
1548
1549 static PyObject *
set_difference(PySetObject * so,PyObject * other)1550 set_difference(PySetObject *so, PyObject *other)
1551 {
1552 PyObject *result;
1553 PyObject *key;
1554 Py_hash_t hash;
1555 setentry *entry;
1556 Py_ssize_t pos = 0, other_size;
1557 int rv;
1558
1559 if (PySet_GET_SIZE(so) == 0) {
1560 return set_copy(so);
1561 }
1562
1563 if (PyAnySet_Check(other)) {
1564 other_size = PySet_GET_SIZE(other);
1565 }
1566 else if (PyDict_CheckExact(other)) {
1567 other_size = PyDict_GET_SIZE(other);
1568 }
1569 else {
1570 return set_copy_and_difference(so, other);
1571 }
1572
1573 /* If len(so) much more than len(other), it's more efficient to simply copy
1574 * so and then iterate other looking for common elements. */
1575 if ((PySet_GET_SIZE(so) >> 2) > other_size) {
1576 return set_copy_and_difference(so, other);
1577 }
1578
1579 result = make_new_set_basetype(Py_TYPE(so), NULL);
1580 if (result == NULL)
1581 return NULL;
1582
1583 if (PyDict_CheckExact(other)) {
1584 while (set_next(so, &pos, &entry)) {
1585 key = entry->key;
1586 hash = entry->hash;
1587 rv = _PyDict_Contains(other, key, hash);
1588 if (rv < 0) {
1589 Py_DECREF(result);
1590 return NULL;
1591 }
1592 if (!rv) {
1593 if (set_add_entry((PySetObject *)result, key, hash)) {
1594 Py_DECREF(result);
1595 return NULL;
1596 }
1597 }
1598 }
1599 return result;
1600 }
1601
1602 /* Iterate over so, checking for common elements in other. */
1603 while (set_next(so, &pos, &entry)) {
1604 key = entry->key;
1605 hash = entry->hash;
1606 rv = set_contains_entry((PySetObject *)other, key, hash);
1607 if (rv < 0) {
1608 Py_DECREF(result);
1609 return NULL;
1610 }
1611 if (!rv) {
1612 if (set_add_entry((PySetObject *)result, key, hash)) {
1613 Py_DECREF(result);
1614 return NULL;
1615 }
1616 }
1617 }
1618 return result;
1619 }
1620
1621 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1622 set_difference_multi(PySetObject *so, PyObject *args)
1623 {
1624 Py_ssize_t i;
1625 PyObject *result, *other;
1626
1627 if (PyTuple_GET_SIZE(args) == 0)
1628 return set_copy(so);
1629
1630 other = PyTuple_GET_ITEM(args, 0);
1631 result = set_difference(so, other);
1632 if (result == NULL)
1633 return NULL;
1634
1635 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1636 other = PyTuple_GET_ITEM(args, i);
1637 if (set_difference_update_internal((PySetObject *)result, other)) {
1638 Py_DECREF(result);
1639 return NULL;
1640 }
1641 }
1642 return result;
1643 }
1644
1645 PyDoc_STRVAR(difference_doc,
1646 "Return the difference of two or more sets as a new set.\n\
1647 \n\
1648 (i.e. all elements that are in this set but not the others.)");
1649 static PyObject *
set_sub(PySetObject * so,PyObject * other)1650 set_sub(PySetObject *so, PyObject *other)
1651 {
1652 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1653 Py_RETURN_NOTIMPLEMENTED;
1654 return set_difference(so, other);
1655 }
1656
1657 static PyObject *
set_isub(PySetObject * so,PyObject * other)1658 set_isub(PySetObject *so, PyObject *other)
1659 {
1660 if (!PyAnySet_Check(other))
1661 Py_RETURN_NOTIMPLEMENTED;
1662 if (set_difference_update_internal(so, other))
1663 return NULL;
1664 Py_INCREF(so);
1665 return (PyObject *)so;
1666 }
1667
1668 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1669 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1670 {
1671 PySetObject *otherset;
1672 PyObject *key;
1673 Py_ssize_t pos = 0;
1674 Py_hash_t hash;
1675 setentry *entry;
1676 int rv;
1677
1678 if ((PyObject *)so == other)
1679 return set_clear(so);
1680
1681 if (PyDict_CheckExact(other)) {
1682 PyObject *value;
1683 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1684 Py_INCREF(key);
1685 rv = set_discard_entry(so, key, hash);
1686 if (rv < 0) {
1687 Py_DECREF(key);
1688 return NULL;
1689 }
1690 if (rv == DISCARD_NOTFOUND) {
1691 if (set_add_entry(so, key, hash)) {
1692 Py_DECREF(key);
1693 return NULL;
1694 }
1695 }
1696 Py_DECREF(key);
1697 }
1698 Py_RETURN_NONE;
1699 }
1700
1701 if (PyAnySet_Check(other)) {
1702 Py_INCREF(other);
1703 otherset = (PySetObject *)other;
1704 } else {
1705 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1706 if (otherset == NULL)
1707 return NULL;
1708 }
1709
1710 while (set_next(otherset, &pos, &entry)) {
1711 key = entry->key;
1712 hash = entry->hash;
1713 rv = set_discard_entry(so, key, hash);
1714 if (rv < 0) {
1715 Py_DECREF(otherset);
1716 return NULL;
1717 }
1718 if (rv == DISCARD_NOTFOUND) {
1719 if (set_add_entry(so, key, hash)) {
1720 Py_DECREF(otherset);
1721 return NULL;
1722 }
1723 }
1724 }
1725 Py_DECREF(otherset);
1726 Py_RETURN_NONE;
1727 }
1728
1729 PyDoc_STRVAR(symmetric_difference_update_doc,
1730 "Update a set with the symmetric difference of itself and another.");
1731
1732 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1733 set_symmetric_difference(PySetObject *so, PyObject *other)
1734 {
1735 PyObject *rv;
1736 PySetObject *otherset;
1737
1738 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1739 if (otherset == NULL)
1740 return NULL;
1741 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1742 if (rv == NULL) {
1743 Py_DECREF(otherset);
1744 return NULL;
1745 }
1746 Py_DECREF(rv);
1747 return (PyObject *)otherset;
1748 }
1749
1750 PyDoc_STRVAR(symmetric_difference_doc,
1751 "Return the symmetric difference of two sets as a new set.\n\
1752 \n\
1753 (i.e. all elements that are in exactly one of the sets.)");
1754
1755 static PyObject *
set_xor(PySetObject * so,PyObject * other)1756 set_xor(PySetObject *so, PyObject *other)
1757 {
1758 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1759 Py_RETURN_NOTIMPLEMENTED;
1760 return set_symmetric_difference(so, other);
1761 }
1762
1763 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1764 set_ixor(PySetObject *so, PyObject *other)
1765 {
1766 PyObject *result;
1767
1768 if (!PyAnySet_Check(other))
1769 Py_RETURN_NOTIMPLEMENTED;
1770 result = set_symmetric_difference_update(so, other);
1771 if (result == NULL)
1772 return NULL;
1773 Py_DECREF(result);
1774 Py_INCREF(so);
1775 return (PyObject *)so;
1776 }
1777
1778 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1779 set_issubset(PySetObject *so, PyObject *other)
1780 {
1781 setentry *entry;
1782 Py_ssize_t pos = 0;
1783 int rv;
1784
1785 if (!PyAnySet_Check(other)) {
1786 PyObject *tmp, *result;
1787 tmp = make_new_set(&PySet_Type, other);
1788 if (tmp == NULL)
1789 return NULL;
1790 result = set_issubset(so, tmp);
1791 Py_DECREF(tmp);
1792 return result;
1793 }
1794 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1795 Py_RETURN_FALSE;
1796
1797 while (set_next(so, &pos, &entry)) {
1798 rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1799 if (rv < 0)
1800 return NULL;
1801 if (!rv)
1802 Py_RETURN_FALSE;
1803 }
1804 Py_RETURN_TRUE;
1805 }
1806
1807 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1808
1809 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1810 set_issuperset(PySetObject *so, PyObject *other)
1811 {
1812 PyObject *tmp, *result;
1813
1814 if (!PyAnySet_Check(other)) {
1815 tmp = make_new_set(&PySet_Type, other);
1816 if (tmp == NULL)
1817 return NULL;
1818 result = set_issuperset(so, tmp);
1819 Py_DECREF(tmp);
1820 return result;
1821 }
1822 return set_issubset((PySetObject *)other, (PyObject *)so);
1823 }
1824
1825 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1826
1827 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1828 set_richcompare(PySetObject *v, PyObject *w, int op)
1829 {
1830 PyObject *r1;
1831 int r2;
1832
1833 if(!PyAnySet_Check(w))
1834 Py_RETURN_NOTIMPLEMENTED;
1835
1836 switch (op) {
1837 case Py_EQ:
1838 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1839 Py_RETURN_FALSE;
1840 if (v->hash != -1 &&
1841 ((PySetObject *)w)->hash != -1 &&
1842 v->hash != ((PySetObject *)w)->hash)
1843 Py_RETURN_FALSE;
1844 return set_issubset(v, w);
1845 case Py_NE:
1846 r1 = set_richcompare(v, w, Py_EQ);
1847 if (r1 == NULL)
1848 return NULL;
1849 r2 = PyObject_IsTrue(r1);
1850 Py_DECREF(r1);
1851 if (r2 < 0)
1852 return NULL;
1853 return PyBool_FromLong(!r2);
1854 case Py_LE:
1855 return set_issubset(v, w);
1856 case Py_GE:
1857 return set_issuperset(v, w);
1858 case Py_LT:
1859 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1860 Py_RETURN_FALSE;
1861 return set_issubset(v, w);
1862 case Py_GT:
1863 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1864 Py_RETURN_FALSE;
1865 return set_issuperset(v, w);
1866 }
1867 Py_RETURN_NOTIMPLEMENTED;
1868 }
1869
1870 static PyObject *
set_add(PySetObject * so,PyObject * key)1871 set_add(PySetObject *so, PyObject *key)
1872 {
1873 if (set_add_key(so, key))
1874 return NULL;
1875 Py_RETURN_NONE;
1876 }
1877
1878 PyDoc_STRVAR(add_doc,
1879 "Add an element to a set.\n\
1880 \n\
1881 This has no effect if the element is already present.");
1882
1883 static int
set_contains(PySetObject * so,PyObject * key)1884 set_contains(PySetObject *so, PyObject *key)
1885 {
1886 PyObject *tmpkey;
1887 int rv;
1888
1889 rv = set_contains_key(so, key);
1890 if (rv < 0) {
1891 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1892 return -1;
1893 PyErr_Clear();
1894 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1895 if (tmpkey == NULL)
1896 return -1;
1897 rv = set_contains_key(so, tmpkey);
1898 Py_DECREF(tmpkey);
1899 }
1900 return rv;
1901 }
1902
1903 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1904 set_direct_contains(PySetObject *so, PyObject *key)
1905 {
1906 long result;
1907
1908 result = set_contains(so, key);
1909 if (result < 0)
1910 return NULL;
1911 return PyBool_FromLong(result);
1912 }
1913
1914 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1915
1916 static PyObject *
set_remove(PySetObject * so,PyObject * key)1917 set_remove(PySetObject *so, PyObject *key)
1918 {
1919 PyObject *tmpkey;
1920 int rv;
1921
1922 rv = set_discard_key(so, key);
1923 if (rv < 0) {
1924 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1925 return NULL;
1926 PyErr_Clear();
1927 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1928 if (tmpkey == NULL)
1929 return NULL;
1930 rv = set_discard_key(so, tmpkey);
1931 Py_DECREF(tmpkey);
1932 if (rv < 0)
1933 return NULL;
1934 }
1935
1936 if (rv == DISCARD_NOTFOUND) {
1937 _PyErr_SetKeyError(key);
1938 return NULL;
1939 }
1940 Py_RETURN_NONE;
1941 }
1942
1943 PyDoc_STRVAR(remove_doc,
1944 "Remove an element from a set; it must be a member.\n\
1945 \n\
1946 If the element is not a member, raise a KeyError.");
1947
1948 static PyObject *
set_discard(PySetObject * so,PyObject * key)1949 set_discard(PySetObject *so, PyObject *key)
1950 {
1951 PyObject *tmpkey;
1952 int rv;
1953
1954 rv = set_discard_key(so, key);
1955 if (rv < 0) {
1956 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1957 return NULL;
1958 PyErr_Clear();
1959 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1960 if (tmpkey == NULL)
1961 return NULL;
1962 rv = set_discard_key(so, tmpkey);
1963 Py_DECREF(tmpkey);
1964 if (rv < 0)
1965 return NULL;
1966 }
1967 Py_RETURN_NONE;
1968 }
1969
1970 PyDoc_STRVAR(discard_doc,
1971 "Remove an element from a set if it is a member.\n\
1972 \n\
1973 If the element is not a member, do nothing.");
1974
1975 static PyObject *
set_reduce(PySetObject * so)1976 set_reduce(PySetObject *so)
1977 {
1978 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1979 _Py_IDENTIFIER(__dict__);
1980
1981 keys = PySequence_List((PyObject *)so);
1982 if (keys == NULL)
1983 goto done;
1984 args = PyTuple_Pack(1, keys);
1985 if (args == NULL)
1986 goto done;
1987 dict = _PyObject_GetAttrId((PyObject *)so, &PyId___dict__);
1988 if (dict == NULL) {
1989 PyErr_Clear();
1990 dict = Py_None;
1991 Py_INCREF(dict);
1992 }
1993 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1994 done:
1995 Py_XDECREF(args);
1996 Py_XDECREF(keys);
1997 Py_XDECREF(dict);
1998 return result;
1999 }
2000
2001 static PyObject *
set_sizeof(PySetObject * so)2002 set_sizeof(PySetObject *so)
2003 {
2004 Py_ssize_t res;
2005
2006 res = _PyObject_SIZE(Py_TYPE(so));
2007 if (so->table != so->smalltable)
2008 res = res + (so->mask + 1) * sizeof(setentry);
2009 return PyLong_FromSsize_t(res);
2010 }
2011
2012 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
2013 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)2014 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
2015 {
2016 PyObject *iterable = NULL;
2017
2018 if (!_PyArg_NoKeywords("set", kwds))
2019 return -1;
2020 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2021 return -1;
2022 if (self->fill)
2023 set_clear_internal(self);
2024 self->hash = -1;
2025 if (iterable == NULL)
2026 return 0;
2027 return set_update_internal(self, iterable);
2028 }
2029
2030 static PySequenceMethods set_as_sequence = {
2031 set_len, /* sq_length */
2032 0, /* sq_concat */
2033 0, /* sq_repeat */
2034 0, /* sq_item */
2035 0, /* sq_slice */
2036 0, /* sq_ass_item */
2037 0, /* sq_ass_slice */
2038 (objobjproc)set_contains, /* sq_contains */
2039 };
2040
2041 /* set object ********************************************************/
2042
2043 #ifdef Py_DEBUG
2044 static PyObject *test_c_api(PySetObject *so);
2045
2046 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2047 All is well if assertions don't fail.");
2048 #endif
2049
2050 static PyMethodDef set_methods[] = {
2051 {"add", (PyCFunction)set_add, METH_O,
2052 add_doc},
2053 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2054 clear_doc},
2055 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2056 contains_doc},
2057 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2058 copy_doc},
2059 {"discard", (PyCFunction)set_discard, METH_O,
2060 discard_doc},
2061 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2062 difference_doc},
2063 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2064 difference_update_doc},
2065 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2066 intersection_doc},
2067 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2068 intersection_update_doc},
2069 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2070 isdisjoint_doc},
2071 {"issubset", (PyCFunction)set_issubset, METH_O,
2072 issubset_doc},
2073 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2074 issuperset_doc},
2075 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2076 pop_doc},
2077 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2078 reduce_doc},
2079 {"remove", (PyCFunction)set_remove, METH_O,
2080 remove_doc},
2081 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2082 sizeof_doc},
2083 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2084 symmetric_difference_doc},
2085 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2086 symmetric_difference_update_doc},
2087 #ifdef Py_DEBUG
2088 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2089 test_c_api_doc},
2090 #endif
2091 {"union", (PyCFunction)set_union, METH_VARARGS,
2092 union_doc},
2093 {"update", (PyCFunction)set_update, METH_VARARGS,
2094 update_doc},
2095 {NULL, NULL} /* sentinel */
2096 };
2097
2098 static PyNumberMethods set_as_number = {
2099 0, /*nb_add*/
2100 (binaryfunc)set_sub, /*nb_subtract*/
2101 0, /*nb_multiply*/
2102 0, /*nb_remainder*/
2103 0, /*nb_divmod*/
2104 0, /*nb_power*/
2105 0, /*nb_negative*/
2106 0, /*nb_positive*/
2107 0, /*nb_absolute*/
2108 0, /*nb_bool*/
2109 0, /*nb_invert*/
2110 0, /*nb_lshift*/
2111 0, /*nb_rshift*/
2112 (binaryfunc)set_and, /*nb_and*/
2113 (binaryfunc)set_xor, /*nb_xor*/
2114 (binaryfunc)set_or, /*nb_or*/
2115 0, /*nb_int*/
2116 0, /*nb_reserved*/
2117 0, /*nb_float*/
2118 0, /*nb_inplace_add*/
2119 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2120 0, /*nb_inplace_multiply*/
2121 0, /*nb_inplace_remainder*/
2122 0, /*nb_inplace_power*/
2123 0, /*nb_inplace_lshift*/
2124 0, /*nb_inplace_rshift*/
2125 (binaryfunc)set_iand, /*nb_inplace_and*/
2126 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2127 (binaryfunc)set_ior, /*nb_inplace_or*/
2128 };
2129
2130 PyDoc_STRVAR(set_doc,
2131 "set() -> new empty set object\n\
2132 set(iterable) -> new set object\n\
2133 \n\
2134 Build an unordered collection of unique elements.");
2135
2136 PyTypeObject PySet_Type = {
2137 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2138 "set", /* tp_name */
2139 sizeof(PySetObject), /* tp_basicsize */
2140 0, /* tp_itemsize */
2141 /* methods */
2142 (destructor)set_dealloc, /* tp_dealloc */
2143 0, /* tp_print */
2144 0, /* tp_getattr */
2145 0, /* tp_setattr */
2146 0, /* tp_reserved */
2147 (reprfunc)set_repr, /* tp_repr */
2148 &set_as_number, /* tp_as_number */
2149 &set_as_sequence, /* tp_as_sequence */
2150 0, /* tp_as_mapping */
2151 PyObject_HashNotImplemented, /* tp_hash */
2152 0, /* tp_call */
2153 0, /* tp_str */
2154 PyObject_GenericGetAttr, /* tp_getattro */
2155 0, /* tp_setattro */
2156 0, /* tp_as_buffer */
2157 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2158 Py_TPFLAGS_BASETYPE, /* tp_flags */
2159 set_doc, /* tp_doc */
2160 (traverseproc)set_traverse, /* tp_traverse */
2161 (inquiry)set_clear_internal, /* tp_clear */
2162 (richcmpfunc)set_richcompare, /* tp_richcompare */
2163 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2164 (getiterfunc)set_iter, /* tp_iter */
2165 0, /* tp_iternext */
2166 set_methods, /* tp_methods */
2167 0, /* tp_members */
2168 0, /* tp_getset */
2169 0, /* tp_base */
2170 0, /* tp_dict */
2171 0, /* tp_descr_get */
2172 0, /* tp_descr_set */
2173 0, /* tp_dictoffset */
2174 (initproc)set_init, /* tp_init */
2175 PyType_GenericAlloc, /* tp_alloc */
2176 set_new, /* tp_new */
2177 PyObject_GC_Del, /* tp_free */
2178 };
2179
2180 /* frozenset object ********************************************************/
2181
2182
2183 static PyMethodDef frozenset_methods[] = {
2184 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2185 contains_doc},
2186 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2187 copy_doc},
2188 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2189 difference_doc},
2190 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2191 intersection_doc},
2192 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2193 isdisjoint_doc},
2194 {"issubset", (PyCFunction)set_issubset, METH_O,
2195 issubset_doc},
2196 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2197 issuperset_doc},
2198 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2199 reduce_doc},
2200 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2201 sizeof_doc},
2202 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2203 symmetric_difference_doc},
2204 {"union", (PyCFunction)set_union, METH_VARARGS,
2205 union_doc},
2206 {NULL, NULL} /* sentinel */
2207 };
2208
2209 static PyNumberMethods frozenset_as_number = {
2210 0, /*nb_add*/
2211 (binaryfunc)set_sub, /*nb_subtract*/
2212 0, /*nb_multiply*/
2213 0, /*nb_remainder*/
2214 0, /*nb_divmod*/
2215 0, /*nb_power*/
2216 0, /*nb_negative*/
2217 0, /*nb_positive*/
2218 0, /*nb_absolute*/
2219 0, /*nb_bool*/
2220 0, /*nb_invert*/
2221 0, /*nb_lshift*/
2222 0, /*nb_rshift*/
2223 (binaryfunc)set_and, /*nb_and*/
2224 (binaryfunc)set_xor, /*nb_xor*/
2225 (binaryfunc)set_or, /*nb_or*/
2226 };
2227
2228 PyDoc_STRVAR(frozenset_doc,
2229 "frozenset() -> empty frozenset object\n\
2230 frozenset(iterable) -> frozenset object\n\
2231 \n\
2232 Build an immutable unordered collection of unique elements.");
2233
2234 PyTypeObject PyFrozenSet_Type = {
2235 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2236 "frozenset", /* tp_name */
2237 sizeof(PySetObject), /* tp_basicsize */
2238 0, /* tp_itemsize */
2239 /* methods */
2240 (destructor)set_dealloc, /* tp_dealloc */
2241 0, /* tp_print */
2242 0, /* tp_getattr */
2243 0, /* tp_setattr */
2244 0, /* tp_reserved */
2245 (reprfunc)set_repr, /* tp_repr */
2246 &frozenset_as_number, /* tp_as_number */
2247 &set_as_sequence, /* tp_as_sequence */
2248 0, /* tp_as_mapping */
2249 frozenset_hash, /* tp_hash */
2250 0, /* tp_call */
2251 0, /* tp_str */
2252 PyObject_GenericGetAttr, /* tp_getattro */
2253 0, /* tp_setattro */
2254 0, /* tp_as_buffer */
2255 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2256 Py_TPFLAGS_BASETYPE, /* tp_flags */
2257 frozenset_doc, /* tp_doc */
2258 (traverseproc)set_traverse, /* tp_traverse */
2259 (inquiry)set_clear_internal, /* tp_clear */
2260 (richcmpfunc)set_richcompare, /* tp_richcompare */
2261 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2262 (getiterfunc)set_iter, /* tp_iter */
2263 0, /* tp_iternext */
2264 frozenset_methods, /* tp_methods */
2265 0, /* tp_members */
2266 0, /* tp_getset */
2267 0, /* tp_base */
2268 0, /* tp_dict */
2269 0, /* tp_descr_get */
2270 0, /* tp_descr_set */
2271 0, /* tp_dictoffset */
2272 0, /* tp_init */
2273 PyType_GenericAlloc, /* tp_alloc */
2274 frozenset_new, /* tp_new */
2275 PyObject_GC_Del, /* tp_free */
2276 };
2277
2278
2279 /***** C API functions *************************************************/
2280
2281 PyObject *
PySet_New(PyObject * iterable)2282 PySet_New(PyObject *iterable)
2283 {
2284 return make_new_set(&PySet_Type, iterable);
2285 }
2286
2287 PyObject *
PyFrozenSet_New(PyObject * iterable)2288 PyFrozenSet_New(PyObject *iterable)
2289 {
2290 return make_new_set(&PyFrozenSet_Type, iterable);
2291 }
2292
2293 Py_ssize_t
PySet_Size(PyObject * anyset)2294 PySet_Size(PyObject *anyset)
2295 {
2296 if (!PyAnySet_Check(anyset)) {
2297 PyErr_BadInternalCall();
2298 return -1;
2299 }
2300 return PySet_GET_SIZE(anyset);
2301 }
2302
2303 int
PySet_Clear(PyObject * set)2304 PySet_Clear(PyObject *set)
2305 {
2306 if (!PySet_Check(set)) {
2307 PyErr_BadInternalCall();
2308 return -1;
2309 }
2310 return set_clear_internal((PySetObject *)set);
2311 }
2312
2313 int
PySet_Contains(PyObject * anyset,PyObject * key)2314 PySet_Contains(PyObject *anyset, PyObject *key)
2315 {
2316 if (!PyAnySet_Check(anyset)) {
2317 PyErr_BadInternalCall();
2318 return -1;
2319 }
2320 return set_contains_key((PySetObject *)anyset, key);
2321 }
2322
2323 int
PySet_Discard(PyObject * set,PyObject * key)2324 PySet_Discard(PyObject *set, PyObject *key)
2325 {
2326 if (!PySet_Check(set)) {
2327 PyErr_BadInternalCall();
2328 return -1;
2329 }
2330 return set_discard_key((PySetObject *)set, key);
2331 }
2332
2333 int
PySet_Add(PyObject * anyset,PyObject * key)2334 PySet_Add(PyObject *anyset, PyObject *key)
2335 {
2336 if (!PySet_Check(anyset) &&
2337 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2338 PyErr_BadInternalCall();
2339 return -1;
2340 }
2341 return set_add_key((PySetObject *)anyset, key);
2342 }
2343
2344 int
PySet_ClearFreeList(void)2345 PySet_ClearFreeList(void)
2346 {
2347 return 0;
2348 }
2349
2350 void
PySet_Fini(void)2351 PySet_Fini(void)
2352 {
2353 Py_CLEAR(emptyfrozenset);
2354 }
2355
2356 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2357 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2358 {
2359 setentry *entry;
2360
2361 if (!PyAnySet_Check(set)) {
2362 PyErr_BadInternalCall();
2363 return -1;
2364 }
2365 if (set_next((PySetObject *)set, pos, &entry) == 0)
2366 return 0;
2367 *key = entry->key;
2368 *hash = entry->hash;
2369 return 1;
2370 }
2371
2372 PyObject *
PySet_Pop(PyObject * set)2373 PySet_Pop(PyObject *set)
2374 {
2375 if (!PySet_Check(set)) {
2376 PyErr_BadInternalCall();
2377 return NULL;
2378 }
2379 return set_pop((PySetObject *)set);
2380 }
2381
2382 int
_PySet_Update(PyObject * set,PyObject * iterable)2383 _PySet_Update(PyObject *set, PyObject *iterable)
2384 {
2385 if (!PySet_Check(set)) {
2386 PyErr_BadInternalCall();
2387 return -1;
2388 }
2389 return set_update_internal((PySetObject *)set, iterable);
2390 }
2391
2392 /* Exported for the gdb plugin's benefit. */
2393 PyObject *_PySet_Dummy = dummy;
2394
2395 #ifdef Py_DEBUG
2396
2397 /* Test code to be called with any three element set.
2398 Returns True and original set is restored. */
2399
2400 #define assertRaises(call_return_value, exception) \
2401 do { \
2402 assert(call_return_value); \
2403 assert(PyErr_ExceptionMatches(exception)); \
2404 PyErr_Clear(); \
2405 } while(0)
2406
2407 static PyObject *
test_c_api(PySetObject * so)2408 test_c_api(PySetObject *so)
2409 {
2410 Py_ssize_t count;
2411 const char *s;
2412 Py_ssize_t i;
2413 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2414 PyObject *ob = (PyObject *)so;
2415 Py_hash_t hash;
2416 PyObject *str;
2417
2418 /* Verify preconditions */
2419 assert(PyAnySet_Check(ob));
2420 assert(PyAnySet_CheckExact(ob));
2421 assert(!PyFrozenSet_CheckExact(ob));
2422
2423 /* so.clear(); so |= set("abc"); */
2424 str = PyUnicode_FromString("abc");
2425 if (str == NULL)
2426 return NULL;
2427 set_clear_internal(so);
2428 if (set_update_internal(so, str)) {
2429 Py_DECREF(str);
2430 return NULL;
2431 }
2432 Py_DECREF(str);
2433
2434 /* Exercise type/size checks */
2435 assert(PySet_Size(ob) == 3);
2436 assert(PySet_GET_SIZE(ob) == 3);
2437
2438 /* Raise TypeError for non-iterable constructor arguments */
2439 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2440 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2441
2442 /* Raise TypeError for unhashable key */
2443 dup = PySet_New(ob);
2444 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2445 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2446 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2447
2448 /* Exercise successful pop, contains, add, and discard */
2449 elem = PySet_Pop(ob);
2450 assert(PySet_Contains(ob, elem) == 0);
2451 assert(PySet_GET_SIZE(ob) == 2);
2452 assert(PySet_Add(ob, elem) == 0);
2453 assert(PySet_Contains(ob, elem) == 1);
2454 assert(PySet_GET_SIZE(ob) == 3);
2455 assert(PySet_Discard(ob, elem) == 1);
2456 assert(PySet_GET_SIZE(ob) == 2);
2457 assert(PySet_Discard(ob, elem) == 0);
2458 assert(PySet_GET_SIZE(ob) == 2);
2459
2460 /* Exercise clear */
2461 dup2 = PySet_New(dup);
2462 assert(PySet_Clear(dup2) == 0);
2463 assert(PySet_Size(dup2) == 0);
2464 Py_DECREF(dup2);
2465
2466 /* Raise SystemError on clear or update of frozen set */
2467 f = PyFrozenSet_New(dup);
2468 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2469 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2470 assert(PySet_Add(f, elem) == 0);
2471 Py_INCREF(f);
2472 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2473 Py_DECREF(f);
2474 Py_DECREF(f);
2475
2476 /* Exercise direct iteration */
2477 i = 0, count = 0;
2478 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2479 s = PyUnicode_AsUTF8(x);
2480 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2481 count++;
2482 }
2483 assert(count == 3);
2484
2485 /* Exercise updates */
2486 dup2 = PySet_New(NULL);
2487 assert(_PySet_Update(dup2, dup) == 0);
2488 assert(PySet_Size(dup2) == 3);
2489 assert(_PySet_Update(dup2, dup) == 0);
2490 assert(PySet_Size(dup2) == 3);
2491 Py_DECREF(dup2);
2492
2493 /* Raise SystemError when self argument is not a set or frozenset. */
2494 t = PyTuple_New(0);
2495 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2496 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2497 Py_DECREF(t);
2498
2499 /* Raise SystemError when self argument is not a set. */
2500 f = PyFrozenSet_New(dup);
2501 assert(PySet_Size(f) == 3);
2502 assert(PyFrozenSet_CheckExact(f));
2503 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2504 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2505 Py_DECREF(f);
2506
2507 /* Raise KeyError when popping from an empty set */
2508 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2509 Py_DECREF(ob);
2510 assert(PySet_GET_SIZE(ob) == 0);
2511 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2512
2513 /* Restore the set from the copy using the PyNumber API */
2514 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2515 Py_DECREF(ob);
2516
2517 /* Verify constructors accept NULL arguments */
2518 f = PySet_New(NULL);
2519 assert(f != NULL);
2520 assert(PySet_GET_SIZE(f) == 0);
2521 Py_DECREF(f);
2522 f = PyFrozenSet_New(NULL);
2523 assert(f != NULL);
2524 assert(PyFrozenSet_CheckExact(f));
2525 assert(PySet_GET_SIZE(f) == 0);
2526 Py_DECREF(f);
2527
2528 Py_DECREF(elem);
2529 Py_DECREF(dup);
2530 Py_RETURN_TRUE;
2531 }
2532
2533 #undef assertRaises
2534
2535 #endif
2536
2537 /***** Dummy Struct *************************************************/
2538
2539 static PyObject *
dummy_repr(PyObject * op)2540 dummy_repr(PyObject *op)
2541 {
2542 return PyUnicode_FromString("<dummy key>");
2543 }
2544
2545 static void
dummy_dealloc(PyObject * ignore)2546 dummy_dealloc(PyObject* ignore)
2547 {
2548 Py_FatalError("deallocating <dummy key>");
2549 }
2550
2551 static PyTypeObject _PySetDummy_Type = {
2552 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2553 "<dummy key> type",
2554 0,
2555 0,
2556 dummy_dealloc, /*tp_dealloc*/ /*never called*/
2557 0, /*tp_print*/
2558 0, /*tp_getattr*/
2559 0, /*tp_setattr*/
2560 0, /*tp_reserved*/
2561 dummy_repr, /*tp_repr*/
2562 0, /*tp_as_number*/
2563 0, /*tp_as_sequence*/
2564 0, /*tp_as_mapping*/
2565 0, /*tp_hash */
2566 0, /*tp_call */
2567 0, /*tp_str */
2568 0, /*tp_getattro */
2569 0, /*tp_setattro */
2570 0, /*tp_as_buffer */
2571 Py_TPFLAGS_DEFAULT, /*tp_flags */
2572 };
2573
2574 static PyObject _dummy_struct = {
2575 _PyObject_EXTRA_INIT
2576 2, &_PySetDummy_Type
2577 };
2578
2579