1
2 /* set object implementation
3
4 Written and maintained by Raymond D. Hettinger <python@rcn.com>
5 Derived from Lib/sets.py and Objects/dictobject.c.
6
7 The basic lookup function used by all operations.
8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
9
10 The initial probe index is computed as hash mod the table size.
11 Subsequent probe indices are computed as explained in Objects/dictobject.c.
12
13 To improve cache locality, each probe inspects a series of consecutive
14 nearby entries before moving on to probes elsewhere in memory. This leaves
15 us with a hybrid of linear probing and open addressing. The linear probing
16 reduces the cost of hash collisions because consecutive memory accesses
17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps,
18 we then use open addressing with the upper bits from the hash value. This
19 helps break-up long chains of collisions.
20
21 All arithmetic on hash should ignore overflow.
22
23 Unlike the dictionary implementation, the lookkey function can return
24 NULL if the rich comparison returns an error.
25 */
26
27 #include "Python.h"
28 #include "structmember.h"
29
30 /* Object used as dummy key to fill deleted entries */
31 static PyObject _dummy_struct;
32
33 #define dummy (&_dummy_struct)
34
35
36 /* ======================================================================== */
37 /* ======= Begin logic for probing the hash table ========================= */
38
39 /* Set this to zero to turn-off linear probing */
40 #ifndef LINEAR_PROBES
41 #define LINEAR_PROBES 9
42 #endif
43
44 /* This must be >= 1 */
45 #define PERTURB_SHIFT 5
46
47 static setentry *
set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)48 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
49 {
50 setentry *table;
51 setentry *entry;
52 size_t perturb;
53 size_t mask = so->mask;
54 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */
55 size_t j;
56 int cmp;
57
58 entry = &so->table[i];
59 if (entry->key == NULL)
60 return entry;
61
62 perturb = hash;
63
64 while (1) {
65 if (entry->hash == hash) {
66 PyObject *startkey = entry->key;
67 /* startkey cannot be a dummy because the dummy hash field is -1 */
68 assert(startkey != dummy);
69 if (startkey == key)
70 return entry;
71 if (PyUnicode_CheckExact(startkey)
72 && PyUnicode_CheckExact(key)
73 && _PyUnicode_EQ(startkey, key))
74 return entry;
75 table = so->table;
76 Py_INCREF(startkey);
77 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
78 Py_DECREF(startkey);
79 if (cmp < 0) /* unlikely */
80 return NULL;
81 if (table != so->table || entry->key != startkey) /* unlikely */
82 return set_lookkey(so, key, hash);
83 if (cmp > 0) /* likely */
84 return entry;
85 mask = so->mask; /* help avoid a register spill */
86 }
87
88 if (i + LINEAR_PROBES <= mask) {
89 for (j = 0 ; j < LINEAR_PROBES ; j++) {
90 entry++;
91 if (entry->hash == 0 && entry->key == NULL)
92 return entry;
93 if (entry->hash == hash) {
94 PyObject *startkey = entry->key;
95 assert(startkey != dummy);
96 if (startkey == key)
97 return entry;
98 if (PyUnicode_CheckExact(startkey)
99 && PyUnicode_CheckExact(key)
100 && _PyUnicode_EQ(startkey, key))
101 return entry;
102 table = so->table;
103 Py_INCREF(startkey);
104 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
105 Py_DECREF(startkey);
106 if (cmp < 0)
107 return NULL;
108 if (table != so->table || entry->key != startkey)
109 return set_lookkey(so, key, hash);
110 if (cmp > 0)
111 return entry;
112 mask = so->mask;
113 }
114 }
115 }
116
117 perturb >>= PERTURB_SHIFT;
118 i = (i * 5 + 1 + perturb) & mask;
119
120 entry = &so->table[i];
121 if (entry->key == NULL)
122 return entry;
123 }
124 }
125
126 static int set_table_resize(PySetObject *, Py_ssize_t);
127
128 static int
set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)129 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
130 {
131 setentry *table;
132 setentry *freeslot;
133 setentry *entry;
134 size_t perturb;
135 size_t mask;
136 size_t i; /* Unsigned for defined overflow behavior */
137 size_t j;
138 int cmp;
139
140 /* Pre-increment is necessary to prevent arbitrary code in the rich
141 comparison from deallocating the key just before the insertion. */
142 Py_INCREF(key);
143
144 restart:
145
146 mask = so->mask;
147 i = (size_t)hash & mask;
148
149 entry = &so->table[i];
150 if (entry->key == NULL)
151 goto found_unused;
152
153 freeslot = NULL;
154 perturb = hash;
155
156 while (1) {
157 if (entry->hash == hash) {
158 PyObject *startkey = entry->key;
159 /* startkey cannot be a dummy because the dummy hash field is -1 */
160 assert(startkey != dummy);
161 if (startkey == key)
162 goto found_active;
163 if (PyUnicode_CheckExact(startkey)
164 && PyUnicode_CheckExact(key)
165 && _PyUnicode_EQ(startkey, key))
166 goto found_active;
167 table = so->table;
168 Py_INCREF(startkey);
169 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
170 Py_DECREF(startkey);
171 if (cmp > 0) /* likely */
172 goto found_active;
173 if (cmp < 0)
174 goto comparison_error;
175 /* Continuing the search from the current entry only makes
176 sense if the table and entry are unchanged; otherwise,
177 we have to restart from the beginning */
178 if (table != so->table || entry->key != startkey)
179 goto restart;
180 mask = so->mask; /* help avoid a register spill */
181 }
182 else if (entry->hash == -1 && freeslot == NULL)
183 freeslot = entry;
184
185 if (i + LINEAR_PROBES <= mask) {
186 for (j = 0 ; j < LINEAR_PROBES ; j++) {
187 entry++;
188 if (entry->hash == 0 && entry->key == NULL)
189 goto found_unused_or_dummy;
190 if (entry->hash == hash) {
191 PyObject *startkey = entry->key;
192 assert(startkey != dummy);
193 if (startkey == key)
194 goto found_active;
195 if (PyUnicode_CheckExact(startkey)
196 && PyUnicode_CheckExact(key)
197 && _PyUnicode_EQ(startkey, key))
198 goto found_active;
199 table = so->table;
200 Py_INCREF(startkey);
201 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
202 Py_DECREF(startkey);
203 if (cmp > 0)
204 goto found_active;
205 if (cmp < 0)
206 goto comparison_error;
207 if (table != so->table || entry->key != startkey)
208 goto restart;
209 mask = so->mask;
210 }
211 else if (entry->hash == -1 && freeslot == NULL)
212 freeslot = entry;
213 }
214 }
215
216 perturb >>= PERTURB_SHIFT;
217 i = (i * 5 + 1 + perturb) & mask;
218
219 entry = &so->table[i];
220 if (entry->key == NULL)
221 goto found_unused_or_dummy;
222 }
223
224 found_unused_or_dummy:
225 if (freeslot == NULL)
226 goto found_unused;
227 so->used++;
228 freeslot->key = key;
229 freeslot->hash = hash;
230 return 0;
231
232 found_unused:
233 so->fill++;
234 so->used++;
235 entry->key = key;
236 entry->hash = hash;
237 if ((size_t)so->fill*3 < mask*2)
238 return 0;
239 return set_table_resize(so, so->used);
240
241 found_active:
242 Py_DECREF(key);
243 return 0;
244
245 comparison_error:
246 Py_DECREF(key);
247 return -1;
248 }
249
250 /*
251 Internal routine used by set_table_resize() to insert an item which is
252 known to be absent from the set. This routine also assumes that
253 the set contains no deleted entries. Besides the performance benefit,
254 there is also safety benefit since using set_add_entry() risks making
255 a callback in the middle of a set_table_resize(), see issue 1456209.
256 The caller is responsible for updating the key's reference count and
257 the setobject's fill and used fields.
258 */
259 static void
set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)260 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash)
261 {
262 setentry *entry;
263 size_t perturb = hash;
264 size_t i = (size_t)hash & mask;
265 size_t j;
266
267 while (1) {
268 entry = &table[i];
269 if (entry->key == NULL)
270 goto found_null;
271 if (i + LINEAR_PROBES <= mask) {
272 for (j = 0; j < LINEAR_PROBES; j++) {
273 entry++;
274 if (entry->key == NULL)
275 goto found_null;
276 }
277 }
278 perturb >>= PERTURB_SHIFT;
279 i = (i * 5 + 1 + perturb) & mask;
280 }
281 found_null:
282 entry->key = key;
283 entry->hash = hash;
284 }
285
286 /* ======== End logic for probing the hash table ========================== */
287 /* ======================================================================== */
288
289 /*
290 Restructure the table by allocating a new table and reinserting all
291 keys again. When entries have been deleted, the new table may
292 actually be smaller than the old one.
293 */
294 static int
set_table_resize(PySetObject * so,Py_ssize_t minused)295 set_table_resize(PySetObject *so, Py_ssize_t minused)
296 {
297 Py_ssize_t newsize;
298 setentry *oldtable, *newtable, *entry;
299 Py_ssize_t oldfill = so->fill;
300 Py_ssize_t oldused = so->used;
301 Py_ssize_t oldmask = so->mask;
302 size_t newmask;
303 int is_oldtable_malloced;
304 setentry small_copy[PySet_MINSIZE];
305
306 assert(minused >= 0);
307 minused = (minused > 50000) ? minused * 2 : minused * 4;
308
309 /* Find the smallest table size > minused. */
310 /* XXX speed-up with intrinsics */
311 for (newsize = PySet_MINSIZE;
312 newsize <= minused && newsize > 0;
313 newsize <<= 1)
314 ;
315 if (newsize <= 0) {
316 PyErr_NoMemory();
317 return -1;
318 }
319
320 /* Get space for a new table. */
321 oldtable = so->table;
322 assert(oldtable != NULL);
323 is_oldtable_malloced = oldtable != so->smalltable;
324
325 if (newsize == PySet_MINSIZE) {
326 /* A large table is shrinking, or we can't get any smaller. */
327 newtable = so->smalltable;
328 if (newtable == oldtable) {
329 if (so->fill == so->used) {
330 /* No dummies, so no point doing anything. */
331 return 0;
332 }
333 /* We're not going to resize it, but rebuild the
334 table anyway to purge old dummy entries.
335 Subtle: This is *necessary* if fill==size,
336 as set_lookkey needs at least one virgin slot to
337 terminate failing searches. If fill < size, it's
338 merely desirable, as dummies slow searches. */
339 assert(so->fill > so->used);
340 memcpy(small_copy, oldtable, sizeof(small_copy));
341 oldtable = small_copy;
342 }
343 }
344 else {
345 newtable = PyMem_NEW(setentry, newsize);
346 if (newtable == NULL) {
347 PyErr_NoMemory();
348 return -1;
349 }
350 }
351
352 /* Make the set empty, using the new table. */
353 assert(newtable != oldtable);
354 memset(newtable, 0, sizeof(setentry) * newsize);
355 so->fill = oldused;
356 so->used = oldused;
357 so->mask = newsize - 1;
358 so->table = newtable;
359
360 /* Copy the data over; this is refcount-neutral for active entries;
361 dummy entries aren't copied over, of course */
362 newmask = (size_t)so->mask;
363 if (oldfill == oldused) {
364 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
365 if (entry->key != NULL) {
366 set_insert_clean(newtable, newmask, entry->key, entry->hash);
367 }
368 }
369 } else {
370 for (entry = oldtable; entry <= oldtable + oldmask; entry++) {
371 if (entry->key != NULL && entry->key != dummy) {
372 set_insert_clean(newtable, newmask, entry->key, entry->hash);
373 }
374 }
375 }
376
377 if (is_oldtable_malloced)
378 PyMem_DEL(oldtable);
379 return 0;
380 }
381
382 static int
set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)383 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
384 {
385 setentry *entry;
386
387 entry = set_lookkey(so, key, hash);
388 if (entry != NULL)
389 return entry->key != NULL;
390 return -1;
391 }
392
393 #define DISCARD_NOTFOUND 0
394 #define DISCARD_FOUND 1
395
396 static int
set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)397 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
398 {
399 setentry *entry;
400 PyObject *old_key;
401
402 entry = set_lookkey(so, key, hash);
403 if (entry == NULL)
404 return -1;
405 if (entry->key == NULL)
406 return DISCARD_NOTFOUND;
407 old_key = entry->key;
408 entry->key = dummy;
409 entry->hash = -1;
410 so->used--;
411 Py_DECREF(old_key);
412 return DISCARD_FOUND;
413 }
414
415 static int
set_add_key(PySetObject * so,PyObject * key)416 set_add_key(PySetObject *so, PyObject *key)
417 {
418 Py_hash_t hash;
419
420 if (!PyUnicode_CheckExact(key) ||
421 (hash = ((PyASCIIObject *) key)->hash) == -1) {
422 hash = PyObject_Hash(key);
423 if (hash == -1)
424 return -1;
425 }
426 return set_add_entry(so, key, hash);
427 }
428
429 static int
set_contains_key(PySetObject * so,PyObject * key)430 set_contains_key(PySetObject *so, PyObject *key)
431 {
432 Py_hash_t hash;
433
434 if (!PyUnicode_CheckExact(key) ||
435 (hash = ((PyASCIIObject *) key)->hash) == -1) {
436 hash = PyObject_Hash(key);
437 if (hash == -1)
438 return -1;
439 }
440 return set_contains_entry(so, key, hash);
441 }
442
443 static int
set_discard_key(PySetObject * so,PyObject * key)444 set_discard_key(PySetObject *so, PyObject *key)
445 {
446 Py_hash_t hash;
447
448 if (!PyUnicode_CheckExact(key) ||
449 (hash = ((PyASCIIObject *) key)->hash) == -1) {
450 hash = PyObject_Hash(key);
451 if (hash == -1)
452 return -1;
453 }
454 return set_discard_entry(so, key, hash);
455 }
456
457 static void
set_empty_to_minsize(PySetObject * so)458 set_empty_to_minsize(PySetObject *so)
459 {
460 memset(so->smalltable, 0, sizeof(so->smalltable));
461 so->fill = 0;
462 so->used = 0;
463 so->mask = PySet_MINSIZE - 1;
464 so->table = so->smalltable;
465 so->hash = -1;
466 }
467
468 static int
set_clear_internal(PySetObject * so)469 set_clear_internal(PySetObject *so)
470 {
471 setentry *entry;
472 setentry *table = so->table;
473 Py_ssize_t fill = so->fill;
474 Py_ssize_t used = so->used;
475 int table_is_malloced = table != so->smalltable;
476 setentry small_copy[PySet_MINSIZE];
477
478 assert (PyAnySet_Check(so));
479 assert(table != NULL);
480
481 /* This is delicate. During the process of clearing the set,
482 * decrefs can cause the set to mutate. To avoid fatal confusion
483 * (voice of experience), we have to make the set empty before
484 * clearing the slots, and never refer to anything via so->ref while
485 * clearing.
486 */
487 if (table_is_malloced)
488 set_empty_to_minsize(so);
489
490 else if (fill > 0) {
491 /* It's a small table with something that needs to be cleared.
492 * Afraid the only safe way is to copy the set entries into
493 * another small table first.
494 */
495 memcpy(small_copy, table, sizeof(small_copy));
496 table = small_copy;
497 set_empty_to_minsize(so);
498 }
499 /* else it's a small table that's already empty */
500
501 /* Now we can finally clear things. If C had refcounts, we could
502 * assert that the refcount on table is 1 now, i.e. that this function
503 * has unique access to it, so decref side-effects can't alter it.
504 */
505 for (entry = table; used > 0; entry++) {
506 if (entry->key && entry->key != dummy) {
507 used--;
508 Py_DECREF(entry->key);
509 }
510 }
511
512 if (table_is_malloced)
513 PyMem_DEL(table);
514 return 0;
515 }
516
517 /*
518 * Iterate over a set table. Use like so:
519 *
520 * Py_ssize_t pos;
521 * setentry *entry;
522 * pos = 0; # important! pos should not otherwise be changed by you
523 * while (set_next(yourset, &pos, &entry)) {
524 * Refer to borrowed reference in entry->key.
525 * }
526 *
527 * CAUTION: In general, it isn't safe to use set_next in a loop that
528 * mutates the table.
529 */
530 static int
set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)531 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
532 {
533 Py_ssize_t i;
534 Py_ssize_t mask;
535 setentry *entry;
536
537 assert (PyAnySet_Check(so));
538 i = *pos_ptr;
539 assert(i >= 0);
540 mask = so->mask;
541 entry = &so->table[i];
542 while (i <= mask && (entry->key == NULL || entry->key == dummy)) {
543 i++;
544 entry++;
545 }
546 *pos_ptr = i+1;
547 if (i > mask)
548 return 0;
549 assert(entry != NULL);
550 *entry_ptr = entry;
551 return 1;
552 }
553
554 static void
set_dealloc(PySetObject * so)555 set_dealloc(PySetObject *so)
556 {
557 setentry *entry;
558 Py_ssize_t used = so->used;
559
560 PyObject_GC_UnTrack(so);
561 Py_TRASHCAN_SAFE_BEGIN(so)
562 if (so->weakreflist != NULL)
563 PyObject_ClearWeakRefs((PyObject *) so);
564
565 for (entry = so->table; used > 0; entry++) {
566 if (entry->key && entry->key != dummy) {
567 used--;
568 Py_DECREF(entry->key);
569 }
570 }
571 if (so->table != so->smalltable)
572 PyMem_DEL(so->table);
573 Py_TYPE(so)->tp_free(so);
574 Py_TRASHCAN_SAFE_END(so)
575 }
576
577 static PyObject *
set_repr(PySetObject * so)578 set_repr(PySetObject *so)
579 {
580 PyObject *result=NULL, *keys, *listrepr, *tmp;
581 int status = Py_ReprEnter((PyObject*)so);
582
583 if (status != 0) {
584 if (status < 0)
585 return NULL;
586 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name);
587 }
588
589 /* shortcut for the empty set */
590 if (!so->used) {
591 Py_ReprLeave((PyObject*)so);
592 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name);
593 }
594
595 keys = PySequence_List((PyObject *)so);
596 if (keys == NULL)
597 goto done;
598
599 /* repr(keys)[1:-1] */
600 listrepr = PyObject_Repr(keys);
601 Py_DECREF(keys);
602 if (listrepr == NULL)
603 goto done;
604 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1);
605 Py_DECREF(listrepr);
606 if (tmp == NULL)
607 goto done;
608 listrepr = tmp;
609
610 if (Py_TYPE(so) != &PySet_Type)
611 result = PyUnicode_FromFormat("%s({%U})",
612 Py_TYPE(so)->tp_name,
613 listrepr);
614 else
615 result = PyUnicode_FromFormat("{%U}", listrepr);
616 Py_DECREF(listrepr);
617 done:
618 Py_ReprLeave((PyObject*)so);
619 return result;
620 }
621
622 static Py_ssize_t
set_len(PyObject * so)623 set_len(PyObject *so)
624 {
625 return ((PySetObject *)so)->used;
626 }
627
628 static int
set_merge(PySetObject * so,PyObject * otherset)629 set_merge(PySetObject *so, PyObject *otherset)
630 {
631 PySetObject *other;
632 PyObject *key;
633 Py_ssize_t i;
634 setentry *so_entry;
635 setentry *other_entry;
636
637 assert (PyAnySet_Check(so));
638 assert (PyAnySet_Check(otherset));
639
640 other = (PySetObject*)otherset;
641 if (other == so || other->used == 0)
642 /* a.update(a) or a.update(set()); nothing to do */
643 return 0;
644 /* Do one big resize at the start, rather than
645 * incrementally resizing as we insert new keys. Expect
646 * that there will be no (or few) overlapping keys.
647 */
648 if ((so->fill + other->used)*3 >= so->mask*2) {
649 if (set_table_resize(so, so->used + other->used) != 0)
650 return -1;
651 }
652 so_entry = so->table;
653 other_entry = other->table;
654
655 /* If our table is empty, and both tables have the same size, and
656 there are no dummies to eliminate, then just copy the pointers. */
657 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) {
658 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) {
659 key = other_entry->key;
660 if (key != NULL) {
661 assert(so_entry->key == NULL);
662 Py_INCREF(key);
663 so_entry->key = key;
664 so_entry->hash = other_entry->hash;
665 }
666 }
667 so->fill = other->fill;
668 so->used = other->used;
669 return 0;
670 }
671
672 /* If our table is empty, we can use set_insert_clean() */
673 if (so->fill == 0) {
674 setentry *newtable = so->table;
675 size_t newmask = (size_t)so->mask;
676 so->fill = other->used;
677 so->used = other->used;
678 for (i = other->mask + 1; i > 0 ; i--, other_entry++) {
679 key = other_entry->key;
680 if (key != NULL && key != dummy) {
681 Py_INCREF(key);
682 set_insert_clean(newtable, newmask, key, other_entry->hash);
683 }
684 }
685 return 0;
686 }
687
688 /* We can't assure there are no duplicates, so do normal insertions */
689 for (i = 0; i <= other->mask; i++) {
690 other_entry = &other->table[i];
691 key = other_entry->key;
692 if (key != NULL && key != dummy) {
693 if (set_add_entry(so, key, other_entry->hash))
694 return -1;
695 }
696 }
697 return 0;
698 }
699
700 static PyObject *
set_pop(PySetObject * so)701 set_pop(PySetObject *so)
702 {
703 /* Make sure the search finger is in bounds */
704 Py_ssize_t i = so->finger & so->mask;
705 setentry *entry;
706 PyObject *key;
707
708 assert (PyAnySet_Check(so));
709 if (so->used == 0) {
710 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
711 return NULL;
712 }
713
714 while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
715 i++;
716 if (i > so->mask)
717 i = 0;
718 }
719 key = entry->key;
720 entry->key = dummy;
721 entry->hash = -1;
722 so->used--;
723 so->finger = i + 1; /* next place to start */
724 return key;
725 }
726
727 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\
728 Raises KeyError if the set is empty.");
729
730 static int
set_traverse(PySetObject * so,visitproc visit,void * arg)731 set_traverse(PySetObject *so, visitproc visit, void *arg)
732 {
733 Py_ssize_t pos = 0;
734 setentry *entry;
735
736 while (set_next(so, &pos, &entry))
737 Py_VISIT(entry->key);
738 return 0;
739 }
740
741 /* Work to increase the bit dispersion for closely spaced hash values.
742 This is important because some use cases have many combinations of a
743 small number of elements with nearby hashes so that many distinct
744 combinations collapse to only a handful of distinct hash values. */
745
746 static Py_uhash_t
_shuffle_bits(Py_uhash_t h)747 _shuffle_bits(Py_uhash_t h)
748 {
749 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL;
750 }
751
752 /* Most of the constants in this hash algorithm are randomly chosen
753 large primes with "interesting bit patterns" and that passed tests
754 for good collision statistics on a variety of problematic datasets
755 including powersets and graph structures (such as David Eppstein's
756 graph recipes in Lib/test/test_set.py) */
757
758 static Py_hash_t
frozenset_hash(PyObject * self)759 frozenset_hash(PyObject *self)
760 {
761 PySetObject *so = (PySetObject *)self;
762 Py_uhash_t hash = 0;
763 setentry *entry;
764
765 if (so->hash != -1)
766 return so->hash;
767
768 /* Xor-in shuffled bits from every entry's hash field because xor is
769 commutative and a frozenset hash should be independent of order.
770
771 For speed, include null entries and dummy entries and then
772 subtract out their effect afterwards so that the final hash
773 depends only on active entries. This allows the code to be
774 vectorized by the compiler and it saves the unpredictable
775 branches that would arise when trying to exclude null and dummy
776 entries on every iteration. */
777
778 for (entry = so->table; entry <= &so->table[so->mask]; entry++)
779 hash ^= _shuffle_bits(entry->hash);
780
781 /* Remove the effect of an odd number of NULL entries */
782 if ((so->mask + 1 - so->fill) & 1)
783 hash ^= _shuffle_bits(0);
784
785 /* Remove the effect of an odd number of dummy entries */
786 if ((so->fill - so->used) & 1)
787 hash ^= _shuffle_bits(-1);
788
789 /* Factor in the number of active entries */
790 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL;
791
792 /* Disperse patterns arising in nested frozensets */
793 hash = hash * 69069U + 907133923UL;
794
795 /* -1 is reserved as an error code */
796 if (hash == (Py_uhash_t)-1)
797 hash = 590923713UL;
798
799 so->hash = hash;
800 return hash;
801 }
802
803 /***** Set iterator type ***********************************************/
804
805 typedef struct {
806 PyObject_HEAD
807 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
808 Py_ssize_t si_used;
809 Py_ssize_t si_pos;
810 Py_ssize_t len;
811 } setiterobject;
812
813 static void
setiter_dealloc(setiterobject * si)814 setiter_dealloc(setiterobject *si)
815 {
816 Py_XDECREF(si->si_set);
817 PyObject_GC_Del(si);
818 }
819
820 static int
setiter_traverse(setiterobject * si,visitproc visit,void * arg)821 setiter_traverse(setiterobject *si, visitproc visit, void *arg)
822 {
823 Py_VISIT(si->si_set);
824 return 0;
825 }
826
827 static PyObject *
setiter_len(setiterobject * si)828 setiter_len(setiterobject *si)
829 {
830 Py_ssize_t len = 0;
831 if (si->si_set != NULL && si->si_used == si->si_set->used)
832 len = si->len;
833 return PyLong_FromSsize_t(len);
834 }
835
836 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
837
838 static PyObject *setiter_iternext(setiterobject *si);
839
840 static PyObject *
setiter_reduce(setiterobject * si)841 setiter_reduce(setiterobject *si)
842 {
843 PyObject *list;
844 setiterobject tmp;
845
846 list = PyList_New(0);
847 if (!list)
848 return NULL;
849
850 /* copy the iterator state */
851 tmp = *si;
852 Py_XINCREF(tmp.si_set);
853
854 /* iterate the temporary into a list */
855 for(;;) {
856 PyObject *element = setiter_iternext(&tmp);
857 if (element) {
858 if (PyList_Append(list, element)) {
859 Py_DECREF(element);
860 Py_DECREF(list);
861 Py_XDECREF(tmp.si_set);
862 return NULL;
863 }
864 Py_DECREF(element);
865 } else
866 break;
867 }
868 Py_XDECREF(tmp.si_set);
869 /* check for error */
870 if (tmp.si_set != NULL) {
871 /* we have an error */
872 Py_DECREF(list);
873 return NULL;
874 }
875 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), list);
876 }
877
878 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
879
880 static PyMethodDef setiter_methods[] = {
881 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
882 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc},
883 {NULL, NULL} /* sentinel */
884 };
885
setiter_iternext(setiterobject * si)886 static PyObject *setiter_iternext(setiterobject *si)
887 {
888 PyObject *key;
889 Py_ssize_t i, mask;
890 setentry *entry;
891 PySetObject *so = si->si_set;
892
893 if (so == NULL)
894 return NULL;
895 assert (PyAnySet_Check(so));
896
897 if (si->si_used != so->used) {
898 PyErr_SetString(PyExc_RuntimeError,
899 "Set changed size during iteration");
900 si->si_used = -1; /* Make this state sticky */
901 return NULL;
902 }
903
904 i = si->si_pos;
905 assert(i>=0);
906 entry = so->table;
907 mask = so->mask;
908 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
909 i++;
910 si->si_pos = i+1;
911 if (i > mask)
912 goto fail;
913 si->len--;
914 key = entry[i].key;
915 Py_INCREF(key);
916 return key;
917
918 fail:
919 si->si_set = NULL;
920 Py_DECREF(so);
921 return NULL;
922 }
923
924 PyTypeObject PySetIter_Type = {
925 PyVarObject_HEAD_INIT(&PyType_Type, 0)
926 "set_iterator", /* tp_name */
927 sizeof(setiterobject), /* tp_basicsize */
928 0, /* tp_itemsize */
929 /* methods */
930 (destructor)setiter_dealloc, /* tp_dealloc */
931 0, /* tp_print */
932 0, /* tp_getattr */
933 0, /* tp_setattr */
934 0, /* tp_reserved */
935 0, /* tp_repr */
936 0, /* tp_as_number */
937 0, /* tp_as_sequence */
938 0, /* tp_as_mapping */
939 0, /* tp_hash */
940 0, /* tp_call */
941 0, /* tp_str */
942 PyObject_GenericGetAttr, /* tp_getattro */
943 0, /* tp_setattro */
944 0, /* tp_as_buffer */
945 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
946 0, /* tp_doc */
947 (traverseproc)setiter_traverse, /* tp_traverse */
948 0, /* tp_clear */
949 0, /* tp_richcompare */
950 0, /* tp_weaklistoffset */
951 PyObject_SelfIter, /* tp_iter */
952 (iternextfunc)setiter_iternext, /* tp_iternext */
953 setiter_methods, /* tp_methods */
954 0,
955 };
956
957 static PyObject *
set_iter(PySetObject * so)958 set_iter(PySetObject *so)
959 {
960 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type);
961 if (si == NULL)
962 return NULL;
963 Py_INCREF(so);
964 si->si_set = so;
965 si->si_used = so->used;
966 si->si_pos = 0;
967 si->len = so->used;
968 _PyObject_GC_TRACK(si);
969 return (PyObject *)si;
970 }
971
972 static int
set_update_internal(PySetObject * so,PyObject * other)973 set_update_internal(PySetObject *so, PyObject *other)
974 {
975 PyObject *key, *it;
976
977 if (PyAnySet_Check(other))
978 return set_merge(so, other);
979
980 if (PyDict_CheckExact(other)) {
981 PyObject *value;
982 Py_ssize_t pos = 0;
983 Py_hash_t hash;
984 Py_ssize_t dictsize = PyDict_Size(other);
985
986 /* Do one big resize at the start, rather than
987 * incrementally resizing as we insert new keys. Expect
988 * that there will be no (or few) overlapping keys.
989 */
990 if (dictsize < 0)
991 return -1;
992 if ((so->fill + dictsize)*3 >= so->mask*2) {
993 if (set_table_resize(so, so->used + dictsize) != 0)
994 return -1;
995 }
996 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
997 if (set_add_entry(so, key, hash))
998 return -1;
999 }
1000 return 0;
1001 }
1002
1003 it = PyObject_GetIter(other);
1004 if (it == NULL)
1005 return -1;
1006
1007 while ((key = PyIter_Next(it)) != NULL) {
1008 if (set_add_key(so, key)) {
1009 Py_DECREF(it);
1010 Py_DECREF(key);
1011 return -1;
1012 }
1013 Py_DECREF(key);
1014 }
1015 Py_DECREF(it);
1016 if (PyErr_Occurred())
1017 return -1;
1018 return 0;
1019 }
1020
1021 static PyObject *
set_update(PySetObject * so,PyObject * args)1022 set_update(PySetObject *so, PyObject *args)
1023 {
1024 Py_ssize_t i;
1025
1026 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1027 PyObject *other = PyTuple_GET_ITEM(args, i);
1028 if (set_update_internal(so, other))
1029 return NULL;
1030 }
1031 Py_RETURN_NONE;
1032 }
1033
1034 PyDoc_STRVAR(update_doc,
1035 "Update a set with the union of itself and others.");
1036
1037 /* XXX Todo:
1038 If aligned memory allocations become available, make the
1039 set object 64 byte aligned so that most of the fields
1040 can be retrieved or updated in a single cache line.
1041 */
1042
1043 static PyObject *
make_new_set(PyTypeObject * type,PyObject * iterable)1044 make_new_set(PyTypeObject *type, PyObject *iterable)
1045 {
1046 PySetObject *so;
1047
1048 so = (PySetObject *)type->tp_alloc(type, 0);
1049 if (so == NULL)
1050 return NULL;
1051
1052 so->fill = 0;
1053 so->used = 0;
1054 so->mask = PySet_MINSIZE - 1;
1055 so->table = so->smalltable;
1056 so->hash = -1;
1057 so->finger = 0;
1058 so->weakreflist = NULL;
1059
1060 if (iterable != NULL) {
1061 if (set_update_internal(so, iterable)) {
1062 Py_DECREF(so);
1063 return NULL;
1064 }
1065 }
1066
1067 return (PyObject *)so;
1068 }
1069
1070 static PyObject *
make_new_set_basetype(PyTypeObject * type,PyObject * iterable)1071 make_new_set_basetype(PyTypeObject *type, PyObject *iterable)
1072 {
1073 if (type != &PySet_Type && type != &PyFrozenSet_Type) {
1074 if (PyType_IsSubtype(type, &PySet_Type))
1075 type = &PySet_Type;
1076 else
1077 type = &PyFrozenSet_Type;
1078 }
1079 return make_new_set(type, iterable);
1080 }
1081
1082 /* The empty frozenset is a singleton */
1083 static PyObject *emptyfrozenset = NULL;
1084
1085 static PyObject *
frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1086 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1087 {
1088 PyObject *iterable = NULL, *result;
1089
1090 if (kwds != NULL && type == &PyFrozenSet_Type
1091 && !_PyArg_NoKeywords("frozenset()", kwds))
1092 return NULL;
1093
1094 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1095 return NULL;
1096
1097 if (type != &PyFrozenSet_Type)
1098 return make_new_set(type, iterable);
1099
1100 if (iterable != NULL) {
1101 /* frozenset(f) is idempotent */
1102 if (PyFrozenSet_CheckExact(iterable)) {
1103 Py_INCREF(iterable);
1104 return iterable;
1105 }
1106 result = make_new_set(type, iterable);
1107 if (result == NULL || PySet_GET_SIZE(result))
1108 return result;
1109 Py_DECREF(result);
1110 }
1111 /* The empty frozenset is a singleton */
1112 if (emptyfrozenset == NULL)
1113 emptyfrozenset = make_new_set(type, NULL);
1114 Py_XINCREF(emptyfrozenset);
1115 return emptyfrozenset;
1116 }
1117
1118 static PyObject *
set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1119 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1120 {
1121 return make_new_set(type, NULL);
1122 }
1123
1124 /* set_swap_bodies() switches the contents of any two sets by moving their
1125 internal data pointers and, if needed, copying the internal smalltables.
1126 Semantically equivalent to:
1127
1128 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1129
1130 The function always succeeds and it leaves both objects in a stable state.
1131 Useful for operations that update in-place (by allowing an intermediate
1132 result to be swapped into one of the original inputs).
1133 */
1134
1135 static void
set_swap_bodies(PySetObject * a,PySetObject * b)1136 set_swap_bodies(PySetObject *a, PySetObject *b)
1137 {
1138 Py_ssize_t t;
1139 setentry *u;
1140 setentry tab[PySet_MINSIZE];
1141 Py_hash_t h;
1142
1143 t = a->fill; a->fill = b->fill; b->fill = t;
1144 t = a->used; a->used = b->used; b->used = t;
1145 t = a->mask; a->mask = b->mask; b->mask = t;
1146
1147 u = a->table;
1148 if (a->table == a->smalltable)
1149 u = b->smalltable;
1150 a->table = b->table;
1151 if (b->table == b->smalltable)
1152 a->table = a->smalltable;
1153 b->table = u;
1154
1155 if (a->table == a->smalltable || b->table == b->smalltable) {
1156 memcpy(tab, a->smalltable, sizeof(tab));
1157 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1158 memcpy(b->smalltable, tab, sizeof(tab));
1159 }
1160
1161 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) &&
1162 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) {
1163 h = a->hash; a->hash = b->hash; b->hash = h;
1164 } else {
1165 a->hash = -1;
1166 b->hash = -1;
1167 }
1168 }
1169
1170 static PyObject *
set_copy(PySetObject * so)1171 set_copy(PySetObject *so)
1172 {
1173 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so);
1174 }
1175
1176 static PyObject *
frozenset_copy(PySetObject * so)1177 frozenset_copy(PySetObject *so)
1178 {
1179 if (PyFrozenSet_CheckExact(so)) {
1180 Py_INCREF(so);
1181 return (PyObject *)so;
1182 }
1183 return set_copy(so);
1184 }
1185
1186 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1187
1188 static PyObject *
set_clear(PySetObject * so)1189 set_clear(PySetObject *so)
1190 {
1191 set_clear_internal(so);
1192 Py_RETURN_NONE;
1193 }
1194
1195 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1196
1197 static PyObject *
set_union(PySetObject * so,PyObject * args)1198 set_union(PySetObject *so, PyObject *args)
1199 {
1200 PySetObject *result;
1201 PyObject *other;
1202 Py_ssize_t i;
1203
1204 result = (PySetObject *)set_copy(so);
1205 if (result == NULL)
1206 return NULL;
1207
1208 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1209 other = PyTuple_GET_ITEM(args, i);
1210 if ((PyObject *)so == other)
1211 continue;
1212 if (set_update_internal(result, other)) {
1213 Py_DECREF(result);
1214 return NULL;
1215 }
1216 }
1217 return (PyObject *)result;
1218 }
1219
1220 PyDoc_STRVAR(union_doc,
1221 "Return the union of sets as a new set.\n\
1222 \n\
1223 (i.e. all elements that are in either set.)");
1224
1225 static PyObject *
set_or(PySetObject * so,PyObject * other)1226 set_or(PySetObject *so, PyObject *other)
1227 {
1228 PySetObject *result;
1229
1230 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1231 Py_RETURN_NOTIMPLEMENTED;
1232
1233 result = (PySetObject *)set_copy(so);
1234 if (result == NULL)
1235 return NULL;
1236 if ((PyObject *)so == other)
1237 return (PyObject *)result;
1238 if (set_update_internal(result, other)) {
1239 Py_DECREF(result);
1240 return NULL;
1241 }
1242 return (PyObject *)result;
1243 }
1244
1245 static PyObject *
set_ior(PySetObject * so,PyObject * other)1246 set_ior(PySetObject *so, PyObject *other)
1247 {
1248 if (!PyAnySet_Check(other))
1249 Py_RETURN_NOTIMPLEMENTED;
1250
1251 if (set_update_internal(so, other))
1252 return NULL;
1253 Py_INCREF(so);
1254 return (PyObject *)so;
1255 }
1256
1257 static PyObject *
set_intersection(PySetObject * so,PyObject * other)1258 set_intersection(PySetObject *so, PyObject *other)
1259 {
1260 PySetObject *result;
1261 PyObject *key, *it, *tmp;
1262 Py_hash_t hash;
1263 int rv;
1264
1265 if ((PyObject *)so == other)
1266 return set_copy(so);
1267
1268 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL);
1269 if (result == NULL)
1270 return NULL;
1271
1272 if (PyAnySet_Check(other)) {
1273 Py_ssize_t pos = 0;
1274 setentry *entry;
1275
1276 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1277 tmp = (PyObject *)so;
1278 so = (PySetObject *)other;
1279 other = tmp;
1280 }
1281
1282 while (set_next((PySetObject *)other, &pos, &entry)) {
1283 key = entry->key;
1284 hash = entry->hash;
1285 rv = set_contains_entry(so, key, hash);
1286 if (rv < 0) {
1287 Py_DECREF(result);
1288 return NULL;
1289 }
1290 if (rv) {
1291 if (set_add_entry(result, key, hash)) {
1292 Py_DECREF(result);
1293 return NULL;
1294 }
1295 }
1296 }
1297 return (PyObject *)result;
1298 }
1299
1300 it = PyObject_GetIter(other);
1301 if (it == NULL) {
1302 Py_DECREF(result);
1303 return NULL;
1304 }
1305
1306 while ((key = PyIter_Next(it)) != NULL) {
1307 hash = PyObject_Hash(key);
1308 if (hash == -1)
1309 goto error;
1310 rv = set_contains_entry(so, key, hash);
1311 if (rv < 0)
1312 goto error;
1313 if (rv) {
1314 if (set_add_entry(result, key, hash))
1315 goto error;
1316 }
1317 Py_DECREF(key);
1318 }
1319 Py_DECREF(it);
1320 if (PyErr_Occurred()) {
1321 Py_DECREF(result);
1322 return NULL;
1323 }
1324 return (PyObject *)result;
1325 error:
1326 Py_DECREF(it);
1327 Py_DECREF(result);
1328 Py_DECREF(key);
1329 return NULL;
1330 }
1331
1332 static PyObject *
set_intersection_multi(PySetObject * so,PyObject * args)1333 set_intersection_multi(PySetObject *so, PyObject *args)
1334 {
1335 Py_ssize_t i;
1336 PyObject *result = (PyObject *)so;
1337
1338 if (PyTuple_GET_SIZE(args) == 0)
1339 return set_copy(so);
1340
1341 Py_INCREF(so);
1342 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1343 PyObject *other = PyTuple_GET_ITEM(args, i);
1344 PyObject *newresult = set_intersection((PySetObject *)result, other);
1345 if (newresult == NULL) {
1346 Py_DECREF(result);
1347 return NULL;
1348 }
1349 Py_DECREF(result);
1350 result = newresult;
1351 }
1352 return result;
1353 }
1354
1355 PyDoc_STRVAR(intersection_doc,
1356 "Return the intersection of two sets as a new set.\n\
1357 \n\
1358 (i.e. all elements that are in both sets.)");
1359
1360 static PyObject *
set_intersection_update(PySetObject * so,PyObject * other)1361 set_intersection_update(PySetObject *so, PyObject *other)
1362 {
1363 PyObject *tmp;
1364
1365 tmp = set_intersection(so, other);
1366 if (tmp == NULL)
1367 return NULL;
1368 set_swap_bodies(so, (PySetObject *)tmp);
1369 Py_DECREF(tmp);
1370 Py_RETURN_NONE;
1371 }
1372
1373 static PyObject *
set_intersection_update_multi(PySetObject * so,PyObject * args)1374 set_intersection_update_multi(PySetObject *so, PyObject *args)
1375 {
1376 PyObject *tmp;
1377
1378 tmp = set_intersection_multi(so, args);
1379 if (tmp == NULL)
1380 return NULL;
1381 set_swap_bodies(so, (PySetObject *)tmp);
1382 Py_DECREF(tmp);
1383 Py_RETURN_NONE;
1384 }
1385
1386 PyDoc_STRVAR(intersection_update_doc,
1387 "Update a set with the intersection of itself and another.");
1388
1389 static PyObject *
set_and(PySetObject * so,PyObject * other)1390 set_and(PySetObject *so, PyObject *other)
1391 {
1392 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1393 Py_RETURN_NOTIMPLEMENTED;
1394 return set_intersection(so, other);
1395 }
1396
1397 static PyObject *
set_iand(PySetObject * so,PyObject * other)1398 set_iand(PySetObject *so, PyObject *other)
1399 {
1400 PyObject *result;
1401
1402 if (!PyAnySet_Check(other))
1403 Py_RETURN_NOTIMPLEMENTED;
1404 result = set_intersection_update(so, other);
1405 if (result == NULL)
1406 return NULL;
1407 Py_DECREF(result);
1408 Py_INCREF(so);
1409 return (PyObject *)so;
1410 }
1411
1412 static PyObject *
set_isdisjoint(PySetObject * so,PyObject * other)1413 set_isdisjoint(PySetObject *so, PyObject *other)
1414 {
1415 PyObject *key, *it, *tmp;
1416 int rv;
1417
1418 if ((PyObject *)so == other) {
1419 if (PySet_GET_SIZE(so) == 0)
1420 Py_RETURN_TRUE;
1421 else
1422 Py_RETURN_FALSE;
1423 }
1424
1425 if (PyAnySet_CheckExact(other)) {
1426 Py_ssize_t pos = 0;
1427 setentry *entry;
1428
1429 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1430 tmp = (PyObject *)so;
1431 so = (PySetObject *)other;
1432 other = tmp;
1433 }
1434 while (set_next((PySetObject *)other, &pos, &entry)) {
1435 rv = set_contains_entry(so, entry->key, entry->hash);
1436 if (rv < 0)
1437 return NULL;
1438 if (rv)
1439 Py_RETURN_FALSE;
1440 }
1441 Py_RETURN_TRUE;
1442 }
1443
1444 it = PyObject_GetIter(other);
1445 if (it == NULL)
1446 return NULL;
1447
1448 while ((key = PyIter_Next(it)) != NULL) {
1449 Py_hash_t hash = PyObject_Hash(key);
1450
1451 if (hash == -1) {
1452 Py_DECREF(key);
1453 Py_DECREF(it);
1454 return NULL;
1455 }
1456 rv = set_contains_entry(so, key, hash);
1457 Py_DECREF(key);
1458 if (rv < 0) {
1459 Py_DECREF(it);
1460 return NULL;
1461 }
1462 if (rv) {
1463 Py_DECREF(it);
1464 Py_RETURN_FALSE;
1465 }
1466 }
1467 Py_DECREF(it);
1468 if (PyErr_Occurred())
1469 return NULL;
1470 Py_RETURN_TRUE;
1471 }
1472
1473 PyDoc_STRVAR(isdisjoint_doc,
1474 "Return True if two sets have a null intersection.");
1475
1476 static int
set_difference_update_internal(PySetObject * so,PyObject * other)1477 set_difference_update_internal(PySetObject *so, PyObject *other)
1478 {
1479 if (PySet_GET_SIZE(so) == 0) {
1480 return 0;
1481 }
1482
1483 if ((PyObject *)so == other)
1484 return set_clear_internal(so);
1485
1486 if (PyAnySet_Check(other)) {
1487 setentry *entry;
1488 Py_ssize_t pos = 0;
1489
1490 while (set_next((PySetObject *)other, &pos, &entry))
1491 if (set_discard_entry(so, entry->key, entry->hash) < 0)
1492 return -1;
1493 } else {
1494 PyObject *key, *it;
1495 it = PyObject_GetIter(other);
1496 if (it == NULL)
1497 return -1;
1498
1499 while ((key = PyIter_Next(it)) != NULL) {
1500 if (set_discard_key(so, key) < 0) {
1501 Py_DECREF(it);
1502 Py_DECREF(key);
1503 return -1;
1504 }
1505 Py_DECREF(key);
1506 }
1507 Py_DECREF(it);
1508 if (PyErr_Occurred())
1509 return -1;
1510 }
1511 /* If more than 1/4th are dummies, then resize them away. */
1512 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4)
1513 return 0;
1514 return set_table_resize(so, so->used);
1515 }
1516
1517 static PyObject *
set_difference_update(PySetObject * so,PyObject * args)1518 set_difference_update(PySetObject *so, PyObject *args)
1519 {
1520 Py_ssize_t i;
1521
1522 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) {
1523 PyObject *other = PyTuple_GET_ITEM(args, i);
1524 if (set_difference_update_internal(so, other))
1525 return NULL;
1526 }
1527 Py_RETURN_NONE;
1528 }
1529
1530 PyDoc_STRVAR(difference_update_doc,
1531 "Remove all elements of another set from this set.");
1532
1533 static PyObject *
set_copy_and_difference(PySetObject * so,PyObject * other)1534 set_copy_and_difference(PySetObject *so, PyObject *other)
1535 {
1536 PyObject *result;
1537
1538 result = set_copy(so);
1539 if (result == NULL)
1540 return NULL;
1541 if (set_difference_update_internal((PySetObject *) result, other) == 0)
1542 return result;
1543 Py_DECREF(result);
1544 return NULL;
1545 }
1546
1547 static PyObject *
set_difference(PySetObject * so,PyObject * other)1548 set_difference(PySetObject *so, PyObject *other)
1549 {
1550 PyObject *result;
1551 PyObject *key;
1552 Py_hash_t hash;
1553 setentry *entry;
1554 Py_ssize_t pos = 0;
1555 int rv;
1556
1557 if (PySet_GET_SIZE(so) == 0) {
1558 return set_copy(so);
1559 }
1560
1561 if (!PyAnySet_Check(other) && !PyDict_CheckExact(other)) {
1562 return set_copy_and_difference(so, other);
1563 }
1564
1565 /* If len(so) much more than len(other), it's more efficient to simply copy
1566 * so and then iterate other looking for common elements. */
1567 if ((PySet_GET_SIZE(so) >> 2) > PyObject_Size(other)) {
1568 return set_copy_and_difference(so, other);
1569 }
1570
1571 result = make_new_set_basetype(Py_TYPE(so), NULL);
1572 if (result == NULL)
1573 return NULL;
1574
1575 if (PyDict_CheckExact(other)) {
1576 while (set_next(so, &pos, &entry)) {
1577 key = entry->key;
1578 hash = entry->hash;
1579 rv = _PyDict_Contains(other, key, hash);
1580 if (rv < 0) {
1581 Py_DECREF(result);
1582 return NULL;
1583 }
1584 if (!rv) {
1585 if (set_add_entry((PySetObject *)result, key, hash)) {
1586 Py_DECREF(result);
1587 return NULL;
1588 }
1589 }
1590 }
1591 return result;
1592 }
1593
1594 /* Iterate over so, checking for common elements in other. */
1595 while (set_next(so, &pos, &entry)) {
1596 key = entry->key;
1597 hash = entry->hash;
1598 rv = set_contains_entry((PySetObject *)other, key, hash);
1599 if (rv < 0) {
1600 Py_DECREF(result);
1601 return NULL;
1602 }
1603 if (!rv) {
1604 if (set_add_entry((PySetObject *)result, key, hash)) {
1605 Py_DECREF(result);
1606 return NULL;
1607 }
1608 }
1609 }
1610 return result;
1611 }
1612
1613 static PyObject *
set_difference_multi(PySetObject * so,PyObject * args)1614 set_difference_multi(PySetObject *so, PyObject *args)
1615 {
1616 Py_ssize_t i;
1617 PyObject *result, *other;
1618
1619 if (PyTuple_GET_SIZE(args) == 0)
1620 return set_copy(so);
1621
1622 other = PyTuple_GET_ITEM(args, 0);
1623 result = set_difference(so, other);
1624 if (result == NULL)
1625 return NULL;
1626
1627 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) {
1628 other = PyTuple_GET_ITEM(args, i);
1629 if (set_difference_update_internal((PySetObject *)result, other)) {
1630 Py_DECREF(result);
1631 return NULL;
1632 }
1633 }
1634 return result;
1635 }
1636
1637 PyDoc_STRVAR(difference_doc,
1638 "Return the difference of two or more sets as a new set.\n\
1639 \n\
1640 (i.e. all elements that are in this set but not the others.)");
1641 static PyObject *
set_sub(PySetObject * so,PyObject * other)1642 set_sub(PySetObject *so, PyObject *other)
1643 {
1644 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1645 Py_RETURN_NOTIMPLEMENTED;
1646 return set_difference(so, other);
1647 }
1648
1649 static PyObject *
set_isub(PySetObject * so,PyObject * other)1650 set_isub(PySetObject *so, PyObject *other)
1651 {
1652 if (!PyAnySet_Check(other))
1653 Py_RETURN_NOTIMPLEMENTED;
1654 if (set_difference_update_internal(so, other))
1655 return NULL;
1656 Py_INCREF(so);
1657 return (PyObject *)so;
1658 }
1659
1660 static PyObject *
set_symmetric_difference_update(PySetObject * so,PyObject * other)1661 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1662 {
1663 PySetObject *otherset;
1664 PyObject *key;
1665 Py_ssize_t pos = 0;
1666 Py_hash_t hash;
1667 setentry *entry;
1668 int rv;
1669
1670 if ((PyObject *)so == other)
1671 return set_clear(so);
1672
1673 if (PyDict_CheckExact(other)) {
1674 PyObject *value;
1675 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1676 Py_INCREF(key);
1677 rv = set_discard_entry(so, key, hash);
1678 if (rv < 0) {
1679 Py_DECREF(key);
1680 return NULL;
1681 }
1682 if (rv == DISCARD_NOTFOUND) {
1683 if (set_add_entry(so, key, hash)) {
1684 Py_DECREF(key);
1685 return NULL;
1686 }
1687 }
1688 Py_DECREF(key);
1689 }
1690 Py_RETURN_NONE;
1691 }
1692
1693 if (PyAnySet_Check(other)) {
1694 Py_INCREF(other);
1695 otherset = (PySetObject *)other;
1696 } else {
1697 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1698 if (otherset == NULL)
1699 return NULL;
1700 }
1701
1702 while (set_next(otherset, &pos, &entry)) {
1703 key = entry->key;
1704 hash = entry->hash;
1705 rv = set_discard_entry(so, key, hash);
1706 if (rv < 0) {
1707 Py_DECREF(otherset);
1708 return NULL;
1709 }
1710 if (rv == DISCARD_NOTFOUND) {
1711 if (set_add_entry(so, key, hash)) {
1712 Py_DECREF(otherset);
1713 return NULL;
1714 }
1715 }
1716 }
1717 Py_DECREF(otherset);
1718 Py_RETURN_NONE;
1719 }
1720
1721 PyDoc_STRVAR(symmetric_difference_update_doc,
1722 "Update a set with the symmetric difference of itself and another.");
1723
1724 static PyObject *
set_symmetric_difference(PySetObject * so,PyObject * other)1725 set_symmetric_difference(PySetObject *so, PyObject *other)
1726 {
1727 PyObject *rv;
1728 PySetObject *otherset;
1729
1730 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other);
1731 if (otherset == NULL)
1732 return NULL;
1733 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1734 if (rv == NULL)
1735 return NULL;
1736 Py_DECREF(rv);
1737 return (PyObject *)otherset;
1738 }
1739
1740 PyDoc_STRVAR(symmetric_difference_doc,
1741 "Return the symmetric difference of two sets as a new set.\n\
1742 \n\
1743 (i.e. all elements that are in exactly one of the sets.)");
1744
1745 static PyObject *
set_xor(PySetObject * so,PyObject * other)1746 set_xor(PySetObject *so, PyObject *other)
1747 {
1748 if (!PyAnySet_Check(so) || !PyAnySet_Check(other))
1749 Py_RETURN_NOTIMPLEMENTED;
1750 return set_symmetric_difference(so, other);
1751 }
1752
1753 static PyObject *
set_ixor(PySetObject * so,PyObject * other)1754 set_ixor(PySetObject *so, PyObject *other)
1755 {
1756 PyObject *result;
1757
1758 if (!PyAnySet_Check(other))
1759 Py_RETURN_NOTIMPLEMENTED;
1760 result = set_symmetric_difference_update(so, other);
1761 if (result == NULL)
1762 return NULL;
1763 Py_DECREF(result);
1764 Py_INCREF(so);
1765 return (PyObject *)so;
1766 }
1767
1768 static PyObject *
set_issubset(PySetObject * so,PyObject * other)1769 set_issubset(PySetObject *so, PyObject *other)
1770 {
1771 setentry *entry;
1772 Py_ssize_t pos = 0;
1773 int rv;
1774
1775 if (!PyAnySet_Check(other)) {
1776 PyObject *tmp, *result;
1777 tmp = make_new_set(&PySet_Type, other);
1778 if (tmp == NULL)
1779 return NULL;
1780 result = set_issubset(so, tmp);
1781 Py_DECREF(tmp);
1782 return result;
1783 }
1784 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1785 Py_RETURN_FALSE;
1786
1787 while (set_next(so, &pos, &entry)) {
1788 rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash);
1789 if (rv < 0)
1790 return NULL;
1791 if (!rv)
1792 Py_RETURN_FALSE;
1793 }
1794 Py_RETURN_TRUE;
1795 }
1796
1797 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1798
1799 static PyObject *
set_issuperset(PySetObject * so,PyObject * other)1800 set_issuperset(PySetObject *so, PyObject *other)
1801 {
1802 PyObject *tmp, *result;
1803
1804 if (!PyAnySet_Check(other)) {
1805 tmp = make_new_set(&PySet_Type, other);
1806 if (tmp == NULL)
1807 return NULL;
1808 result = set_issuperset(so, tmp);
1809 Py_DECREF(tmp);
1810 return result;
1811 }
1812 return set_issubset((PySetObject *)other, (PyObject *)so);
1813 }
1814
1815 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1816
1817 static PyObject *
set_richcompare(PySetObject * v,PyObject * w,int op)1818 set_richcompare(PySetObject *v, PyObject *w, int op)
1819 {
1820 PyObject *r1;
1821 int r2;
1822
1823 if(!PyAnySet_Check(w))
1824 Py_RETURN_NOTIMPLEMENTED;
1825
1826 switch (op) {
1827 case Py_EQ:
1828 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1829 Py_RETURN_FALSE;
1830 if (v->hash != -1 &&
1831 ((PySetObject *)w)->hash != -1 &&
1832 v->hash != ((PySetObject *)w)->hash)
1833 Py_RETURN_FALSE;
1834 return set_issubset(v, w);
1835 case Py_NE:
1836 r1 = set_richcompare(v, w, Py_EQ);
1837 if (r1 == NULL)
1838 return NULL;
1839 r2 = PyObject_IsTrue(r1);
1840 Py_DECREF(r1);
1841 if (r2 < 0)
1842 return NULL;
1843 return PyBool_FromLong(!r2);
1844 case Py_LE:
1845 return set_issubset(v, w);
1846 case Py_GE:
1847 return set_issuperset(v, w);
1848 case Py_LT:
1849 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1850 Py_RETURN_FALSE;
1851 return set_issubset(v, w);
1852 case Py_GT:
1853 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1854 Py_RETURN_FALSE;
1855 return set_issuperset(v, w);
1856 }
1857 Py_RETURN_NOTIMPLEMENTED;
1858 }
1859
1860 static PyObject *
set_add(PySetObject * so,PyObject * key)1861 set_add(PySetObject *so, PyObject *key)
1862 {
1863 if (set_add_key(so, key))
1864 return NULL;
1865 Py_RETURN_NONE;
1866 }
1867
1868 PyDoc_STRVAR(add_doc,
1869 "Add an element to a set.\n\
1870 \n\
1871 This has no effect if the element is already present.");
1872
1873 static int
set_contains(PySetObject * so,PyObject * key)1874 set_contains(PySetObject *so, PyObject *key)
1875 {
1876 PyObject *tmpkey;
1877 int rv;
1878
1879 rv = set_contains_key(so, key);
1880 if (rv < 0) {
1881 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1882 return -1;
1883 PyErr_Clear();
1884 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1885 if (tmpkey == NULL)
1886 return -1;
1887 rv = set_contains_key(so, tmpkey);
1888 Py_DECREF(tmpkey);
1889 }
1890 return rv;
1891 }
1892
1893 static PyObject *
set_direct_contains(PySetObject * so,PyObject * key)1894 set_direct_contains(PySetObject *so, PyObject *key)
1895 {
1896 long result;
1897
1898 result = set_contains(so, key);
1899 if (result < 0)
1900 return NULL;
1901 return PyBool_FromLong(result);
1902 }
1903
1904 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1905
1906 static PyObject *
set_remove(PySetObject * so,PyObject * key)1907 set_remove(PySetObject *so, PyObject *key)
1908 {
1909 PyObject *tmpkey;
1910 int rv;
1911
1912 rv = set_discard_key(so, key);
1913 if (rv < 0) {
1914 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1915 return NULL;
1916 PyErr_Clear();
1917 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1918 if (tmpkey == NULL)
1919 return NULL;
1920 rv = set_discard_key(so, tmpkey);
1921 Py_DECREF(tmpkey);
1922 if (rv < 0)
1923 return NULL;
1924 }
1925
1926 if (rv == DISCARD_NOTFOUND) {
1927 _PyErr_SetKeyError(key);
1928 return NULL;
1929 }
1930 Py_RETURN_NONE;
1931 }
1932
1933 PyDoc_STRVAR(remove_doc,
1934 "Remove an element from a set; it must be a member.\n\
1935 \n\
1936 If the element is not a member, raise a KeyError.");
1937
1938 static PyObject *
set_discard(PySetObject * so,PyObject * key)1939 set_discard(PySetObject *so, PyObject *key)
1940 {
1941 PyObject *tmpkey;
1942 int rv;
1943
1944 rv = set_discard_key(so, key);
1945 if (rv < 0) {
1946 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1947 return NULL;
1948 PyErr_Clear();
1949 tmpkey = make_new_set(&PyFrozenSet_Type, key);
1950 if (tmpkey == NULL)
1951 return NULL;
1952 rv = set_discard_key(so, tmpkey);
1953 Py_DECREF(tmpkey);
1954 if (rv < 0)
1955 return NULL;
1956 }
1957 Py_RETURN_NONE;
1958 }
1959
1960 PyDoc_STRVAR(discard_doc,
1961 "Remove an element from a set if it is a member.\n\
1962 \n\
1963 If the element is not a member, do nothing.");
1964
1965 static PyObject *
set_reduce(PySetObject * so)1966 set_reduce(PySetObject *so)
1967 {
1968 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1969 _Py_IDENTIFIER(__dict__);
1970
1971 keys = PySequence_List((PyObject *)so);
1972 if (keys == NULL)
1973 goto done;
1974 args = PyTuple_Pack(1, keys);
1975 if (args == NULL)
1976 goto done;
1977 dict = _PyObject_GetAttrId((PyObject *)so, &PyId___dict__);
1978 if (dict == NULL) {
1979 PyErr_Clear();
1980 dict = Py_None;
1981 Py_INCREF(dict);
1982 }
1983 result = PyTuple_Pack(3, Py_TYPE(so), args, dict);
1984 done:
1985 Py_XDECREF(args);
1986 Py_XDECREF(keys);
1987 Py_XDECREF(dict);
1988 return result;
1989 }
1990
1991 static PyObject *
set_sizeof(PySetObject * so)1992 set_sizeof(PySetObject *so)
1993 {
1994 Py_ssize_t res;
1995
1996 res = _PyObject_SIZE(Py_TYPE(so));
1997 if (so->table != so->smalltable)
1998 res = res + (so->mask + 1) * sizeof(setentry);
1999 return PyLong_FromSsize_t(res);
2000 }
2001
2002 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes");
2003 static int
set_init(PySetObject * self,PyObject * args,PyObject * kwds)2004 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
2005 {
2006 PyObject *iterable = NULL;
2007
2008 if (kwds != NULL && !_PyArg_NoKeywords("set()", kwds))
2009 return -1;
2010 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable))
2011 return -1;
2012 if (self->fill)
2013 set_clear_internal(self);
2014 self->hash = -1;
2015 if (iterable == NULL)
2016 return 0;
2017 return set_update_internal(self, iterable);
2018 }
2019
2020 static PySequenceMethods set_as_sequence = {
2021 set_len, /* sq_length */
2022 0, /* sq_concat */
2023 0, /* sq_repeat */
2024 0, /* sq_item */
2025 0, /* sq_slice */
2026 0, /* sq_ass_item */
2027 0, /* sq_ass_slice */
2028 (objobjproc)set_contains, /* sq_contains */
2029 };
2030
2031 /* set object ********************************************************/
2032
2033 #ifdef Py_DEBUG
2034 static PyObject *test_c_api(PySetObject *so);
2035
2036 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
2037 All is well if assertions don't fail.");
2038 #endif
2039
2040 static PyMethodDef set_methods[] = {
2041 {"add", (PyCFunction)set_add, METH_O,
2042 add_doc},
2043 {"clear", (PyCFunction)set_clear, METH_NOARGS,
2044 clear_doc},
2045 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2046 contains_doc},
2047 {"copy", (PyCFunction)set_copy, METH_NOARGS,
2048 copy_doc},
2049 {"discard", (PyCFunction)set_discard, METH_O,
2050 discard_doc},
2051 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2052 difference_doc},
2053 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS,
2054 difference_update_doc},
2055 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS,
2056 intersection_doc},
2057 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS,
2058 intersection_update_doc},
2059 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2060 isdisjoint_doc},
2061 {"issubset", (PyCFunction)set_issubset, METH_O,
2062 issubset_doc},
2063 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2064 issuperset_doc},
2065 {"pop", (PyCFunction)set_pop, METH_NOARGS,
2066 pop_doc},
2067 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2068 reduce_doc},
2069 {"remove", (PyCFunction)set_remove, METH_O,
2070 remove_doc},
2071 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2072 sizeof_doc},
2073 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2074 symmetric_difference_doc},
2075 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
2076 symmetric_difference_update_doc},
2077 #ifdef Py_DEBUG
2078 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
2079 test_c_api_doc},
2080 #endif
2081 {"union", (PyCFunction)set_union, METH_VARARGS,
2082 union_doc},
2083 {"update", (PyCFunction)set_update, METH_VARARGS,
2084 update_doc},
2085 {NULL, NULL} /* sentinel */
2086 };
2087
2088 static PyNumberMethods set_as_number = {
2089 0, /*nb_add*/
2090 (binaryfunc)set_sub, /*nb_subtract*/
2091 0, /*nb_multiply*/
2092 0, /*nb_remainder*/
2093 0, /*nb_divmod*/
2094 0, /*nb_power*/
2095 0, /*nb_negative*/
2096 0, /*nb_positive*/
2097 0, /*nb_absolute*/
2098 0, /*nb_bool*/
2099 0, /*nb_invert*/
2100 0, /*nb_lshift*/
2101 0, /*nb_rshift*/
2102 (binaryfunc)set_and, /*nb_and*/
2103 (binaryfunc)set_xor, /*nb_xor*/
2104 (binaryfunc)set_or, /*nb_or*/
2105 0, /*nb_int*/
2106 0, /*nb_reserved*/
2107 0, /*nb_float*/
2108 0, /*nb_inplace_add*/
2109 (binaryfunc)set_isub, /*nb_inplace_subtract*/
2110 0, /*nb_inplace_multiply*/
2111 0, /*nb_inplace_remainder*/
2112 0, /*nb_inplace_power*/
2113 0, /*nb_inplace_lshift*/
2114 0, /*nb_inplace_rshift*/
2115 (binaryfunc)set_iand, /*nb_inplace_and*/
2116 (binaryfunc)set_ixor, /*nb_inplace_xor*/
2117 (binaryfunc)set_ior, /*nb_inplace_or*/
2118 };
2119
2120 PyDoc_STRVAR(set_doc,
2121 "set() -> new empty set object\n\
2122 set(iterable) -> new set object\n\
2123 \n\
2124 Build an unordered collection of unique elements.");
2125
2126 PyTypeObject PySet_Type = {
2127 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2128 "set", /* tp_name */
2129 sizeof(PySetObject), /* tp_basicsize */
2130 0, /* tp_itemsize */
2131 /* methods */
2132 (destructor)set_dealloc, /* tp_dealloc */
2133 0, /* tp_print */
2134 0, /* tp_getattr */
2135 0, /* tp_setattr */
2136 0, /* tp_reserved */
2137 (reprfunc)set_repr, /* tp_repr */
2138 &set_as_number, /* tp_as_number */
2139 &set_as_sequence, /* tp_as_sequence */
2140 0, /* tp_as_mapping */
2141 PyObject_HashNotImplemented, /* tp_hash */
2142 0, /* tp_call */
2143 0, /* tp_str */
2144 PyObject_GenericGetAttr, /* tp_getattro */
2145 0, /* tp_setattro */
2146 0, /* tp_as_buffer */
2147 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2148 Py_TPFLAGS_BASETYPE, /* tp_flags */
2149 set_doc, /* tp_doc */
2150 (traverseproc)set_traverse, /* tp_traverse */
2151 (inquiry)set_clear_internal, /* tp_clear */
2152 (richcmpfunc)set_richcompare, /* tp_richcompare */
2153 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2154 (getiterfunc)set_iter, /* tp_iter */
2155 0, /* tp_iternext */
2156 set_methods, /* tp_methods */
2157 0, /* tp_members */
2158 0, /* tp_getset */
2159 0, /* tp_base */
2160 0, /* tp_dict */
2161 0, /* tp_descr_get */
2162 0, /* tp_descr_set */
2163 0, /* tp_dictoffset */
2164 (initproc)set_init, /* tp_init */
2165 PyType_GenericAlloc, /* tp_alloc */
2166 set_new, /* tp_new */
2167 PyObject_GC_Del, /* tp_free */
2168 };
2169
2170 /* frozenset object ********************************************************/
2171
2172
2173 static PyMethodDef frozenset_methods[] = {
2174 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
2175 contains_doc},
2176 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
2177 copy_doc},
2178 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS,
2179 difference_doc},
2180 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS,
2181 intersection_doc},
2182 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O,
2183 isdisjoint_doc},
2184 {"issubset", (PyCFunction)set_issubset, METH_O,
2185 issubset_doc},
2186 {"issuperset", (PyCFunction)set_issuperset, METH_O,
2187 issuperset_doc},
2188 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
2189 reduce_doc},
2190 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS,
2191 sizeof_doc},
2192 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
2193 symmetric_difference_doc},
2194 {"union", (PyCFunction)set_union, METH_VARARGS,
2195 union_doc},
2196 {NULL, NULL} /* sentinel */
2197 };
2198
2199 static PyNumberMethods frozenset_as_number = {
2200 0, /*nb_add*/
2201 (binaryfunc)set_sub, /*nb_subtract*/
2202 0, /*nb_multiply*/
2203 0, /*nb_remainder*/
2204 0, /*nb_divmod*/
2205 0, /*nb_power*/
2206 0, /*nb_negative*/
2207 0, /*nb_positive*/
2208 0, /*nb_absolute*/
2209 0, /*nb_bool*/
2210 0, /*nb_invert*/
2211 0, /*nb_lshift*/
2212 0, /*nb_rshift*/
2213 (binaryfunc)set_and, /*nb_and*/
2214 (binaryfunc)set_xor, /*nb_xor*/
2215 (binaryfunc)set_or, /*nb_or*/
2216 };
2217
2218 PyDoc_STRVAR(frozenset_doc,
2219 "frozenset() -> empty frozenset object\n\
2220 frozenset(iterable) -> frozenset object\n\
2221 \n\
2222 Build an immutable unordered collection of unique elements.");
2223
2224 PyTypeObject PyFrozenSet_Type = {
2225 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2226 "frozenset", /* tp_name */
2227 sizeof(PySetObject), /* tp_basicsize */
2228 0, /* tp_itemsize */
2229 /* methods */
2230 (destructor)set_dealloc, /* tp_dealloc */
2231 0, /* tp_print */
2232 0, /* tp_getattr */
2233 0, /* tp_setattr */
2234 0, /* tp_reserved */
2235 (reprfunc)set_repr, /* tp_repr */
2236 &frozenset_as_number, /* tp_as_number */
2237 &set_as_sequence, /* tp_as_sequence */
2238 0, /* tp_as_mapping */
2239 frozenset_hash, /* tp_hash */
2240 0, /* tp_call */
2241 0, /* tp_str */
2242 PyObject_GenericGetAttr, /* tp_getattro */
2243 0, /* tp_setattro */
2244 0, /* tp_as_buffer */
2245 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2246 Py_TPFLAGS_BASETYPE, /* tp_flags */
2247 frozenset_doc, /* tp_doc */
2248 (traverseproc)set_traverse, /* tp_traverse */
2249 (inquiry)set_clear_internal, /* tp_clear */
2250 (richcmpfunc)set_richcompare, /* tp_richcompare */
2251 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2252 (getiterfunc)set_iter, /* tp_iter */
2253 0, /* tp_iternext */
2254 frozenset_methods, /* tp_methods */
2255 0, /* tp_members */
2256 0, /* tp_getset */
2257 0, /* tp_base */
2258 0, /* tp_dict */
2259 0, /* tp_descr_get */
2260 0, /* tp_descr_set */
2261 0, /* tp_dictoffset */
2262 0, /* tp_init */
2263 PyType_GenericAlloc, /* tp_alloc */
2264 frozenset_new, /* tp_new */
2265 PyObject_GC_Del, /* tp_free */
2266 };
2267
2268
2269 /***** C API functions *************************************************/
2270
2271 PyObject *
PySet_New(PyObject * iterable)2272 PySet_New(PyObject *iterable)
2273 {
2274 return make_new_set(&PySet_Type, iterable);
2275 }
2276
2277 PyObject *
PyFrozenSet_New(PyObject * iterable)2278 PyFrozenSet_New(PyObject *iterable)
2279 {
2280 return make_new_set(&PyFrozenSet_Type, iterable);
2281 }
2282
2283 Py_ssize_t
PySet_Size(PyObject * anyset)2284 PySet_Size(PyObject *anyset)
2285 {
2286 if (!PyAnySet_Check(anyset)) {
2287 PyErr_BadInternalCall();
2288 return -1;
2289 }
2290 return PySet_GET_SIZE(anyset);
2291 }
2292
2293 int
PySet_Clear(PyObject * set)2294 PySet_Clear(PyObject *set)
2295 {
2296 if (!PySet_Check(set)) {
2297 PyErr_BadInternalCall();
2298 return -1;
2299 }
2300 return set_clear_internal((PySetObject *)set);
2301 }
2302
2303 int
PySet_Contains(PyObject * anyset,PyObject * key)2304 PySet_Contains(PyObject *anyset, PyObject *key)
2305 {
2306 if (!PyAnySet_Check(anyset)) {
2307 PyErr_BadInternalCall();
2308 return -1;
2309 }
2310 return set_contains_key((PySetObject *)anyset, key);
2311 }
2312
2313 int
PySet_Discard(PyObject * set,PyObject * key)2314 PySet_Discard(PyObject *set, PyObject *key)
2315 {
2316 if (!PySet_Check(set)) {
2317 PyErr_BadInternalCall();
2318 return -1;
2319 }
2320 return set_discard_key((PySetObject *)set, key);
2321 }
2322
2323 int
PySet_Add(PyObject * anyset,PyObject * key)2324 PySet_Add(PyObject *anyset, PyObject *key)
2325 {
2326 if (!PySet_Check(anyset) &&
2327 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) {
2328 PyErr_BadInternalCall();
2329 return -1;
2330 }
2331 return set_add_key((PySetObject *)anyset, key);
2332 }
2333
2334 int
PySet_ClearFreeList(void)2335 PySet_ClearFreeList(void)
2336 {
2337 return 0;
2338 }
2339
2340 void
PySet_Fini(void)2341 PySet_Fini(void)
2342 {
2343 Py_CLEAR(emptyfrozenset);
2344 }
2345
2346 int
_PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2347 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash)
2348 {
2349 setentry *entry;
2350
2351 if (!PyAnySet_Check(set)) {
2352 PyErr_BadInternalCall();
2353 return -1;
2354 }
2355 if (set_next((PySetObject *)set, pos, &entry) == 0)
2356 return 0;
2357 *key = entry->key;
2358 *hash = entry->hash;
2359 return 1;
2360 }
2361
2362 PyObject *
PySet_Pop(PyObject * set)2363 PySet_Pop(PyObject *set)
2364 {
2365 if (!PySet_Check(set)) {
2366 PyErr_BadInternalCall();
2367 return NULL;
2368 }
2369 return set_pop((PySetObject *)set);
2370 }
2371
2372 int
_PySet_Update(PyObject * set,PyObject * iterable)2373 _PySet_Update(PyObject *set, PyObject *iterable)
2374 {
2375 if (!PySet_Check(set)) {
2376 PyErr_BadInternalCall();
2377 return -1;
2378 }
2379 return set_update_internal((PySetObject *)set, iterable);
2380 }
2381
2382 /* Exported for the gdb plugin's benefit. */
2383 PyObject *_PySet_Dummy = dummy;
2384
2385 #ifdef Py_DEBUG
2386
2387 /* Test code to be called with any three element set.
2388 Returns True and original set is restored. */
2389
2390 #define assertRaises(call_return_value, exception) \
2391 do { \
2392 assert(call_return_value); \
2393 assert(PyErr_ExceptionMatches(exception)); \
2394 PyErr_Clear(); \
2395 } while(0)
2396
2397 static PyObject *
test_c_api(PySetObject * so)2398 test_c_api(PySetObject *so)
2399 {
2400 Py_ssize_t count;
2401 char *s;
2402 Py_ssize_t i;
2403 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL;
2404 PyObject *ob = (PyObject *)so;
2405 Py_hash_t hash;
2406 PyObject *str;
2407
2408 /* Verify preconditions */
2409 assert(PyAnySet_Check(ob));
2410 assert(PyAnySet_CheckExact(ob));
2411 assert(!PyFrozenSet_CheckExact(ob));
2412
2413 /* so.clear(); so |= set("abc"); */
2414 str = PyUnicode_FromString("abc");
2415 if (str == NULL)
2416 return NULL;
2417 set_clear_internal(so);
2418 if (set_update_internal(so, str)) {
2419 Py_DECREF(str);
2420 return NULL;
2421 }
2422 Py_DECREF(str);
2423
2424 /* Exercise type/size checks */
2425 assert(PySet_Size(ob) == 3);
2426 assert(PySet_GET_SIZE(ob) == 3);
2427
2428 /* Raise TypeError for non-iterable constructor arguments */
2429 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2430 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2431
2432 /* Raise TypeError for unhashable key */
2433 dup = PySet_New(ob);
2434 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2435 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2436 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2437
2438 /* Exercise successful pop, contains, add, and discard */
2439 elem = PySet_Pop(ob);
2440 assert(PySet_Contains(ob, elem) == 0);
2441 assert(PySet_GET_SIZE(ob) == 2);
2442 assert(PySet_Add(ob, elem) == 0);
2443 assert(PySet_Contains(ob, elem) == 1);
2444 assert(PySet_GET_SIZE(ob) == 3);
2445 assert(PySet_Discard(ob, elem) == 1);
2446 assert(PySet_GET_SIZE(ob) == 2);
2447 assert(PySet_Discard(ob, elem) == 0);
2448 assert(PySet_GET_SIZE(ob) == 2);
2449
2450 /* Exercise clear */
2451 dup2 = PySet_New(dup);
2452 assert(PySet_Clear(dup2) == 0);
2453 assert(PySet_Size(dup2) == 0);
2454 Py_DECREF(dup2);
2455
2456 /* Raise SystemError on clear or update of frozen set */
2457 f = PyFrozenSet_New(dup);
2458 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2459 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2460 assert(PySet_Add(f, elem) == 0);
2461 Py_INCREF(f);
2462 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2463 Py_DECREF(f);
2464 Py_DECREF(f);
2465
2466 /* Exercise direct iteration */
2467 i = 0, count = 0;
2468 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) {
2469 s = PyUnicode_AsUTF8(x);
2470 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2471 count++;
2472 }
2473 assert(count == 3);
2474
2475 /* Exercise updates */
2476 dup2 = PySet_New(NULL);
2477 assert(_PySet_Update(dup2, dup) == 0);
2478 assert(PySet_Size(dup2) == 3);
2479 assert(_PySet_Update(dup2, dup) == 0);
2480 assert(PySet_Size(dup2) == 3);
2481 Py_DECREF(dup2);
2482
2483 /* Raise SystemError when self argument is not a set or frozenset. */
2484 t = PyTuple_New(0);
2485 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2486 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2487 Py_DECREF(t);
2488
2489 /* Raise SystemError when self argument is not a set. */
2490 f = PyFrozenSet_New(dup);
2491 assert(PySet_Size(f) == 3);
2492 assert(PyFrozenSet_CheckExact(f));
2493 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2494 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2495 Py_DECREF(f);
2496
2497 /* Raise KeyError when popping from an empty set */
2498 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2499 Py_DECREF(ob);
2500 assert(PySet_GET_SIZE(ob) == 0);
2501 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2502
2503 /* Restore the set from the copy using the PyNumber API */
2504 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2505 Py_DECREF(ob);
2506
2507 /* Verify constructors accept NULL arguments */
2508 f = PySet_New(NULL);
2509 assert(f != NULL);
2510 assert(PySet_GET_SIZE(f) == 0);
2511 Py_DECREF(f);
2512 f = PyFrozenSet_New(NULL);
2513 assert(f != NULL);
2514 assert(PyFrozenSet_CheckExact(f));
2515 assert(PySet_GET_SIZE(f) == 0);
2516 Py_DECREF(f);
2517
2518 Py_DECREF(elem);
2519 Py_DECREF(dup);
2520 Py_RETURN_TRUE;
2521 }
2522
2523 #undef assertRaises
2524
2525 #endif
2526
2527 /***** Dummy Struct *************************************************/
2528
2529 static PyObject *
dummy_repr(PyObject * op)2530 dummy_repr(PyObject *op)
2531 {
2532 return PyUnicode_FromString("<dummy key>");
2533 }
2534
2535 static void
dummy_dealloc(PyObject * ignore)2536 dummy_dealloc(PyObject* ignore)
2537 {
2538 Py_FatalError("deallocating <dummy key>");
2539 }
2540
2541 static PyTypeObject _PySetDummy_Type = {
2542 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2543 "<dummy key> type",
2544 0,
2545 0,
2546 dummy_dealloc, /*tp_dealloc*/ /*never called*/
2547 0, /*tp_print*/
2548 0, /*tp_getattr*/
2549 0, /*tp_setattr*/
2550 0, /*tp_reserved*/
2551 dummy_repr, /*tp_repr*/
2552 0, /*tp_as_number*/
2553 0, /*tp_as_sequence*/
2554 0, /*tp_as_mapping*/
2555 0, /*tp_hash */
2556 0, /*tp_call */
2557 0, /*tp_str */
2558 0, /*tp_getattro */
2559 0, /*tp_setattro */
2560 0, /*tp_as_buffer */
2561 Py_TPFLAGS_DEFAULT, /*tp_flags */
2562 };
2563
2564 static PyObject _dummy_struct = {
2565 _PyObject_EXTRA_INIT
2566 2, &_PySetDummy_Type
2567 };
2568
2569