1 2 /* set object implementation 3 4 Written and maintained by Raymond D. Hettinger <python@rcn.com> 5 Derived from Lib/sets.py and Objects/dictobject.c. 6 7 The basic lookup function used by all operations. 8 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. 9 10 The initial probe index is computed as hash mod the table size. 11 Subsequent probe indices are computed as explained in Objects/dictobject.c. 12 13 To improve cache locality, each probe inspects a series of consecutive 14 nearby entries before moving on to probes elsewhere in memory. This leaves 15 us with a hybrid of linear probing and randomized probing. The linear probing 16 reduces the cost of hash collisions because consecutive memory accesses 17 tend to be much cheaper than scattered probes. After LINEAR_PROBES steps, 18 we then use more of the upper bits from the hash value and apply a simple 19 linear congruential random number genearator. This helps break-up long 20 chains of collisions. 21 22 All arithmetic on hash should ignore overflow. 23 24 Unlike the dictionary implementation, the lookkey function can return 25 NULL if the rich comparison returns an error. 26 27 Use cases for sets differ considerably from dictionaries where looked-up 28 keys are more likely to be present. In contrast, sets are primarily 29 about membership testing where the presence of an element is not known in 30 advance. Accordingly, the set implementation needs to optimize for both 31 the found and not-found case. 32 */ 33 34 #include "Python.h" 35 #include "pycore_object.h" // _PyObject_GC_UNTRACK() 36 #include <stddef.h> // offsetof() 37 38 /* Object used as dummy key to fill deleted entries */ 39 static PyObject _dummy_struct; 40 41 #define dummy (&_dummy_struct) 42 43 44 /* ======================================================================== */ 45 /* ======= Begin logic for probing the hash table ========================= */ 46 47 /* Set this to zero to turn-off linear probing */ 48 #ifndef LINEAR_PROBES 49 #define LINEAR_PROBES 9 50 #endif 51 52 /* This must be >= 1 */ 53 #define PERTURB_SHIFT 5 54 55 static setentry * set_lookkey(PySetObject * so,PyObject * key,Py_hash_t hash)56 set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) 57 { 58 setentry *table; 59 setentry *entry; 60 size_t perturb = hash; 61 size_t mask = so->mask; 62 size_t i = (size_t)hash & mask; /* Unsigned for defined overflow behavior */ 63 int probes; 64 int cmp; 65 66 while (1) { 67 entry = &so->table[i]; 68 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0; 69 do { 70 if (entry->hash == 0 && entry->key == NULL) 71 return entry; 72 if (entry->hash == hash) { 73 PyObject *startkey = entry->key; 74 assert(startkey != dummy); 75 if (startkey == key) 76 return entry; 77 if (PyUnicode_CheckExact(startkey) 78 && PyUnicode_CheckExact(key) 79 && _PyUnicode_EQ(startkey, key)) 80 return entry; 81 table = so->table; 82 Py_INCREF(startkey); 83 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); 84 Py_DECREF(startkey); 85 if (cmp < 0) 86 return NULL; 87 if (table != so->table || entry->key != startkey) 88 return set_lookkey(so, key, hash); 89 if (cmp > 0) 90 return entry; 91 mask = so->mask; 92 } 93 entry++; 94 } while (probes--); 95 perturb >>= PERTURB_SHIFT; 96 i = (i * 5 + 1 + perturb) & mask; 97 } 98 } 99 100 static int set_table_resize(PySetObject *, Py_ssize_t); 101 102 static int set_add_entry(PySetObject * so,PyObject * key,Py_hash_t hash)103 set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash) 104 { 105 setentry *table; 106 setentry *entry; 107 size_t perturb; 108 size_t mask; 109 size_t i; /* Unsigned for defined overflow behavior */ 110 int probes; 111 int cmp; 112 113 /* Pre-increment is necessary to prevent arbitrary code in the rich 114 comparison from deallocating the key just before the insertion. */ 115 Py_INCREF(key); 116 117 restart: 118 119 mask = so->mask; 120 i = (size_t)hash & mask; 121 perturb = hash; 122 123 while (1) { 124 entry = &so->table[i]; 125 probes = (i + LINEAR_PROBES <= mask) ? LINEAR_PROBES: 0; 126 do { 127 if (entry->hash == 0 && entry->key == NULL) 128 goto found_unused; 129 if (entry->hash == hash) { 130 PyObject *startkey = entry->key; 131 assert(startkey != dummy); 132 if (startkey == key) 133 goto found_active; 134 if (PyUnicode_CheckExact(startkey) 135 && PyUnicode_CheckExact(key) 136 && _PyUnicode_EQ(startkey, key)) 137 goto found_active; 138 table = so->table; 139 Py_INCREF(startkey); 140 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); 141 Py_DECREF(startkey); 142 if (cmp > 0) 143 goto found_active; 144 if (cmp < 0) 145 goto comparison_error; 146 if (table != so->table || entry->key != startkey) 147 goto restart; 148 mask = so->mask; 149 } 150 entry++; 151 } while (probes--); 152 perturb >>= PERTURB_SHIFT; 153 i = (i * 5 + 1 + perturb) & mask; 154 } 155 156 found_unused: 157 so->fill++; 158 so->used++; 159 entry->key = key; 160 entry->hash = hash; 161 if ((size_t)so->fill*5 < mask*3) 162 return 0; 163 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); 164 165 found_active: 166 Py_DECREF(key); 167 return 0; 168 169 comparison_error: 170 Py_DECREF(key); 171 return -1; 172 } 173 174 /* 175 Internal routine used by set_table_resize() to insert an item which is 176 known to be absent from the set. Besides the performance benefit, 177 there is also safety benefit since using set_add_entry() risks making 178 a callback in the middle of a set_table_resize(), see issue 1456209. 179 The caller is responsible for updating the key's reference count and 180 the setobject's fill and used fields. 181 */ 182 static void set_insert_clean(setentry * table,size_t mask,PyObject * key,Py_hash_t hash)183 set_insert_clean(setentry *table, size_t mask, PyObject *key, Py_hash_t hash) 184 { 185 setentry *entry; 186 size_t perturb = hash; 187 size_t i = (size_t)hash & mask; 188 size_t j; 189 190 while (1) { 191 entry = &table[i]; 192 if (entry->key == NULL) 193 goto found_null; 194 if (i + LINEAR_PROBES <= mask) { 195 for (j = 0; j < LINEAR_PROBES; j++) { 196 entry++; 197 if (entry->key == NULL) 198 goto found_null; 199 } 200 } 201 perturb >>= PERTURB_SHIFT; 202 i = (i * 5 + 1 + perturb) & mask; 203 } 204 found_null: 205 entry->key = key; 206 entry->hash = hash; 207 } 208 209 /* ======== End logic for probing the hash table ========================== */ 210 /* ======================================================================== */ 211 212 /* 213 Restructure the table by allocating a new table and reinserting all 214 keys again. When entries have been deleted, the new table may 215 actually be smaller than the old one. 216 */ 217 static int set_table_resize(PySetObject * so,Py_ssize_t minused)218 set_table_resize(PySetObject *so, Py_ssize_t minused) 219 { 220 setentry *oldtable, *newtable, *entry; 221 Py_ssize_t oldmask = so->mask; 222 size_t newmask; 223 int is_oldtable_malloced; 224 setentry small_copy[PySet_MINSIZE]; 225 226 assert(minused >= 0); 227 228 /* Find the smallest table size > minused. */ 229 /* XXX speed-up with intrinsics */ 230 size_t newsize = PySet_MINSIZE; 231 while (newsize <= (size_t)minused) { 232 newsize <<= 1; // The largest possible value is PY_SSIZE_T_MAX + 1. 233 } 234 235 /* Get space for a new table. */ 236 oldtable = so->table; 237 assert(oldtable != NULL); 238 is_oldtable_malloced = oldtable != so->smalltable; 239 240 if (newsize == PySet_MINSIZE) { 241 /* A large table is shrinking, or we can't get any smaller. */ 242 newtable = so->smalltable; 243 if (newtable == oldtable) { 244 if (so->fill == so->used) { 245 /* No dummies, so no point doing anything. */ 246 return 0; 247 } 248 /* We're not going to resize it, but rebuild the 249 table anyway to purge old dummy entries. 250 Subtle: This is *necessary* if fill==size, 251 as set_lookkey needs at least one virgin slot to 252 terminate failing searches. If fill < size, it's 253 merely desirable, as dummies slow searches. */ 254 assert(so->fill > so->used); 255 memcpy(small_copy, oldtable, sizeof(small_copy)); 256 oldtable = small_copy; 257 } 258 } 259 else { 260 newtable = PyMem_NEW(setentry, newsize); 261 if (newtable == NULL) { 262 PyErr_NoMemory(); 263 return -1; 264 } 265 } 266 267 /* Make the set empty, using the new table. */ 268 assert(newtable != oldtable); 269 memset(newtable, 0, sizeof(setentry) * newsize); 270 so->mask = newsize - 1; 271 so->table = newtable; 272 273 /* Copy the data over; this is refcount-neutral for active entries; 274 dummy entries aren't copied over, of course */ 275 newmask = (size_t)so->mask; 276 if (so->fill == so->used) { 277 for (entry = oldtable; entry <= oldtable + oldmask; entry++) { 278 if (entry->key != NULL) { 279 set_insert_clean(newtable, newmask, entry->key, entry->hash); 280 } 281 } 282 } else { 283 so->fill = so->used; 284 for (entry = oldtable; entry <= oldtable + oldmask; entry++) { 285 if (entry->key != NULL && entry->key != dummy) { 286 set_insert_clean(newtable, newmask, entry->key, entry->hash); 287 } 288 } 289 } 290 291 if (is_oldtable_malloced) 292 PyMem_DEL(oldtable); 293 return 0; 294 } 295 296 static int set_contains_entry(PySetObject * so,PyObject * key,Py_hash_t hash)297 set_contains_entry(PySetObject *so, PyObject *key, Py_hash_t hash) 298 { 299 setentry *entry; 300 301 entry = set_lookkey(so, key, hash); 302 if (entry != NULL) 303 return entry->key != NULL; 304 return -1; 305 } 306 307 #define DISCARD_NOTFOUND 0 308 #define DISCARD_FOUND 1 309 310 static int set_discard_entry(PySetObject * so,PyObject * key,Py_hash_t hash)311 set_discard_entry(PySetObject *so, PyObject *key, Py_hash_t hash) 312 { 313 setentry *entry; 314 PyObject *old_key; 315 316 entry = set_lookkey(so, key, hash); 317 if (entry == NULL) 318 return -1; 319 if (entry->key == NULL) 320 return DISCARD_NOTFOUND; 321 old_key = entry->key; 322 entry->key = dummy; 323 entry->hash = -1; 324 so->used--; 325 Py_DECREF(old_key); 326 return DISCARD_FOUND; 327 } 328 329 static int set_add_key(PySetObject * so,PyObject * key)330 set_add_key(PySetObject *so, PyObject *key) 331 { 332 Py_hash_t hash; 333 334 if (!PyUnicode_CheckExact(key) || 335 (hash = ((PyASCIIObject *) key)->hash) == -1) { 336 hash = PyObject_Hash(key); 337 if (hash == -1) 338 return -1; 339 } 340 return set_add_entry(so, key, hash); 341 } 342 343 static int set_contains_key(PySetObject * so,PyObject * key)344 set_contains_key(PySetObject *so, PyObject *key) 345 { 346 Py_hash_t hash; 347 348 if (!PyUnicode_CheckExact(key) || 349 (hash = ((PyASCIIObject *) key)->hash) == -1) { 350 hash = PyObject_Hash(key); 351 if (hash == -1) 352 return -1; 353 } 354 return set_contains_entry(so, key, hash); 355 } 356 357 static int set_discard_key(PySetObject * so,PyObject * key)358 set_discard_key(PySetObject *so, PyObject *key) 359 { 360 Py_hash_t hash; 361 362 if (!PyUnicode_CheckExact(key) || 363 (hash = ((PyASCIIObject *) key)->hash) == -1) { 364 hash = PyObject_Hash(key); 365 if (hash == -1) 366 return -1; 367 } 368 return set_discard_entry(so, key, hash); 369 } 370 371 static void set_empty_to_minsize(PySetObject * so)372 set_empty_to_minsize(PySetObject *so) 373 { 374 memset(so->smalltable, 0, sizeof(so->smalltable)); 375 so->fill = 0; 376 so->used = 0; 377 so->mask = PySet_MINSIZE - 1; 378 so->table = so->smalltable; 379 so->hash = -1; 380 } 381 382 static int set_clear_internal(PySetObject * so)383 set_clear_internal(PySetObject *so) 384 { 385 setentry *entry; 386 setentry *table = so->table; 387 Py_ssize_t fill = so->fill; 388 Py_ssize_t used = so->used; 389 int table_is_malloced = table != so->smalltable; 390 setentry small_copy[PySet_MINSIZE]; 391 392 assert (PyAnySet_Check(so)); 393 assert(table != NULL); 394 395 /* This is delicate. During the process of clearing the set, 396 * decrefs can cause the set to mutate. To avoid fatal confusion 397 * (voice of experience), we have to make the set empty before 398 * clearing the slots, and never refer to anything via so->ref while 399 * clearing. 400 */ 401 if (table_is_malloced) 402 set_empty_to_minsize(so); 403 404 else if (fill > 0) { 405 /* It's a small table with something that needs to be cleared. 406 * Afraid the only safe way is to copy the set entries into 407 * another small table first. 408 */ 409 memcpy(small_copy, table, sizeof(small_copy)); 410 table = small_copy; 411 set_empty_to_minsize(so); 412 } 413 /* else it's a small table that's already empty */ 414 415 /* Now we can finally clear things. If C had refcounts, we could 416 * assert that the refcount on table is 1 now, i.e. that this function 417 * has unique access to it, so decref side-effects can't alter it. 418 */ 419 for (entry = table; used > 0; entry++) { 420 if (entry->key && entry->key != dummy) { 421 used--; 422 Py_DECREF(entry->key); 423 } 424 } 425 426 if (table_is_malloced) 427 PyMem_DEL(table); 428 return 0; 429 } 430 431 /* 432 * Iterate over a set table. Use like so: 433 * 434 * Py_ssize_t pos; 435 * setentry *entry; 436 * pos = 0; # important! pos should not otherwise be changed by you 437 * while (set_next(yourset, &pos, &entry)) { 438 * Refer to borrowed reference in entry->key. 439 * } 440 * 441 * CAUTION: In general, it isn't safe to use set_next in a loop that 442 * mutates the table. 443 */ 444 static int set_next(PySetObject * so,Py_ssize_t * pos_ptr,setentry ** entry_ptr)445 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr) 446 { 447 Py_ssize_t i; 448 Py_ssize_t mask; 449 setentry *entry; 450 451 assert (PyAnySet_Check(so)); 452 i = *pos_ptr; 453 assert(i >= 0); 454 mask = so->mask; 455 entry = &so->table[i]; 456 while (i <= mask && (entry->key == NULL || entry->key == dummy)) { 457 i++; 458 entry++; 459 } 460 *pos_ptr = i+1; 461 if (i > mask) 462 return 0; 463 assert(entry != NULL); 464 *entry_ptr = entry; 465 return 1; 466 } 467 468 static void set_dealloc(PySetObject * so)469 set_dealloc(PySetObject *so) 470 { 471 setentry *entry; 472 Py_ssize_t used = so->used; 473 474 /* bpo-31095: UnTrack is needed before calling any callbacks */ 475 PyObject_GC_UnTrack(so); 476 Py_TRASHCAN_BEGIN(so, set_dealloc) 477 if (so->weakreflist != NULL) 478 PyObject_ClearWeakRefs((PyObject *) so); 479 480 for (entry = so->table; used > 0; entry++) { 481 if (entry->key && entry->key != dummy) { 482 used--; 483 Py_DECREF(entry->key); 484 } 485 } 486 if (so->table != so->smalltable) 487 PyMem_DEL(so->table); 488 Py_TYPE(so)->tp_free(so); 489 Py_TRASHCAN_END 490 } 491 492 static PyObject * set_repr(PySetObject * so)493 set_repr(PySetObject *so) 494 { 495 PyObject *result=NULL, *keys, *listrepr, *tmp; 496 int status = Py_ReprEnter((PyObject*)so); 497 498 if (status != 0) { 499 if (status < 0) 500 return NULL; 501 return PyUnicode_FromFormat("%s(...)", Py_TYPE(so)->tp_name); 502 } 503 504 /* shortcut for the empty set */ 505 if (!so->used) { 506 Py_ReprLeave((PyObject*)so); 507 return PyUnicode_FromFormat("%s()", Py_TYPE(so)->tp_name); 508 } 509 510 keys = PySequence_List((PyObject *)so); 511 if (keys == NULL) 512 goto done; 513 514 /* repr(keys)[1:-1] */ 515 listrepr = PyObject_Repr(keys); 516 Py_DECREF(keys); 517 if (listrepr == NULL) 518 goto done; 519 tmp = PyUnicode_Substring(listrepr, 1, PyUnicode_GET_LENGTH(listrepr)-1); 520 Py_DECREF(listrepr); 521 if (tmp == NULL) 522 goto done; 523 listrepr = tmp; 524 525 if (!Py_IS_TYPE(so, &PySet_Type)) 526 result = PyUnicode_FromFormat("%s({%U})", 527 Py_TYPE(so)->tp_name, 528 listrepr); 529 else 530 result = PyUnicode_FromFormat("{%U}", listrepr); 531 Py_DECREF(listrepr); 532 done: 533 Py_ReprLeave((PyObject*)so); 534 return result; 535 } 536 537 static Py_ssize_t set_len(PyObject * so)538 set_len(PyObject *so) 539 { 540 return ((PySetObject *)so)->used; 541 } 542 543 static int set_merge(PySetObject * so,PyObject * otherset)544 set_merge(PySetObject *so, PyObject *otherset) 545 { 546 PySetObject *other; 547 PyObject *key; 548 Py_ssize_t i; 549 setentry *so_entry; 550 setentry *other_entry; 551 552 assert (PyAnySet_Check(so)); 553 assert (PyAnySet_Check(otherset)); 554 555 other = (PySetObject*)otherset; 556 if (other == so || other->used == 0) 557 /* a.update(a) or a.update(set()); nothing to do */ 558 return 0; 559 /* Do one big resize at the start, rather than 560 * incrementally resizing as we insert new keys. Expect 561 * that there will be no (or few) overlapping keys. 562 */ 563 if ((so->fill + other->used)*5 >= so->mask*3) { 564 if (set_table_resize(so, (so->used + other->used)*2) != 0) 565 return -1; 566 } 567 so_entry = so->table; 568 other_entry = other->table; 569 570 /* If our table is empty, and both tables have the same size, and 571 there are no dummies to eliminate, then just copy the pointers. */ 572 if (so->fill == 0 && so->mask == other->mask && other->fill == other->used) { 573 for (i = 0; i <= other->mask; i++, so_entry++, other_entry++) { 574 key = other_entry->key; 575 if (key != NULL) { 576 assert(so_entry->key == NULL); 577 Py_INCREF(key); 578 so_entry->key = key; 579 so_entry->hash = other_entry->hash; 580 } 581 } 582 so->fill = other->fill; 583 so->used = other->used; 584 return 0; 585 } 586 587 /* If our table is empty, we can use set_insert_clean() */ 588 if (so->fill == 0) { 589 setentry *newtable = so->table; 590 size_t newmask = (size_t)so->mask; 591 so->fill = other->used; 592 so->used = other->used; 593 for (i = other->mask + 1; i > 0 ; i--, other_entry++) { 594 key = other_entry->key; 595 if (key != NULL && key != dummy) { 596 Py_INCREF(key); 597 set_insert_clean(newtable, newmask, key, other_entry->hash); 598 } 599 } 600 return 0; 601 } 602 603 /* We can't assure there are no duplicates, so do normal insertions */ 604 for (i = 0; i <= other->mask; i++) { 605 other_entry = &other->table[i]; 606 key = other_entry->key; 607 if (key != NULL && key != dummy) { 608 if (set_add_entry(so, key, other_entry->hash)) 609 return -1; 610 } 611 } 612 return 0; 613 } 614 615 static PyObject * set_pop(PySetObject * so,PyObject * Py_UNUSED (ignored))616 set_pop(PySetObject *so, PyObject *Py_UNUSED(ignored)) 617 { 618 /* Make sure the search finger is in bounds */ 619 setentry *entry = so->table + (so->finger & so->mask); 620 setentry *limit = so->table + so->mask; 621 PyObject *key; 622 623 if (so->used == 0) { 624 PyErr_SetString(PyExc_KeyError, "pop from an empty set"); 625 return NULL; 626 } 627 while (entry->key == NULL || entry->key==dummy) { 628 entry++; 629 if (entry > limit) 630 entry = so->table; 631 } 632 key = entry->key; 633 entry->key = dummy; 634 entry->hash = -1; 635 so->used--; 636 so->finger = entry - so->table + 1; /* next place to start */ 637 return key; 638 } 639 640 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.\n\ 641 Raises KeyError if the set is empty."); 642 643 static int set_traverse(PySetObject * so,visitproc visit,void * arg)644 set_traverse(PySetObject *so, visitproc visit, void *arg) 645 { 646 Py_ssize_t pos = 0; 647 setentry *entry; 648 649 while (set_next(so, &pos, &entry)) 650 Py_VISIT(entry->key); 651 return 0; 652 } 653 654 /* Work to increase the bit dispersion for closely spaced hash values. 655 This is important because some use cases have many combinations of a 656 small number of elements with nearby hashes so that many distinct 657 combinations collapse to only a handful of distinct hash values. */ 658 659 static Py_uhash_t _shuffle_bits(Py_uhash_t h)660 _shuffle_bits(Py_uhash_t h) 661 { 662 return ((h ^ 89869747UL) ^ (h << 16)) * 3644798167UL; 663 } 664 665 /* Most of the constants in this hash algorithm are randomly chosen 666 large primes with "interesting bit patterns" and that passed tests 667 for good collision statistics on a variety of problematic datasets 668 including powersets and graph structures (such as David Eppstein's 669 graph recipes in Lib/test/test_set.py) */ 670 671 static Py_hash_t frozenset_hash(PyObject * self)672 frozenset_hash(PyObject *self) 673 { 674 PySetObject *so = (PySetObject *)self; 675 Py_uhash_t hash = 0; 676 setentry *entry; 677 678 if (so->hash != -1) 679 return so->hash; 680 681 /* Xor-in shuffled bits from every entry's hash field because xor is 682 commutative and a frozenset hash should be independent of order. 683 684 For speed, include null entries and dummy entries and then 685 subtract out their effect afterwards so that the final hash 686 depends only on active entries. This allows the code to be 687 vectorized by the compiler and it saves the unpredictable 688 branches that would arise when trying to exclude null and dummy 689 entries on every iteration. */ 690 691 for (entry = so->table; entry <= &so->table[so->mask]; entry++) 692 hash ^= _shuffle_bits(entry->hash); 693 694 /* Remove the effect of an odd number of NULL entries */ 695 if ((so->mask + 1 - so->fill) & 1) 696 hash ^= _shuffle_bits(0); 697 698 /* Remove the effect of an odd number of dummy entries */ 699 if ((so->fill - so->used) & 1) 700 hash ^= _shuffle_bits(-1); 701 702 /* Factor in the number of active entries */ 703 hash ^= ((Py_uhash_t)PySet_GET_SIZE(self) + 1) * 1927868237UL; 704 705 /* Disperse patterns arising in nested frozensets */ 706 hash ^= (hash >> 11) ^ (hash >> 25); 707 hash = hash * 69069U + 907133923UL; 708 709 /* -1 is reserved as an error code */ 710 if (hash == (Py_uhash_t)-1) 711 hash = 590923713UL; 712 713 so->hash = hash; 714 return hash; 715 } 716 717 /***** Set iterator type ***********************************************/ 718 719 typedef struct { 720 PyObject_HEAD 721 PySetObject *si_set; /* Set to NULL when iterator is exhausted */ 722 Py_ssize_t si_used; 723 Py_ssize_t si_pos; 724 Py_ssize_t len; 725 } setiterobject; 726 727 static void setiter_dealloc(setiterobject * si)728 setiter_dealloc(setiterobject *si) 729 { 730 /* bpo-31095: UnTrack is needed before calling any callbacks */ 731 _PyObject_GC_UNTRACK(si); 732 Py_XDECREF(si->si_set); 733 PyObject_GC_Del(si); 734 } 735 736 static int setiter_traverse(setiterobject * si,visitproc visit,void * arg)737 setiter_traverse(setiterobject *si, visitproc visit, void *arg) 738 { 739 Py_VISIT(si->si_set); 740 return 0; 741 } 742 743 static PyObject * setiter_len(setiterobject * si,PyObject * Py_UNUSED (ignored))744 setiter_len(setiterobject *si, PyObject *Py_UNUSED(ignored)) 745 { 746 Py_ssize_t len = 0; 747 if (si->si_set != NULL && si->si_used == si->si_set->used) 748 len = si->len; 749 return PyLong_FromSsize_t(len); 750 } 751 752 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); 753 754 static PyObject *setiter_iternext(setiterobject *si); 755 756 static PyObject * setiter_reduce(setiterobject * si,PyObject * Py_UNUSED (ignored))757 setiter_reduce(setiterobject *si, PyObject *Py_UNUSED(ignored)) 758 { 759 _Py_IDENTIFIER(iter); 760 /* copy the iterator state */ 761 setiterobject tmp = *si; 762 Py_XINCREF(tmp.si_set); 763 764 /* iterate the temporary into a list */ 765 PyObject *list = PySequence_List((PyObject*)&tmp); 766 Py_XDECREF(tmp.si_set); 767 if (list == NULL) { 768 return NULL; 769 } 770 return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list); 771 } 772 773 PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); 774 775 static PyMethodDef setiter_methods[] = { 776 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc}, 777 {"__reduce__", (PyCFunction)setiter_reduce, METH_NOARGS, reduce_doc}, 778 {NULL, NULL} /* sentinel */ 779 }; 780 setiter_iternext(setiterobject * si)781 static PyObject *setiter_iternext(setiterobject *si) 782 { 783 PyObject *key; 784 Py_ssize_t i, mask; 785 setentry *entry; 786 PySetObject *so = si->si_set; 787 788 if (so == NULL) 789 return NULL; 790 assert (PyAnySet_Check(so)); 791 792 if (si->si_used != so->used) { 793 PyErr_SetString(PyExc_RuntimeError, 794 "Set changed size during iteration"); 795 si->si_used = -1; /* Make this state sticky */ 796 return NULL; 797 } 798 799 i = si->si_pos; 800 assert(i>=0); 801 entry = so->table; 802 mask = so->mask; 803 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy)) 804 i++; 805 si->si_pos = i+1; 806 if (i > mask) 807 goto fail; 808 si->len--; 809 key = entry[i].key; 810 Py_INCREF(key); 811 return key; 812 813 fail: 814 si->si_set = NULL; 815 Py_DECREF(so); 816 return NULL; 817 } 818 819 PyTypeObject PySetIter_Type = { 820 PyVarObject_HEAD_INIT(&PyType_Type, 0) 821 "set_iterator", /* tp_name */ 822 sizeof(setiterobject), /* tp_basicsize */ 823 0, /* tp_itemsize */ 824 /* methods */ 825 (destructor)setiter_dealloc, /* tp_dealloc */ 826 0, /* tp_vectorcall_offset */ 827 0, /* tp_getattr */ 828 0, /* tp_setattr */ 829 0, /* tp_as_async */ 830 0, /* tp_repr */ 831 0, /* tp_as_number */ 832 0, /* tp_as_sequence */ 833 0, /* tp_as_mapping */ 834 0, /* tp_hash */ 835 0, /* tp_call */ 836 0, /* tp_str */ 837 PyObject_GenericGetAttr, /* tp_getattro */ 838 0, /* tp_setattro */ 839 0, /* tp_as_buffer */ 840 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 841 0, /* tp_doc */ 842 (traverseproc)setiter_traverse, /* tp_traverse */ 843 0, /* tp_clear */ 844 0, /* tp_richcompare */ 845 0, /* tp_weaklistoffset */ 846 PyObject_SelfIter, /* tp_iter */ 847 (iternextfunc)setiter_iternext, /* tp_iternext */ 848 setiter_methods, /* tp_methods */ 849 0, 850 }; 851 852 static PyObject * set_iter(PySetObject * so)853 set_iter(PySetObject *so) 854 { 855 setiterobject *si = PyObject_GC_New(setiterobject, &PySetIter_Type); 856 if (si == NULL) 857 return NULL; 858 Py_INCREF(so); 859 si->si_set = so; 860 si->si_used = so->used; 861 si->si_pos = 0; 862 si->len = so->used; 863 _PyObject_GC_TRACK(si); 864 return (PyObject *)si; 865 } 866 867 static int set_update_internal(PySetObject * so,PyObject * other)868 set_update_internal(PySetObject *so, PyObject *other) 869 { 870 PyObject *key, *it; 871 872 if (PyAnySet_Check(other)) 873 return set_merge(so, other); 874 875 if (PyDict_CheckExact(other)) { 876 PyObject *value; 877 Py_ssize_t pos = 0; 878 Py_hash_t hash; 879 Py_ssize_t dictsize = PyDict_GET_SIZE(other); 880 881 /* Do one big resize at the start, rather than 882 * incrementally resizing as we insert new keys. Expect 883 * that there will be no (or few) overlapping keys. 884 */ 885 if (dictsize < 0) 886 return -1; 887 if ((so->fill + dictsize)*5 >= so->mask*3) { 888 if (set_table_resize(so, (so->used + dictsize)*2) != 0) 889 return -1; 890 } 891 while (_PyDict_Next(other, &pos, &key, &value, &hash)) { 892 if (set_add_entry(so, key, hash)) 893 return -1; 894 } 895 return 0; 896 } 897 898 it = PyObject_GetIter(other); 899 if (it == NULL) 900 return -1; 901 902 while ((key = PyIter_Next(it)) != NULL) { 903 if (set_add_key(so, key)) { 904 Py_DECREF(it); 905 Py_DECREF(key); 906 return -1; 907 } 908 Py_DECREF(key); 909 } 910 Py_DECREF(it); 911 if (PyErr_Occurred()) 912 return -1; 913 return 0; 914 } 915 916 static PyObject * set_update(PySetObject * so,PyObject * args)917 set_update(PySetObject *so, PyObject *args) 918 { 919 Py_ssize_t i; 920 921 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) { 922 PyObject *other = PyTuple_GET_ITEM(args, i); 923 if (set_update_internal(so, other)) 924 return NULL; 925 } 926 Py_RETURN_NONE; 927 } 928 929 PyDoc_STRVAR(update_doc, 930 "Update a set with the union of itself and others."); 931 932 /* XXX Todo: 933 If aligned memory allocations become available, make the 934 set object 64 byte aligned so that most of the fields 935 can be retrieved or updated in a single cache line. 936 */ 937 938 static PyObject * make_new_set(PyTypeObject * type,PyObject * iterable)939 make_new_set(PyTypeObject *type, PyObject *iterable) 940 { 941 assert(PyType_Check(type)); 942 PySetObject *so; 943 944 so = (PySetObject *)type->tp_alloc(type, 0); 945 if (so == NULL) 946 return NULL; 947 948 so->fill = 0; 949 so->used = 0; 950 so->mask = PySet_MINSIZE - 1; 951 so->table = so->smalltable; 952 so->hash = -1; 953 so->finger = 0; 954 so->weakreflist = NULL; 955 956 if (iterable != NULL) { 957 if (set_update_internal(so, iterable)) { 958 Py_DECREF(so); 959 return NULL; 960 } 961 } 962 963 return (PyObject *)so; 964 } 965 966 static PyObject * make_new_set_basetype(PyTypeObject * type,PyObject * iterable)967 make_new_set_basetype(PyTypeObject *type, PyObject *iterable) 968 { 969 if (type != &PySet_Type && type != &PyFrozenSet_Type) { 970 if (PyType_IsSubtype(type, &PySet_Type)) 971 type = &PySet_Type; 972 else 973 type = &PyFrozenSet_Type; 974 } 975 return make_new_set(type, iterable); 976 } 977 978 /* The empty frozenset is a singleton */ 979 static PyObject *emptyfrozenset = NULL; 980 981 static PyObject * make_new_frozenset(PyTypeObject * type,PyObject * iterable)982 make_new_frozenset(PyTypeObject *type, PyObject *iterable) 983 { 984 if (type != &PyFrozenSet_Type) { 985 return make_new_set(type, iterable); 986 } 987 988 if (iterable != NULL) { 989 if (PyFrozenSet_CheckExact(iterable)) { 990 /* frozenset(f) is idempotent */ 991 Py_INCREF(iterable); 992 return iterable; 993 } 994 PyObject *res = make_new_set((PyTypeObject *)type, iterable); 995 if (res == NULL || PySet_GET_SIZE(res) != 0) { 996 return res; 997 } 998 /* If the created frozenset is empty, return the empty frozenset singleton instead */ 999 Py_DECREF(res); 1000 } 1001 1002 // The empty frozenset is a singleton 1003 if (emptyfrozenset == NULL) { 1004 emptyfrozenset = make_new_set((PyTypeObject *)type, NULL); 1005 } 1006 Py_XINCREF(emptyfrozenset); 1007 return emptyfrozenset; 1008 } 1009 1010 static PyObject * frozenset_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1011 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1012 { 1013 PyObject *iterable = NULL; 1014 1015 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset", kwds)) { 1016 return NULL; 1017 } 1018 1019 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable)) { 1020 return NULL; 1021 } 1022 1023 return make_new_frozenset(type, iterable); 1024 } 1025 1026 static PyObject * frozenset_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1027 frozenset_vectorcall(PyObject *type, PyObject * const*args, 1028 size_t nargsf, PyObject *kwnames) 1029 { 1030 if (!_PyArg_NoKwnames("frozenset", kwnames)) { 1031 return NULL; 1032 } 1033 1034 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); 1035 if (!_PyArg_CheckPositional("frozenset", nargs, 0, 1)) { 1036 return NULL; 1037 } 1038 1039 PyObject *iterable = (nargs ? args[0] : NULL); 1040 return make_new_frozenset((PyTypeObject *)type, iterable); 1041 } 1042 1043 static PyObject * set_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1044 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1045 { 1046 return make_new_set(type, NULL); 1047 } 1048 1049 /* set_swap_bodies() switches the contents of any two sets by moving their 1050 internal data pointers and, if needed, copying the internal smalltables. 1051 Semantically equivalent to: 1052 1053 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t 1054 1055 The function always succeeds and it leaves both objects in a stable state. 1056 Useful for operations that update in-place (by allowing an intermediate 1057 result to be swapped into one of the original inputs). 1058 */ 1059 1060 static void set_swap_bodies(PySetObject * a,PySetObject * b)1061 set_swap_bodies(PySetObject *a, PySetObject *b) 1062 { 1063 Py_ssize_t t; 1064 setentry *u; 1065 setentry tab[PySet_MINSIZE]; 1066 Py_hash_t h; 1067 1068 t = a->fill; a->fill = b->fill; b->fill = t; 1069 t = a->used; a->used = b->used; b->used = t; 1070 t = a->mask; a->mask = b->mask; b->mask = t; 1071 1072 u = a->table; 1073 if (a->table == a->smalltable) 1074 u = b->smalltable; 1075 a->table = b->table; 1076 if (b->table == b->smalltable) 1077 a->table = a->smalltable; 1078 b->table = u; 1079 1080 if (a->table == a->smalltable || b->table == b->smalltable) { 1081 memcpy(tab, a->smalltable, sizeof(tab)); 1082 memcpy(a->smalltable, b->smalltable, sizeof(tab)); 1083 memcpy(b->smalltable, tab, sizeof(tab)); 1084 } 1085 1086 if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) && 1087 PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) { 1088 h = a->hash; a->hash = b->hash; b->hash = h; 1089 } else { 1090 a->hash = -1; 1091 b->hash = -1; 1092 } 1093 } 1094 1095 static PyObject * set_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1096 set_copy(PySetObject *so, PyObject *Py_UNUSED(ignored)) 1097 { 1098 return make_new_set_basetype(Py_TYPE(so), (PyObject *)so); 1099 } 1100 1101 static PyObject * frozenset_copy(PySetObject * so,PyObject * Py_UNUSED (ignored))1102 frozenset_copy(PySetObject *so, PyObject *Py_UNUSED(ignored)) 1103 { 1104 if (PyFrozenSet_CheckExact(so)) { 1105 Py_INCREF(so); 1106 return (PyObject *)so; 1107 } 1108 return set_copy(so, NULL); 1109 } 1110 1111 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set."); 1112 1113 static PyObject * set_clear(PySetObject * so,PyObject * Py_UNUSED (ignored))1114 set_clear(PySetObject *so, PyObject *Py_UNUSED(ignored)) 1115 { 1116 set_clear_internal(so); 1117 Py_RETURN_NONE; 1118 } 1119 1120 PyDoc_STRVAR(clear_doc, "Remove all elements from this set."); 1121 1122 static PyObject * set_union(PySetObject * so,PyObject * args)1123 set_union(PySetObject *so, PyObject *args) 1124 { 1125 PySetObject *result; 1126 PyObject *other; 1127 Py_ssize_t i; 1128 1129 result = (PySetObject *)set_copy(so, NULL); 1130 if (result == NULL) 1131 return NULL; 1132 1133 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) { 1134 other = PyTuple_GET_ITEM(args, i); 1135 if ((PyObject *)so == other) 1136 continue; 1137 if (set_update_internal(result, other)) { 1138 Py_DECREF(result); 1139 return NULL; 1140 } 1141 } 1142 return (PyObject *)result; 1143 } 1144 1145 PyDoc_STRVAR(union_doc, 1146 "Return the union of sets as a new set.\n\ 1147 \n\ 1148 (i.e. all elements that are in either set.)"); 1149 1150 static PyObject * set_or(PySetObject * so,PyObject * other)1151 set_or(PySetObject *so, PyObject *other) 1152 { 1153 PySetObject *result; 1154 1155 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) 1156 Py_RETURN_NOTIMPLEMENTED; 1157 1158 result = (PySetObject *)set_copy(so, NULL); 1159 if (result == NULL) 1160 return NULL; 1161 if ((PyObject *)so == other) 1162 return (PyObject *)result; 1163 if (set_update_internal(result, other)) { 1164 Py_DECREF(result); 1165 return NULL; 1166 } 1167 return (PyObject *)result; 1168 } 1169 1170 static PyObject * set_ior(PySetObject * so,PyObject * other)1171 set_ior(PySetObject *so, PyObject *other) 1172 { 1173 if (!PyAnySet_Check(other)) 1174 Py_RETURN_NOTIMPLEMENTED; 1175 1176 if (set_update_internal(so, other)) 1177 return NULL; 1178 Py_INCREF(so); 1179 return (PyObject *)so; 1180 } 1181 1182 static PyObject * set_intersection(PySetObject * so,PyObject * other)1183 set_intersection(PySetObject *so, PyObject *other) 1184 { 1185 PySetObject *result; 1186 PyObject *key, *it, *tmp; 1187 Py_hash_t hash; 1188 int rv; 1189 1190 if ((PyObject *)so == other) 1191 return set_copy(so, NULL); 1192 1193 result = (PySetObject *)make_new_set_basetype(Py_TYPE(so), NULL); 1194 if (result == NULL) 1195 return NULL; 1196 1197 if (PyAnySet_Check(other)) { 1198 Py_ssize_t pos = 0; 1199 setentry *entry; 1200 1201 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) { 1202 tmp = (PyObject *)so; 1203 so = (PySetObject *)other; 1204 other = tmp; 1205 } 1206 1207 while (set_next((PySetObject *)other, &pos, &entry)) { 1208 key = entry->key; 1209 hash = entry->hash; 1210 rv = set_contains_entry(so, key, hash); 1211 if (rv < 0) { 1212 Py_DECREF(result); 1213 return NULL; 1214 } 1215 if (rv) { 1216 if (set_add_entry(result, key, hash)) { 1217 Py_DECREF(result); 1218 return NULL; 1219 } 1220 } 1221 } 1222 return (PyObject *)result; 1223 } 1224 1225 it = PyObject_GetIter(other); 1226 if (it == NULL) { 1227 Py_DECREF(result); 1228 return NULL; 1229 } 1230 1231 while ((key = PyIter_Next(it)) != NULL) { 1232 hash = PyObject_Hash(key); 1233 if (hash == -1) 1234 goto error; 1235 rv = set_contains_entry(so, key, hash); 1236 if (rv < 0) 1237 goto error; 1238 if (rv) { 1239 if (set_add_entry(result, key, hash)) 1240 goto error; 1241 } 1242 Py_DECREF(key); 1243 } 1244 Py_DECREF(it); 1245 if (PyErr_Occurred()) { 1246 Py_DECREF(result); 1247 return NULL; 1248 } 1249 return (PyObject *)result; 1250 error: 1251 Py_DECREF(it); 1252 Py_DECREF(result); 1253 Py_DECREF(key); 1254 return NULL; 1255 } 1256 1257 static PyObject * set_intersection_multi(PySetObject * so,PyObject * args)1258 set_intersection_multi(PySetObject *so, PyObject *args) 1259 { 1260 Py_ssize_t i; 1261 PyObject *result = (PyObject *)so; 1262 1263 if (PyTuple_GET_SIZE(args) == 0) 1264 return set_copy(so, NULL); 1265 1266 Py_INCREF(so); 1267 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) { 1268 PyObject *other = PyTuple_GET_ITEM(args, i); 1269 PyObject *newresult = set_intersection((PySetObject *)result, other); 1270 if (newresult == NULL) { 1271 Py_DECREF(result); 1272 return NULL; 1273 } 1274 Py_DECREF(result); 1275 result = newresult; 1276 } 1277 return result; 1278 } 1279 1280 PyDoc_STRVAR(intersection_doc, 1281 "Return the intersection of two sets as a new set.\n\ 1282 \n\ 1283 (i.e. all elements that are in both sets.)"); 1284 1285 static PyObject * set_intersection_update(PySetObject * so,PyObject * other)1286 set_intersection_update(PySetObject *so, PyObject *other) 1287 { 1288 PyObject *tmp; 1289 1290 tmp = set_intersection(so, other); 1291 if (tmp == NULL) 1292 return NULL; 1293 set_swap_bodies(so, (PySetObject *)tmp); 1294 Py_DECREF(tmp); 1295 Py_RETURN_NONE; 1296 } 1297 1298 static PyObject * set_intersection_update_multi(PySetObject * so,PyObject * args)1299 set_intersection_update_multi(PySetObject *so, PyObject *args) 1300 { 1301 PyObject *tmp; 1302 1303 tmp = set_intersection_multi(so, args); 1304 if (tmp == NULL) 1305 return NULL; 1306 set_swap_bodies(so, (PySetObject *)tmp); 1307 Py_DECREF(tmp); 1308 Py_RETURN_NONE; 1309 } 1310 1311 PyDoc_STRVAR(intersection_update_doc, 1312 "Update a set with the intersection of itself and another."); 1313 1314 static PyObject * set_and(PySetObject * so,PyObject * other)1315 set_and(PySetObject *so, PyObject *other) 1316 { 1317 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) 1318 Py_RETURN_NOTIMPLEMENTED; 1319 return set_intersection(so, other); 1320 } 1321 1322 static PyObject * set_iand(PySetObject * so,PyObject * other)1323 set_iand(PySetObject *so, PyObject *other) 1324 { 1325 PyObject *result; 1326 1327 if (!PyAnySet_Check(other)) 1328 Py_RETURN_NOTIMPLEMENTED; 1329 result = set_intersection_update(so, other); 1330 if (result == NULL) 1331 return NULL; 1332 Py_DECREF(result); 1333 Py_INCREF(so); 1334 return (PyObject *)so; 1335 } 1336 1337 static PyObject * set_isdisjoint(PySetObject * so,PyObject * other)1338 set_isdisjoint(PySetObject *so, PyObject *other) 1339 { 1340 PyObject *key, *it, *tmp; 1341 int rv; 1342 1343 if ((PyObject *)so == other) { 1344 if (PySet_GET_SIZE(so) == 0) 1345 Py_RETURN_TRUE; 1346 else 1347 Py_RETURN_FALSE; 1348 } 1349 1350 if (PyAnySet_CheckExact(other)) { 1351 Py_ssize_t pos = 0; 1352 setentry *entry; 1353 1354 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) { 1355 tmp = (PyObject *)so; 1356 so = (PySetObject *)other; 1357 other = tmp; 1358 } 1359 while (set_next((PySetObject *)other, &pos, &entry)) { 1360 rv = set_contains_entry(so, entry->key, entry->hash); 1361 if (rv < 0) 1362 return NULL; 1363 if (rv) 1364 Py_RETURN_FALSE; 1365 } 1366 Py_RETURN_TRUE; 1367 } 1368 1369 it = PyObject_GetIter(other); 1370 if (it == NULL) 1371 return NULL; 1372 1373 while ((key = PyIter_Next(it)) != NULL) { 1374 Py_hash_t hash = PyObject_Hash(key); 1375 1376 if (hash == -1) { 1377 Py_DECREF(key); 1378 Py_DECREF(it); 1379 return NULL; 1380 } 1381 rv = set_contains_entry(so, key, hash); 1382 Py_DECREF(key); 1383 if (rv < 0) { 1384 Py_DECREF(it); 1385 return NULL; 1386 } 1387 if (rv) { 1388 Py_DECREF(it); 1389 Py_RETURN_FALSE; 1390 } 1391 } 1392 Py_DECREF(it); 1393 if (PyErr_Occurred()) 1394 return NULL; 1395 Py_RETURN_TRUE; 1396 } 1397 1398 PyDoc_STRVAR(isdisjoint_doc, 1399 "Return True if two sets have a null intersection."); 1400 1401 static int set_difference_update_internal(PySetObject * so,PyObject * other)1402 set_difference_update_internal(PySetObject *so, PyObject *other) 1403 { 1404 if ((PyObject *)so == other) 1405 return set_clear_internal(so); 1406 1407 if (PyAnySet_Check(other)) { 1408 setentry *entry; 1409 Py_ssize_t pos = 0; 1410 1411 /* Optimization: When the other set is more than 8 times 1412 larger than the base set, replace the other set with 1413 interesection of the two sets. 1414 */ 1415 if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) { 1416 other = set_intersection(so, other); 1417 if (other == NULL) 1418 return -1; 1419 } else { 1420 Py_INCREF(other); 1421 } 1422 1423 while (set_next((PySetObject *)other, &pos, &entry)) 1424 if (set_discard_entry(so, entry->key, entry->hash) < 0) { 1425 Py_DECREF(other); 1426 return -1; 1427 } 1428 1429 Py_DECREF(other); 1430 } else { 1431 PyObject *key, *it; 1432 it = PyObject_GetIter(other); 1433 if (it == NULL) 1434 return -1; 1435 1436 while ((key = PyIter_Next(it)) != NULL) { 1437 if (set_discard_key(so, key) < 0) { 1438 Py_DECREF(it); 1439 Py_DECREF(key); 1440 return -1; 1441 } 1442 Py_DECREF(key); 1443 } 1444 Py_DECREF(it); 1445 if (PyErr_Occurred()) 1446 return -1; 1447 } 1448 /* If more than 1/4th are dummies, then resize them away. */ 1449 if ((size_t)(so->fill - so->used) <= (size_t)so->mask / 4) 1450 return 0; 1451 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4); 1452 } 1453 1454 static PyObject * set_difference_update(PySetObject * so,PyObject * args)1455 set_difference_update(PySetObject *so, PyObject *args) 1456 { 1457 Py_ssize_t i; 1458 1459 for (i=0 ; i<PyTuple_GET_SIZE(args) ; i++) { 1460 PyObject *other = PyTuple_GET_ITEM(args, i); 1461 if (set_difference_update_internal(so, other)) 1462 return NULL; 1463 } 1464 Py_RETURN_NONE; 1465 } 1466 1467 PyDoc_STRVAR(difference_update_doc, 1468 "Remove all elements of another set from this set."); 1469 1470 static PyObject * set_copy_and_difference(PySetObject * so,PyObject * other)1471 set_copy_and_difference(PySetObject *so, PyObject *other) 1472 { 1473 PyObject *result; 1474 1475 result = set_copy(so, NULL); 1476 if (result == NULL) 1477 return NULL; 1478 if (set_difference_update_internal((PySetObject *) result, other) == 0) 1479 return result; 1480 Py_DECREF(result); 1481 return NULL; 1482 } 1483 1484 static PyObject * set_difference(PySetObject * so,PyObject * other)1485 set_difference(PySetObject *so, PyObject *other) 1486 { 1487 PyObject *result; 1488 PyObject *key; 1489 Py_hash_t hash; 1490 setentry *entry; 1491 Py_ssize_t pos = 0, other_size; 1492 int rv; 1493 1494 if (PyAnySet_Check(other)) { 1495 other_size = PySet_GET_SIZE(other); 1496 } 1497 else if (PyDict_CheckExact(other)) { 1498 other_size = PyDict_GET_SIZE(other); 1499 } 1500 else { 1501 return set_copy_and_difference(so, other); 1502 } 1503 1504 /* If len(so) much more than len(other), it's more efficient to simply copy 1505 * so and then iterate other looking for common elements. */ 1506 if ((PySet_GET_SIZE(so) >> 2) > other_size) { 1507 return set_copy_and_difference(so, other); 1508 } 1509 1510 result = make_new_set_basetype(Py_TYPE(so), NULL); 1511 if (result == NULL) 1512 return NULL; 1513 1514 if (PyDict_CheckExact(other)) { 1515 while (set_next(so, &pos, &entry)) { 1516 key = entry->key; 1517 hash = entry->hash; 1518 rv = _PyDict_Contains(other, key, hash); 1519 if (rv < 0) { 1520 Py_DECREF(result); 1521 return NULL; 1522 } 1523 if (!rv) { 1524 if (set_add_entry((PySetObject *)result, key, hash)) { 1525 Py_DECREF(result); 1526 return NULL; 1527 } 1528 } 1529 } 1530 return result; 1531 } 1532 1533 /* Iterate over so, checking for common elements in other. */ 1534 while (set_next(so, &pos, &entry)) { 1535 key = entry->key; 1536 hash = entry->hash; 1537 rv = set_contains_entry((PySetObject *)other, key, hash); 1538 if (rv < 0) { 1539 Py_DECREF(result); 1540 return NULL; 1541 } 1542 if (!rv) { 1543 if (set_add_entry((PySetObject *)result, key, hash)) { 1544 Py_DECREF(result); 1545 return NULL; 1546 } 1547 } 1548 } 1549 return result; 1550 } 1551 1552 static PyObject * set_difference_multi(PySetObject * so,PyObject * args)1553 set_difference_multi(PySetObject *so, PyObject *args) 1554 { 1555 Py_ssize_t i; 1556 PyObject *result, *other; 1557 1558 if (PyTuple_GET_SIZE(args) == 0) 1559 return set_copy(so, NULL); 1560 1561 other = PyTuple_GET_ITEM(args, 0); 1562 result = set_difference(so, other); 1563 if (result == NULL) 1564 return NULL; 1565 1566 for (i=1 ; i<PyTuple_GET_SIZE(args) ; i++) { 1567 other = PyTuple_GET_ITEM(args, i); 1568 if (set_difference_update_internal((PySetObject *)result, other)) { 1569 Py_DECREF(result); 1570 return NULL; 1571 } 1572 } 1573 return result; 1574 } 1575 1576 PyDoc_STRVAR(difference_doc, 1577 "Return the difference of two or more sets as a new set.\n\ 1578 \n\ 1579 (i.e. all elements that are in this set but not the others.)"); 1580 static PyObject * set_sub(PySetObject * so,PyObject * other)1581 set_sub(PySetObject *so, PyObject *other) 1582 { 1583 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) 1584 Py_RETURN_NOTIMPLEMENTED; 1585 return set_difference(so, other); 1586 } 1587 1588 static PyObject * set_isub(PySetObject * so,PyObject * other)1589 set_isub(PySetObject *so, PyObject *other) 1590 { 1591 if (!PyAnySet_Check(other)) 1592 Py_RETURN_NOTIMPLEMENTED; 1593 if (set_difference_update_internal(so, other)) 1594 return NULL; 1595 Py_INCREF(so); 1596 return (PyObject *)so; 1597 } 1598 1599 static PyObject * set_symmetric_difference_update(PySetObject * so,PyObject * other)1600 set_symmetric_difference_update(PySetObject *so, PyObject *other) 1601 { 1602 PySetObject *otherset; 1603 PyObject *key; 1604 Py_ssize_t pos = 0; 1605 Py_hash_t hash; 1606 setentry *entry; 1607 int rv; 1608 1609 if ((PyObject *)so == other) 1610 return set_clear(so, NULL); 1611 1612 if (PyDict_CheckExact(other)) { 1613 PyObject *value; 1614 while (_PyDict_Next(other, &pos, &key, &value, &hash)) { 1615 Py_INCREF(key); 1616 rv = set_discard_entry(so, key, hash); 1617 if (rv < 0) { 1618 Py_DECREF(key); 1619 return NULL; 1620 } 1621 if (rv == DISCARD_NOTFOUND) { 1622 if (set_add_entry(so, key, hash)) { 1623 Py_DECREF(key); 1624 return NULL; 1625 } 1626 } 1627 Py_DECREF(key); 1628 } 1629 Py_RETURN_NONE; 1630 } 1631 1632 if (PyAnySet_Check(other)) { 1633 Py_INCREF(other); 1634 otherset = (PySetObject *)other; 1635 } else { 1636 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other); 1637 if (otherset == NULL) 1638 return NULL; 1639 } 1640 1641 while (set_next(otherset, &pos, &entry)) { 1642 key = entry->key; 1643 hash = entry->hash; 1644 rv = set_discard_entry(so, key, hash); 1645 if (rv < 0) { 1646 Py_DECREF(otherset); 1647 return NULL; 1648 } 1649 if (rv == DISCARD_NOTFOUND) { 1650 if (set_add_entry(so, key, hash)) { 1651 Py_DECREF(otherset); 1652 return NULL; 1653 } 1654 } 1655 } 1656 Py_DECREF(otherset); 1657 Py_RETURN_NONE; 1658 } 1659 1660 PyDoc_STRVAR(symmetric_difference_update_doc, 1661 "Update a set with the symmetric difference of itself and another."); 1662 1663 static PyObject * set_symmetric_difference(PySetObject * so,PyObject * other)1664 set_symmetric_difference(PySetObject *so, PyObject *other) 1665 { 1666 PyObject *rv; 1667 PySetObject *otherset; 1668 1669 otherset = (PySetObject *)make_new_set_basetype(Py_TYPE(so), other); 1670 if (otherset == NULL) 1671 return NULL; 1672 rv = set_symmetric_difference_update(otherset, (PyObject *)so); 1673 if (rv == NULL) { 1674 Py_DECREF(otherset); 1675 return NULL; 1676 } 1677 Py_DECREF(rv); 1678 return (PyObject *)otherset; 1679 } 1680 1681 PyDoc_STRVAR(symmetric_difference_doc, 1682 "Return the symmetric difference of two sets as a new set.\n\ 1683 \n\ 1684 (i.e. all elements that are in exactly one of the sets.)"); 1685 1686 static PyObject * set_xor(PySetObject * so,PyObject * other)1687 set_xor(PySetObject *so, PyObject *other) 1688 { 1689 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) 1690 Py_RETURN_NOTIMPLEMENTED; 1691 return set_symmetric_difference(so, other); 1692 } 1693 1694 static PyObject * set_ixor(PySetObject * so,PyObject * other)1695 set_ixor(PySetObject *so, PyObject *other) 1696 { 1697 PyObject *result; 1698 1699 if (!PyAnySet_Check(other)) 1700 Py_RETURN_NOTIMPLEMENTED; 1701 result = set_symmetric_difference_update(so, other); 1702 if (result == NULL) 1703 return NULL; 1704 Py_DECREF(result); 1705 Py_INCREF(so); 1706 return (PyObject *)so; 1707 } 1708 1709 static PyObject * set_issubset(PySetObject * so,PyObject * other)1710 set_issubset(PySetObject *so, PyObject *other) 1711 { 1712 setentry *entry; 1713 Py_ssize_t pos = 0; 1714 int rv; 1715 1716 if (!PyAnySet_Check(other)) { 1717 PyObject *tmp, *result; 1718 tmp = make_new_set(&PySet_Type, other); 1719 if (tmp == NULL) 1720 return NULL; 1721 result = set_issubset(so, tmp); 1722 Py_DECREF(tmp); 1723 return result; 1724 } 1725 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other)) 1726 Py_RETURN_FALSE; 1727 1728 while (set_next(so, &pos, &entry)) { 1729 rv = set_contains_entry((PySetObject *)other, entry->key, entry->hash); 1730 if (rv < 0) 1731 return NULL; 1732 if (!rv) 1733 Py_RETURN_FALSE; 1734 } 1735 Py_RETURN_TRUE; 1736 } 1737 1738 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set."); 1739 1740 static PyObject * set_issuperset(PySetObject * so,PyObject * other)1741 set_issuperset(PySetObject *so, PyObject *other) 1742 { 1743 PyObject *tmp, *result; 1744 1745 if (!PyAnySet_Check(other)) { 1746 tmp = make_new_set(&PySet_Type, other); 1747 if (tmp == NULL) 1748 return NULL; 1749 result = set_issuperset(so, tmp); 1750 Py_DECREF(tmp); 1751 return result; 1752 } 1753 return set_issubset((PySetObject *)other, (PyObject *)so); 1754 } 1755 1756 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set."); 1757 1758 static PyObject * set_richcompare(PySetObject * v,PyObject * w,int op)1759 set_richcompare(PySetObject *v, PyObject *w, int op) 1760 { 1761 PyObject *r1; 1762 int r2; 1763 1764 if(!PyAnySet_Check(w)) 1765 Py_RETURN_NOTIMPLEMENTED; 1766 1767 switch (op) { 1768 case Py_EQ: 1769 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w)) 1770 Py_RETURN_FALSE; 1771 if (v->hash != -1 && 1772 ((PySetObject *)w)->hash != -1 && 1773 v->hash != ((PySetObject *)w)->hash) 1774 Py_RETURN_FALSE; 1775 return set_issubset(v, w); 1776 case Py_NE: 1777 r1 = set_richcompare(v, w, Py_EQ); 1778 if (r1 == NULL) 1779 return NULL; 1780 r2 = PyObject_IsTrue(r1); 1781 Py_DECREF(r1); 1782 if (r2 < 0) 1783 return NULL; 1784 return PyBool_FromLong(!r2); 1785 case Py_LE: 1786 return set_issubset(v, w); 1787 case Py_GE: 1788 return set_issuperset(v, w); 1789 case Py_LT: 1790 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w)) 1791 Py_RETURN_FALSE; 1792 return set_issubset(v, w); 1793 case Py_GT: 1794 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w)) 1795 Py_RETURN_FALSE; 1796 return set_issuperset(v, w); 1797 } 1798 Py_RETURN_NOTIMPLEMENTED; 1799 } 1800 1801 static PyObject * set_add(PySetObject * so,PyObject * key)1802 set_add(PySetObject *so, PyObject *key) 1803 { 1804 if (set_add_key(so, key)) 1805 return NULL; 1806 Py_RETURN_NONE; 1807 } 1808 1809 PyDoc_STRVAR(add_doc, 1810 "Add an element to a set.\n\ 1811 \n\ 1812 This has no effect if the element is already present."); 1813 1814 static int set_contains(PySetObject * so,PyObject * key)1815 set_contains(PySetObject *so, PyObject *key) 1816 { 1817 PyObject *tmpkey; 1818 int rv; 1819 1820 rv = set_contains_key(so, key); 1821 if (rv < 0) { 1822 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) 1823 return -1; 1824 PyErr_Clear(); 1825 tmpkey = make_new_set(&PyFrozenSet_Type, key); 1826 if (tmpkey == NULL) 1827 return -1; 1828 rv = set_contains_key(so, tmpkey); 1829 Py_DECREF(tmpkey); 1830 } 1831 return rv; 1832 } 1833 1834 static PyObject * set_direct_contains(PySetObject * so,PyObject * key)1835 set_direct_contains(PySetObject *so, PyObject *key) 1836 { 1837 long result; 1838 1839 result = set_contains(so, key); 1840 if (result < 0) 1841 return NULL; 1842 return PyBool_FromLong(result); 1843 } 1844 1845 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x."); 1846 1847 static PyObject * set_remove(PySetObject * so,PyObject * key)1848 set_remove(PySetObject *so, PyObject *key) 1849 { 1850 PyObject *tmpkey; 1851 int rv; 1852 1853 rv = set_discard_key(so, key); 1854 if (rv < 0) { 1855 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) 1856 return NULL; 1857 PyErr_Clear(); 1858 tmpkey = make_new_set(&PyFrozenSet_Type, key); 1859 if (tmpkey == NULL) 1860 return NULL; 1861 rv = set_discard_key(so, tmpkey); 1862 Py_DECREF(tmpkey); 1863 if (rv < 0) 1864 return NULL; 1865 } 1866 1867 if (rv == DISCARD_NOTFOUND) { 1868 _PyErr_SetKeyError(key); 1869 return NULL; 1870 } 1871 Py_RETURN_NONE; 1872 } 1873 1874 PyDoc_STRVAR(remove_doc, 1875 "Remove an element from a set; it must be a member.\n\ 1876 \n\ 1877 If the element is not a member, raise a KeyError."); 1878 1879 static PyObject * set_discard(PySetObject * so,PyObject * key)1880 set_discard(PySetObject *so, PyObject *key) 1881 { 1882 PyObject *tmpkey; 1883 int rv; 1884 1885 rv = set_discard_key(so, key); 1886 if (rv < 0) { 1887 if (!PySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError)) 1888 return NULL; 1889 PyErr_Clear(); 1890 tmpkey = make_new_set(&PyFrozenSet_Type, key); 1891 if (tmpkey == NULL) 1892 return NULL; 1893 rv = set_discard_key(so, tmpkey); 1894 Py_DECREF(tmpkey); 1895 if (rv < 0) 1896 return NULL; 1897 } 1898 Py_RETURN_NONE; 1899 } 1900 1901 PyDoc_STRVAR(discard_doc, 1902 "Remove an element from a set if it is a member.\n\ 1903 \n\ 1904 If the element is not a member, do nothing."); 1905 1906 static PyObject * set_reduce(PySetObject * so,PyObject * Py_UNUSED (ignored))1907 set_reduce(PySetObject *so, PyObject *Py_UNUSED(ignored)) 1908 { 1909 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL; 1910 _Py_IDENTIFIER(__dict__); 1911 1912 keys = PySequence_List((PyObject *)so); 1913 if (keys == NULL) 1914 goto done; 1915 args = PyTuple_Pack(1, keys); 1916 if (args == NULL) 1917 goto done; 1918 if (_PyObject_LookupAttrId((PyObject *)so, &PyId___dict__, &dict) < 0) { 1919 goto done; 1920 } 1921 if (dict == NULL) { 1922 dict = Py_None; 1923 Py_INCREF(dict); 1924 } 1925 result = PyTuple_Pack(3, Py_TYPE(so), args, dict); 1926 done: 1927 Py_XDECREF(args); 1928 Py_XDECREF(keys); 1929 Py_XDECREF(dict); 1930 return result; 1931 } 1932 1933 static PyObject * set_sizeof(PySetObject * so,PyObject * Py_UNUSED (ignored))1934 set_sizeof(PySetObject *so, PyObject *Py_UNUSED(ignored)) 1935 { 1936 Py_ssize_t res; 1937 1938 res = _PyObject_SIZE(Py_TYPE(so)); 1939 if (so->table != so->smalltable) 1940 res = res + (so->mask + 1) * sizeof(setentry); 1941 return PyLong_FromSsize_t(res); 1942 } 1943 1944 PyDoc_STRVAR(sizeof_doc, "S.__sizeof__() -> size of S in memory, in bytes"); 1945 static int set_init(PySetObject * self,PyObject * args,PyObject * kwds)1946 set_init(PySetObject *self, PyObject *args, PyObject *kwds) 1947 { 1948 PyObject *iterable = NULL; 1949 1950 if (!_PyArg_NoKeywords("set", kwds)) 1951 return -1; 1952 if (!PyArg_UnpackTuple(args, Py_TYPE(self)->tp_name, 0, 1, &iterable)) 1953 return -1; 1954 if (self->fill) 1955 set_clear_internal(self); 1956 self->hash = -1; 1957 if (iterable == NULL) 1958 return 0; 1959 return set_update_internal(self, iterable); 1960 } 1961 1962 static PyObject* set_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)1963 set_vectorcall(PyObject *type, PyObject * const*args, 1964 size_t nargsf, PyObject *kwnames) 1965 { 1966 assert(PyType_Check(type)); 1967 1968 if (!_PyArg_NoKwnames("set", kwnames)) { 1969 return NULL; 1970 } 1971 1972 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); 1973 if (!_PyArg_CheckPositional("set", nargs, 0, 1)) { 1974 return NULL; 1975 } 1976 1977 if (nargs) { 1978 return make_new_set((PyTypeObject *)type, args[0]); 1979 } 1980 1981 return make_new_set((PyTypeObject *)type, NULL); 1982 } 1983 1984 static PySequenceMethods set_as_sequence = { 1985 set_len, /* sq_length */ 1986 0, /* sq_concat */ 1987 0, /* sq_repeat */ 1988 0, /* sq_item */ 1989 0, /* sq_slice */ 1990 0, /* sq_ass_item */ 1991 0, /* sq_ass_slice */ 1992 (objobjproc)set_contains, /* sq_contains */ 1993 }; 1994 1995 /* set object ********************************************************/ 1996 1997 #ifdef Py_DEBUG 1998 static PyObject *test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored)); 1999 2000 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\ 2001 All is well if assertions don't fail."); 2002 #endif 2003 2004 static PyMethodDef set_methods[] = { 2005 {"add", (PyCFunction)set_add, METH_O, 2006 add_doc}, 2007 {"clear", (PyCFunction)set_clear, METH_NOARGS, 2008 clear_doc}, 2009 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST, 2010 contains_doc}, 2011 {"copy", (PyCFunction)set_copy, METH_NOARGS, 2012 copy_doc}, 2013 {"discard", (PyCFunction)set_discard, METH_O, 2014 discard_doc}, 2015 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS, 2016 difference_doc}, 2017 {"difference_update", (PyCFunction)set_difference_update, METH_VARARGS, 2018 difference_update_doc}, 2019 {"intersection",(PyCFunction)set_intersection_multi, METH_VARARGS, 2020 intersection_doc}, 2021 {"intersection_update",(PyCFunction)set_intersection_update_multi, METH_VARARGS, 2022 intersection_update_doc}, 2023 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O, 2024 isdisjoint_doc}, 2025 {"issubset", (PyCFunction)set_issubset, METH_O, 2026 issubset_doc}, 2027 {"issuperset", (PyCFunction)set_issuperset, METH_O, 2028 issuperset_doc}, 2029 {"pop", (PyCFunction)set_pop, METH_NOARGS, 2030 pop_doc}, 2031 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS, 2032 reduce_doc}, 2033 {"remove", (PyCFunction)set_remove, METH_O, 2034 remove_doc}, 2035 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS, 2036 sizeof_doc}, 2037 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O, 2038 symmetric_difference_doc}, 2039 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O, 2040 symmetric_difference_update_doc}, 2041 #ifdef Py_DEBUG 2042 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS, 2043 test_c_api_doc}, 2044 #endif 2045 {"union", (PyCFunction)set_union, METH_VARARGS, 2046 union_doc}, 2047 {"update", (PyCFunction)set_update, METH_VARARGS, 2048 update_doc}, 2049 {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, 2050 {NULL, NULL} /* sentinel */ 2051 }; 2052 2053 static PyNumberMethods set_as_number = { 2054 0, /*nb_add*/ 2055 (binaryfunc)set_sub, /*nb_subtract*/ 2056 0, /*nb_multiply*/ 2057 0, /*nb_remainder*/ 2058 0, /*nb_divmod*/ 2059 0, /*nb_power*/ 2060 0, /*nb_negative*/ 2061 0, /*nb_positive*/ 2062 0, /*nb_absolute*/ 2063 0, /*nb_bool*/ 2064 0, /*nb_invert*/ 2065 0, /*nb_lshift*/ 2066 0, /*nb_rshift*/ 2067 (binaryfunc)set_and, /*nb_and*/ 2068 (binaryfunc)set_xor, /*nb_xor*/ 2069 (binaryfunc)set_or, /*nb_or*/ 2070 0, /*nb_int*/ 2071 0, /*nb_reserved*/ 2072 0, /*nb_float*/ 2073 0, /*nb_inplace_add*/ 2074 (binaryfunc)set_isub, /*nb_inplace_subtract*/ 2075 0, /*nb_inplace_multiply*/ 2076 0, /*nb_inplace_remainder*/ 2077 0, /*nb_inplace_power*/ 2078 0, /*nb_inplace_lshift*/ 2079 0, /*nb_inplace_rshift*/ 2080 (binaryfunc)set_iand, /*nb_inplace_and*/ 2081 (binaryfunc)set_ixor, /*nb_inplace_xor*/ 2082 (binaryfunc)set_ior, /*nb_inplace_or*/ 2083 }; 2084 2085 PyDoc_STRVAR(set_doc, 2086 "set() -> new empty set object\n\ 2087 set(iterable) -> new set object\n\ 2088 \n\ 2089 Build an unordered collection of unique elements."); 2090 2091 PyTypeObject PySet_Type = { 2092 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2093 "set", /* tp_name */ 2094 sizeof(PySetObject), /* tp_basicsize */ 2095 0, /* tp_itemsize */ 2096 /* methods */ 2097 (destructor)set_dealloc, /* tp_dealloc */ 2098 0, /* tp_vectorcall_offset */ 2099 0, /* tp_getattr */ 2100 0, /* tp_setattr */ 2101 0, /* tp_as_async */ 2102 (reprfunc)set_repr, /* tp_repr */ 2103 &set_as_number, /* tp_as_number */ 2104 &set_as_sequence, /* tp_as_sequence */ 2105 0, /* tp_as_mapping */ 2106 PyObject_HashNotImplemented, /* tp_hash */ 2107 0, /* tp_call */ 2108 0, /* tp_str */ 2109 PyObject_GenericGetAttr, /* tp_getattro */ 2110 0, /* tp_setattro */ 2111 0, /* tp_as_buffer */ 2112 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | 2113 Py_TPFLAGS_BASETYPE, /* tp_flags */ 2114 set_doc, /* tp_doc */ 2115 (traverseproc)set_traverse, /* tp_traverse */ 2116 (inquiry)set_clear_internal, /* tp_clear */ 2117 (richcmpfunc)set_richcompare, /* tp_richcompare */ 2118 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */ 2119 (getiterfunc)set_iter, /* tp_iter */ 2120 0, /* tp_iternext */ 2121 set_methods, /* tp_methods */ 2122 0, /* tp_members */ 2123 0, /* tp_getset */ 2124 0, /* tp_base */ 2125 0, /* tp_dict */ 2126 0, /* tp_descr_get */ 2127 0, /* tp_descr_set */ 2128 0, /* tp_dictoffset */ 2129 (initproc)set_init, /* tp_init */ 2130 PyType_GenericAlloc, /* tp_alloc */ 2131 set_new, /* tp_new */ 2132 PyObject_GC_Del, /* tp_free */ 2133 .tp_vectorcall = set_vectorcall, 2134 }; 2135 2136 /* frozenset object ********************************************************/ 2137 2138 2139 static PyMethodDef frozenset_methods[] = { 2140 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST, 2141 contains_doc}, 2142 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS, 2143 copy_doc}, 2144 {"difference", (PyCFunction)set_difference_multi, METH_VARARGS, 2145 difference_doc}, 2146 {"intersection", (PyCFunction)set_intersection_multi, METH_VARARGS, 2147 intersection_doc}, 2148 {"isdisjoint", (PyCFunction)set_isdisjoint, METH_O, 2149 isdisjoint_doc}, 2150 {"issubset", (PyCFunction)set_issubset, METH_O, 2151 issubset_doc}, 2152 {"issuperset", (PyCFunction)set_issuperset, METH_O, 2153 issuperset_doc}, 2154 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS, 2155 reduce_doc}, 2156 {"__sizeof__", (PyCFunction)set_sizeof, METH_NOARGS, 2157 sizeof_doc}, 2158 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O, 2159 symmetric_difference_doc}, 2160 {"union", (PyCFunction)set_union, METH_VARARGS, 2161 union_doc}, 2162 {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, 2163 {NULL, NULL} /* sentinel */ 2164 }; 2165 2166 static PyNumberMethods frozenset_as_number = { 2167 0, /*nb_add*/ 2168 (binaryfunc)set_sub, /*nb_subtract*/ 2169 0, /*nb_multiply*/ 2170 0, /*nb_remainder*/ 2171 0, /*nb_divmod*/ 2172 0, /*nb_power*/ 2173 0, /*nb_negative*/ 2174 0, /*nb_positive*/ 2175 0, /*nb_absolute*/ 2176 0, /*nb_bool*/ 2177 0, /*nb_invert*/ 2178 0, /*nb_lshift*/ 2179 0, /*nb_rshift*/ 2180 (binaryfunc)set_and, /*nb_and*/ 2181 (binaryfunc)set_xor, /*nb_xor*/ 2182 (binaryfunc)set_or, /*nb_or*/ 2183 }; 2184 2185 PyDoc_STRVAR(frozenset_doc, 2186 "frozenset() -> empty frozenset object\n\ 2187 frozenset(iterable) -> frozenset object\n\ 2188 \n\ 2189 Build an immutable unordered collection of unique elements."); 2190 2191 PyTypeObject PyFrozenSet_Type = { 2192 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2193 "frozenset", /* tp_name */ 2194 sizeof(PySetObject), /* tp_basicsize */ 2195 0, /* tp_itemsize */ 2196 /* methods */ 2197 (destructor)set_dealloc, /* tp_dealloc */ 2198 0, /* tp_vectorcall_offset */ 2199 0, /* tp_getattr */ 2200 0, /* tp_setattr */ 2201 0, /* tp_as_async */ 2202 (reprfunc)set_repr, /* tp_repr */ 2203 &frozenset_as_number, /* tp_as_number */ 2204 &set_as_sequence, /* tp_as_sequence */ 2205 0, /* tp_as_mapping */ 2206 frozenset_hash, /* tp_hash */ 2207 0, /* tp_call */ 2208 0, /* tp_str */ 2209 PyObject_GenericGetAttr, /* tp_getattro */ 2210 0, /* tp_setattro */ 2211 0, /* tp_as_buffer */ 2212 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | 2213 Py_TPFLAGS_BASETYPE, /* tp_flags */ 2214 frozenset_doc, /* tp_doc */ 2215 (traverseproc)set_traverse, /* tp_traverse */ 2216 (inquiry)set_clear_internal, /* tp_clear */ 2217 (richcmpfunc)set_richcompare, /* tp_richcompare */ 2218 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */ 2219 (getiterfunc)set_iter, /* tp_iter */ 2220 0, /* tp_iternext */ 2221 frozenset_methods, /* tp_methods */ 2222 0, /* tp_members */ 2223 0, /* tp_getset */ 2224 0, /* tp_base */ 2225 0, /* tp_dict */ 2226 0, /* tp_descr_get */ 2227 0, /* tp_descr_set */ 2228 0, /* tp_dictoffset */ 2229 0, /* tp_init */ 2230 PyType_GenericAlloc, /* tp_alloc */ 2231 frozenset_new, /* tp_new */ 2232 PyObject_GC_Del, /* tp_free */ 2233 .tp_vectorcall = frozenset_vectorcall, 2234 }; 2235 2236 2237 /***** C API functions *************************************************/ 2238 2239 PyObject * PySet_New(PyObject * iterable)2240 PySet_New(PyObject *iterable) 2241 { 2242 return make_new_set(&PySet_Type, iterable); 2243 } 2244 2245 PyObject * PyFrozenSet_New(PyObject * iterable)2246 PyFrozenSet_New(PyObject *iterable) 2247 { 2248 return make_new_set(&PyFrozenSet_Type, iterable); 2249 } 2250 2251 Py_ssize_t PySet_Size(PyObject * anyset)2252 PySet_Size(PyObject *anyset) 2253 { 2254 if (!PyAnySet_Check(anyset)) { 2255 PyErr_BadInternalCall(); 2256 return -1; 2257 } 2258 return PySet_GET_SIZE(anyset); 2259 } 2260 2261 int PySet_Clear(PyObject * set)2262 PySet_Clear(PyObject *set) 2263 { 2264 if (!PySet_Check(set)) { 2265 PyErr_BadInternalCall(); 2266 return -1; 2267 } 2268 return set_clear_internal((PySetObject *)set); 2269 } 2270 2271 int PySet_Contains(PyObject * anyset,PyObject * key)2272 PySet_Contains(PyObject *anyset, PyObject *key) 2273 { 2274 if (!PyAnySet_Check(anyset)) { 2275 PyErr_BadInternalCall(); 2276 return -1; 2277 } 2278 return set_contains_key((PySetObject *)anyset, key); 2279 } 2280 2281 int PySet_Discard(PyObject * set,PyObject * key)2282 PySet_Discard(PyObject *set, PyObject *key) 2283 { 2284 if (!PySet_Check(set)) { 2285 PyErr_BadInternalCall(); 2286 return -1; 2287 } 2288 return set_discard_key((PySetObject *)set, key); 2289 } 2290 2291 int PySet_Add(PyObject * anyset,PyObject * key)2292 PySet_Add(PyObject *anyset, PyObject *key) 2293 { 2294 if (!PySet_Check(anyset) && 2295 (!PyFrozenSet_Check(anyset) || Py_REFCNT(anyset) != 1)) { 2296 PyErr_BadInternalCall(); 2297 return -1; 2298 } 2299 return set_add_key((PySetObject *)anyset, key); 2300 } 2301 2302 void _PySet_Fini(void)2303 _PySet_Fini(void) 2304 { 2305 Py_CLEAR(emptyfrozenset); 2306 } 2307 2308 int _PySet_NextEntry(PyObject * set,Py_ssize_t * pos,PyObject ** key,Py_hash_t * hash)2309 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, Py_hash_t *hash) 2310 { 2311 setentry *entry; 2312 2313 if (!PyAnySet_Check(set)) { 2314 PyErr_BadInternalCall(); 2315 return -1; 2316 } 2317 if (set_next((PySetObject *)set, pos, &entry) == 0) 2318 return 0; 2319 *key = entry->key; 2320 *hash = entry->hash; 2321 return 1; 2322 } 2323 2324 PyObject * PySet_Pop(PyObject * set)2325 PySet_Pop(PyObject *set) 2326 { 2327 if (!PySet_Check(set)) { 2328 PyErr_BadInternalCall(); 2329 return NULL; 2330 } 2331 return set_pop((PySetObject *)set, NULL); 2332 } 2333 2334 int _PySet_Update(PyObject * set,PyObject * iterable)2335 _PySet_Update(PyObject *set, PyObject *iterable) 2336 { 2337 if (!PySet_Check(set)) { 2338 PyErr_BadInternalCall(); 2339 return -1; 2340 } 2341 return set_update_internal((PySetObject *)set, iterable); 2342 } 2343 2344 /* Exported for the gdb plugin's benefit. */ 2345 PyObject *_PySet_Dummy = dummy; 2346 2347 #ifdef Py_DEBUG 2348 2349 /* Test code to be called with any three element set. 2350 Returns True and original set is restored. */ 2351 2352 #define assertRaises(call_return_value, exception) \ 2353 do { \ 2354 assert(call_return_value); \ 2355 assert(PyErr_ExceptionMatches(exception)); \ 2356 PyErr_Clear(); \ 2357 } while(0) 2358 2359 static PyObject * test_c_api(PySetObject * so,PyObject * Py_UNUSED (ignored))2360 test_c_api(PySetObject *so, PyObject *Py_UNUSED(ignored)) 2361 { 2362 Py_ssize_t count; 2363 const char *s; 2364 Py_ssize_t i; 2365 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x=NULL; 2366 PyObject *ob = (PyObject *)so; 2367 Py_hash_t hash; 2368 PyObject *str; 2369 2370 /* Verify preconditions */ 2371 assert(PyAnySet_Check(ob)); 2372 assert(PyAnySet_CheckExact(ob)); 2373 assert(!PyFrozenSet_CheckExact(ob)); 2374 2375 /* so.clear(); so |= set("abc"); */ 2376 str = PyUnicode_FromString("abc"); 2377 if (str == NULL) 2378 return NULL; 2379 set_clear_internal(so); 2380 if (set_update_internal(so, str)) { 2381 Py_DECREF(str); 2382 return NULL; 2383 } 2384 Py_DECREF(str); 2385 2386 /* Exercise type/size checks */ 2387 assert(PySet_Size(ob) == 3); 2388 assert(PySet_GET_SIZE(ob) == 3); 2389 2390 /* Raise TypeError for non-iterable constructor arguments */ 2391 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError); 2392 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError); 2393 2394 /* Raise TypeError for unhashable key */ 2395 dup = PySet_New(ob); 2396 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError); 2397 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError); 2398 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError); 2399 2400 /* Exercise successful pop, contains, add, and discard */ 2401 elem = PySet_Pop(ob); 2402 assert(PySet_Contains(ob, elem) == 0); 2403 assert(PySet_GET_SIZE(ob) == 2); 2404 assert(PySet_Add(ob, elem) == 0); 2405 assert(PySet_Contains(ob, elem) == 1); 2406 assert(PySet_GET_SIZE(ob) == 3); 2407 assert(PySet_Discard(ob, elem) == 1); 2408 assert(PySet_GET_SIZE(ob) == 2); 2409 assert(PySet_Discard(ob, elem) == 0); 2410 assert(PySet_GET_SIZE(ob) == 2); 2411 2412 /* Exercise clear */ 2413 dup2 = PySet_New(dup); 2414 assert(PySet_Clear(dup2) == 0); 2415 assert(PySet_Size(dup2) == 0); 2416 Py_DECREF(dup2); 2417 2418 /* Raise SystemError on clear or update of frozen set */ 2419 f = PyFrozenSet_New(dup); 2420 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError); 2421 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError); 2422 assert(PySet_Add(f, elem) == 0); 2423 Py_INCREF(f); 2424 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError); 2425 Py_DECREF(f); 2426 Py_DECREF(f); 2427 2428 /* Exercise direct iteration */ 2429 i = 0, count = 0; 2430 while (_PySet_NextEntry((PyObject *)dup, &i, &x, &hash)) { 2431 s = PyUnicode_AsUTF8(x); 2432 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c')); 2433 count++; 2434 } 2435 assert(count == 3); 2436 2437 /* Exercise updates */ 2438 dup2 = PySet_New(NULL); 2439 assert(_PySet_Update(dup2, dup) == 0); 2440 assert(PySet_Size(dup2) == 3); 2441 assert(_PySet_Update(dup2, dup) == 0); 2442 assert(PySet_Size(dup2) == 3); 2443 Py_DECREF(dup2); 2444 2445 /* Raise SystemError when self argument is not a set or frozenset. */ 2446 t = PyTuple_New(0); 2447 assertRaises(PySet_Size(t) == -1, PyExc_SystemError); 2448 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError); 2449 Py_DECREF(t); 2450 2451 /* Raise SystemError when self argument is not a set. */ 2452 f = PyFrozenSet_New(dup); 2453 assert(PySet_Size(f) == 3); 2454 assert(PyFrozenSet_CheckExact(f)); 2455 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError); 2456 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError); 2457 Py_DECREF(f); 2458 2459 /* Raise KeyError when popping from an empty set */ 2460 assert(PyNumber_InPlaceSubtract(ob, ob) == ob); 2461 Py_DECREF(ob); 2462 assert(PySet_GET_SIZE(ob) == 0); 2463 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError); 2464 2465 /* Restore the set from the copy using the PyNumber API */ 2466 assert(PyNumber_InPlaceOr(ob, dup) == ob); 2467 Py_DECREF(ob); 2468 2469 /* Verify constructors accept NULL arguments */ 2470 f = PySet_New(NULL); 2471 assert(f != NULL); 2472 assert(PySet_GET_SIZE(f) == 0); 2473 Py_DECREF(f); 2474 f = PyFrozenSet_New(NULL); 2475 assert(f != NULL); 2476 assert(PyFrozenSet_CheckExact(f)); 2477 assert(PySet_GET_SIZE(f) == 0); 2478 Py_DECREF(f); 2479 2480 Py_DECREF(elem); 2481 Py_DECREF(dup); 2482 Py_RETURN_TRUE; 2483 } 2484 2485 #undef assertRaises 2486 2487 #endif 2488 2489 /***** Dummy Struct *************************************************/ 2490 2491 static PyObject * dummy_repr(PyObject * op)2492 dummy_repr(PyObject *op) 2493 { 2494 return PyUnicode_FromString("<dummy key>"); 2495 } 2496 2497 static void _Py_NO_RETURN dummy_dealloc(PyObject * ignore)2498 dummy_dealloc(PyObject* ignore) 2499 { 2500 Py_FatalError("deallocating <dummy key>"); 2501 } 2502 2503 static PyTypeObject _PySetDummy_Type = { 2504 PyVarObject_HEAD_INIT(&PyType_Type, 0) 2505 "<dummy key> type", 2506 0, 2507 0, 2508 dummy_dealloc, /*tp_dealloc*/ /*never called*/ 2509 0, /*tp_vectorcall_offset*/ 2510 0, /*tp_getattr*/ 2511 0, /*tp_setattr*/ 2512 0, /*tp_as_async*/ 2513 dummy_repr, /*tp_repr*/ 2514 0, /*tp_as_number*/ 2515 0, /*tp_as_sequence*/ 2516 0, /*tp_as_mapping*/ 2517 0, /*tp_hash */ 2518 0, /*tp_call */ 2519 0, /*tp_str */ 2520 0, /*tp_getattro */ 2521 0, /*tp_setattro */ 2522 0, /*tp_as_buffer */ 2523 Py_TPFLAGS_DEFAULT, /*tp_flags */ 2524 }; 2525 2526 static PyObject _dummy_struct = { 2527 _PyObject_EXTRA_INIT 2528 2, &_PySetDummy_Type 2529 }; 2530 2531