1 /* Dictionary object implementation using a hash table */
2
3 /* The distribution includes a separate file, Objects/dictnotes.txt,
4 describing explorations into dictionary design and optimization.
5 It covers typical dictionary use patterns, the parameters for
6 tuning dictionaries, and several ideas for possible optimizations.
7 */
8
9 /* PyDictKeysObject
10
11 This implements the dictionary's hashtable.
12
13 As of Python 3.6, this is compact and ordered. Basic idea is described here:
14 * https://mail.python.org/pipermail/python-dev/2012-December/123028.html
15 * https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
16
17 layout:
18
19 +---------------+
20 | dk_refcnt |
21 | dk_size |
22 | dk_lookup |
23 | dk_usable |
24 | dk_nentries |
25 +---------------+
26 | dk_indices |
27 | |
28 +---------------+
29 | dk_entries |
30 | |
31 +---------------+
32
33 dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1)
34 or DKIX_DUMMY(-2).
35 Size of indices is dk_size. Type of each index in indices is vary on dk_size:
36
37 * int8 for dk_size <= 128
38 * int16 for 256 <= dk_size <= 2**15
39 * int32 for 2**16 <= dk_size <= 2**31
40 * int64 for 2**32 <= dk_size
41
42 dk_entries is array of PyDictKeyEntry. Its size is USABLE_FRACTION(dk_size).
43 DK_ENTRIES(dk) can be used to get pointer to entries.
44
45 NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of
46 dk_indices entry is signed integer and int16 is used for table which
47 dk_size == 256.
48 */
49
50
51 /*
52 The DictObject can be in one of two forms.
53
54 Either:
55 A combined table:
56 ma_values == NULL, dk_refcnt == 1.
57 Values are stored in the me_value field of the PyDictKeysObject.
58 Or:
59 A split table:
60 ma_values != NULL, dk_refcnt >= 1
61 Values are stored in the ma_values array.
62 Only string (unicode) keys are allowed.
63 All dicts sharing same key must have same insertion order.
64
65 There are four kinds of slots in the table (slot is index, and
66 DK_ENTRIES(keys)[index] if index >= 0):
67
68 1. Unused. index == DKIX_EMPTY
69 Does not hold an active (key, value) pair now and never did. Unused can
70 transition to Active upon key insertion. This is each slot's initial state.
71
72 2. Active. index >= 0, me_key != NULL and me_value != NULL
73 Holds an active (key, value) pair. Active can transition to Dummy or
74 Pending upon key deletion (for combined and split tables respectively).
75 This is the only case in which me_value != NULL.
76
77 3. Dummy. index == DKIX_DUMMY (combined only)
78 Previously held an active (key, value) pair, but that was deleted and an
79 active pair has not yet overwritten the slot. Dummy can transition to
80 Active upon key insertion. Dummy slots cannot be made Unused again
81 else the probe sequence in case of collision would have no way to know
82 they were once active.
83
84 4. Pending. index >= 0, key != NULL, and value == NULL (split only)
85 Not yet inserted in split-table.
86 */
87
88 /*
89 Preserving insertion order
90
91 It's simple for combined table. Since dk_entries is mostly append only, we can
92 get insertion order by just iterating dk_entries.
93
94 One exception is .popitem(). It removes last item in dk_entries and decrement
95 dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in
96 dk_indices, we can't increment dk_usable even though dk_nentries is
97 decremented.
98
99 In split table, inserting into pending entry is allowed only for dk_entries[ix]
100 where ix == mp->ma_used. Inserting into other index and deleting item cause
101 converting the dict to the combined table.
102 */
103
104 /* PyDict_MINSIZE is the starting size for any new dict.
105 * 8 allows dicts with no more than 5 active entries; experiments suggested
106 * this suffices for the majority of dicts (consisting mostly of usually-small
107 * dicts created to pass keyword arguments).
108 * Making this 8, rather than 4 reduces the number of resizes for most
109 * dictionaries, without any significant extra memory use.
110 */
111 #define PyDict_MINSIZE 8
112
113 #include "Python.h"
114 #include "pycore_bitutils.h" // _Py_bit_length
115 #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
116 #include "pycore_object.h" // _PyObject_GC_TRACK()
117 #include "pycore_pyerrors.h" // _PyErr_Fetch()
118 #include "pycore_pystate.h" // _PyThreadState_GET()
119 #include "dict-common.h"
120 #include "stringlib/eq.h" // unicode_eq()
121
122 /*[clinic input]
123 class dict "PyDictObject *" "&PyDict_Type"
124 [clinic start generated code]*/
125 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/
126
127
128 /*
129 To ensure the lookup algorithm terminates, there must be at least one Unused
130 slot (NULL key) in the table.
131 To avoid slowing down lookups on a near-full table, we resize the table when
132 it's USABLE_FRACTION (currently two-thirds) full.
133 */
134
135 #define PERTURB_SHIFT 5
136
137 /*
138 Major subtleties ahead: Most hash schemes depend on having a "good" hash
139 function, in the sense of simulating randomness. Python doesn't: its most
140 important hash functions (for ints) are very regular in common
141 cases:
142
143 >>>[hash(i) for i in range(4)]
144 [0, 1, 2, 3]
145
146 This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
147 the low-order i bits as the initial table index is extremely fast, and there
148 are no collisions at all for dicts indexed by a contiguous range of ints. So
149 this gives better-than-random behavior in common cases, and that's very
150 desirable.
151
152 OTOH, when collisions occur, the tendency to fill contiguous slices of the
153 hash table makes a good collision resolution strategy crucial. Taking only
154 the last i bits of the hash code is also vulnerable: for example, consider
155 the list [i << 16 for i in range(20000)] as a set of keys. Since ints are
156 their own hash codes, and this fits in a dict of size 2**15, the last 15 bits
157 of every hash code are all 0: they *all* map to the same table index.
158
159 But catering to unusual cases should not slow the usual ones, so we just take
160 the last i bits anyway. It's up to collision resolution to do the rest. If
161 we *usually* find the key we're looking for on the first try (and, it turns
162 out, we usually do -- the table load factor is kept under 2/3, so the odds
163 are solidly in our favor), then it makes best sense to keep the initial index
164 computation dirt cheap.
165
166 The first half of collision resolution is to visit table indices via this
167 recurrence:
168
169 j = ((5*j) + 1) mod 2**i
170
171 For any initial j in range(2**i), repeating that 2**i times generates each
172 int in range(2**i) exactly once (see any text on random-number generation for
173 proof). By itself, this doesn't help much: like linear probing (setting
174 j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
175 order. This would be bad, except that's not the only thing we do, and it's
176 actually *good* in the common cases where hash keys are consecutive. In an
177 example that's really too small to make this entirely clear, for a table of
178 size 2**3 the order of indices is:
179
180 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
181
182 If two things come in at index 5, the first place we look after is index 2,
183 not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
184 Linear probing is deadly in this case because there the fixed probe order
185 is the *same* as the order consecutive keys are likely to arrive. But it's
186 extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
187 and certain that consecutive hash codes do not.
188
189 The other half of the strategy is to get the other bits of the hash code
190 into play. This is done by initializing a (unsigned) vrbl "perturb" to the
191 full hash code, and changing the recurrence to:
192
193 perturb >>= PERTURB_SHIFT;
194 j = (5*j) + 1 + perturb;
195 use j % 2**i as the next table index;
196
197 Now the probe sequence depends (eventually) on every bit in the hash code,
198 and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
199 because it quickly magnifies small differences in the bits that didn't affect
200 the initial index. Note that because perturb is unsigned, if the recurrence
201 is executed often enough perturb eventually becomes and remains 0. At that
202 point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
203 that's certain to find an empty slot eventually (since it generates every int
204 in range(2**i), and we make sure there's always at least one empty slot).
205
206 Selecting a good value for PERTURB_SHIFT is a balancing act. You want it
207 small so that the high bits of the hash code continue to affect the probe
208 sequence across iterations; but you want it large so that in really bad cases
209 the high-order hash bits have an effect on early iterations. 5 was "the
210 best" in minimizing total collisions across experiments Tim Peters ran (on
211 both normal and pathological cases), but 4 and 6 weren't significantly worse.
212
213 Historical: Reimer Behrends contributed the idea of using a polynomial-based
214 approach, using repeated multiplication by x in GF(2**n) where an irreducible
215 polynomial for each table size was chosen such that x was a primitive root.
216 Christian Tismer later extended that to use division by x instead, as an
217 efficient way to get the high bits of the hash code into play. This scheme
218 also gave excellent collision statistics, but was more expensive: two
219 if-tests were required inside the loop; computing "the next" index took about
220 the same number of operations but without as much potential parallelism
221 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
222 above, and then shifting perturb can be done while the table index is being
223 masked); and the PyDictObject struct required a member to hold the table's
224 polynomial. In Tim's experiments the current scheme ran faster, produced
225 equally good collision statistics, needed less code & used less memory.
226
227 */
228
229 /* forward declarations */
230 static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
231 Py_hash_t hash, PyObject **value_addr);
232 static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
233 Py_hash_t hash, PyObject **value_addr);
234 static Py_ssize_t
235 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
236 Py_hash_t hash, PyObject **value_addr);
237 static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
238 Py_hash_t hash, PyObject **value_addr);
239
240 static int dictresize(PyDictObject *mp, Py_ssize_t newsize);
241
242 static PyObject* dict_iter(PyDictObject *dict);
243
244 /*Global counter used to set ma_version_tag field of dictionary.
245 * It is incremented each time that a dictionary is created and each
246 * time that a dictionary is modified. */
247 static uint64_t pydict_global_version = 0;
248
249 #define DICT_NEXT_VERSION() (++pydict_global_version)
250
251 #include "clinic/dictobject.c.h"
252
253
254 static struct _Py_dict_state *
get_dict_state(void)255 get_dict_state(void)
256 {
257 PyInterpreterState *interp = _PyInterpreterState_GET();
258 return &interp->dict_state;
259 }
260
261
262 void
_PyDict_ClearFreeList(PyInterpreterState * interp)263 _PyDict_ClearFreeList(PyInterpreterState *interp)
264 {
265 struct _Py_dict_state *state = &interp->dict_state;
266 while (state->numfree) {
267 PyDictObject *op = state->free_list[--state->numfree];
268 assert(PyDict_CheckExact(op));
269 PyObject_GC_Del(op);
270 }
271 while (state->keys_numfree) {
272 PyObject_Free(state->keys_free_list[--state->keys_numfree]);
273 }
274 }
275
276
277 void
_PyDict_Fini(PyInterpreterState * interp)278 _PyDict_Fini(PyInterpreterState *interp)
279 {
280 _PyDict_ClearFreeList(interp);
281 #ifdef Py_DEBUG
282 struct _Py_dict_state *state = &interp->dict_state;
283 state->numfree = -1;
284 state->keys_numfree = -1;
285 #endif
286 }
287
288
289 /* Print summary info about the state of the optimized allocator */
290 void
_PyDict_DebugMallocStats(FILE * out)291 _PyDict_DebugMallocStats(FILE *out)
292 {
293 struct _Py_dict_state *state = get_dict_state();
294 _PyDebugAllocatorStats(out, "free PyDictObject",
295 state->numfree, sizeof(PyDictObject));
296 }
297
298
299 #define DK_SIZE(dk) ((dk)->dk_size)
300 #if SIZEOF_VOID_P > 4
301 #define DK_IXSIZE(dk) \
302 (DK_SIZE(dk) <= 0xff ? \
303 1 : DK_SIZE(dk) <= 0xffff ? \
304 2 : DK_SIZE(dk) <= 0xffffffff ? \
305 4 : sizeof(int64_t))
306 #else
307 #define DK_IXSIZE(dk) \
308 (DK_SIZE(dk) <= 0xff ? \
309 1 : DK_SIZE(dk) <= 0xffff ? \
310 2 : sizeof(int32_t))
311 #endif
312 #define DK_ENTRIES(dk) \
313 ((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
314
315 #define DK_MASK(dk) (((dk)->dk_size)-1)
316 #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
317
318 static void free_keys_object(PyDictKeysObject *keys);
319
320 static inline void
dictkeys_incref(PyDictKeysObject * dk)321 dictkeys_incref(PyDictKeysObject *dk)
322 {
323 #ifdef Py_REF_DEBUG
324 _Py_RefTotal++;
325 #endif
326 dk->dk_refcnt++;
327 }
328
329 static inline void
dictkeys_decref(PyDictKeysObject * dk)330 dictkeys_decref(PyDictKeysObject *dk)
331 {
332 assert(dk->dk_refcnt > 0);
333 #ifdef Py_REF_DEBUG
334 _Py_RefTotal--;
335 #endif
336 if (--dk->dk_refcnt == 0) {
337 free_keys_object(dk);
338 }
339 }
340
341 /* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
342 static inline Py_ssize_t
dictkeys_get_index(const PyDictKeysObject * keys,Py_ssize_t i)343 dictkeys_get_index(const PyDictKeysObject *keys, Py_ssize_t i)
344 {
345 Py_ssize_t s = DK_SIZE(keys);
346 Py_ssize_t ix;
347
348 if (s <= 0xff) {
349 const int8_t *indices = (const int8_t*)(keys->dk_indices);
350 ix = indices[i];
351 }
352 else if (s <= 0xffff) {
353 const int16_t *indices = (const int16_t*)(keys->dk_indices);
354 ix = indices[i];
355 }
356 #if SIZEOF_VOID_P > 4
357 else if (s > 0xffffffff) {
358 const int64_t *indices = (const int64_t*)(keys->dk_indices);
359 ix = indices[i];
360 }
361 #endif
362 else {
363 const int32_t *indices = (const int32_t*)(keys->dk_indices);
364 ix = indices[i];
365 }
366 assert(ix >= DKIX_DUMMY);
367 return ix;
368 }
369
370 /* write to indices. */
371 static inline void
dictkeys_set_index(PyDictKeysObject * keys,Py_ssize_t i,Py_ssize_t ix)372 dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
373 {
374 Py_ssize_t s = DK_SIZE(keys);
375
376 assert(ix >= DKIX_DUMMY);
377
378 if (s <= 0xff) {
379 int8_t *indices = (int8_t*)(keys->dk_indices);
380 assert(ix <= 0x7f);
381 indices[i] = (char)ix;
382 }
383 else if (s <= 0xffff) {
384 int16_t *indices = (int16_t*)(keys->dk_indices);
385 assert(ix <= 0x7fff);
386 indices[i] = (int16_t)ix;
387 }
388 #if SIZEOF_VOID_P > 4
389 else if (s > 0xffffffff) {
390 int64_t *indices = (int64_t*)(keys->dk_indices);
391 indices[i] = ix;
392 }
393 #endif
394 else {
395 int32_t *indices = (int32_t*)(keys->dk_indices);
396 assert(ix <= 0x7fffffff);
397 indices[i] = (int32_t)ix;
398 }
399 }
400
401
402 /* USABLE_FRACTION is the maximum dictionary load.
403 * Increasing this ratio makes dictionaries more dense resulting in more
404 * collisions. Decreasing it improves sparseness at the expense of spreading
405 * indices over more cache lines and at the cost of total memory consumed.
406 *
407 * USABLE_FRACTION must obey the following:
408 * (0 < USABLE_FRACTION(n) < n) for all n >= 2
409 *
410 * USABLE_FRACTION should be quick to calculate.
411 * Fractions around 1/2 to 2/3 seem to work well in practice.
412 */
413 #define USABLE_FRACTION(n) (((n) << 1)/3)
414
415 /* Find the smallest dk_size >= minsize. */
416 static inline Py_ssize_t
calculate_keysize(Py_ssize_t minsize)417 calculate_keysize(Py_ssize_t minsize)
418 {
419 #if SIZEOF_LONG == SIZEOF_SIZE_T
420 minsize = (minsize | PyDict_MINSIZE) - 1;
421 return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1));
422 #elif defined(_MSC_VER)
423 // On 64bit Windows, sizeof(long) == 4.
424 minsize = (minsize | PyDict_MINSIZE) - 1;
425 unsigned long msb;
426 _BitScanReverse64(&msb, (uint64_t)minsize);
427 return 1LL << (msb + 1);
428 #else
429 Py_ssize_t size;
430 for (size = PyDict_MINSIZE;
431 size < minsize && size > 0;
432 size <<= 1)
433 ;
434 return size;
435 #endif
436 }
437
438 /* estimate_keysize is reverse function of USABLE_FRACTION.
439 *
440 * This can be used to reserve enough size to insert n entries without
441 * resizing.
442 */
443 static inline Py_ssize_t
estimate_keysize(Py_ssize_t n)444 estimate_keysize(Py_ssize_t n)
445 {
446 return calculate_keysize((n*3 + 1) / 2);
447 }
448
449
450 /* GROWTH_RATE. Growth rate upon hitting maximum load.
451 * Currently set to used*3.
452 * This means that dicts double in size when growing without deletions,
453 * but have more head room when the number of deletions is on a par with the
454 * number of insertions. See also bpo-17563 and bpo-33205.
455 *
456 * GROWTH_RATE was set to used*4 up to version 3.2.
457 * GROWTH_RATE was set to used*2 in version 3.3.0
458 * GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0.
459 */
460 #define GROWTH_RATE(d) ((d)->ma_used*3)
461
462 #define ENSURE_ALLOWS_DELETIONS(d) \
463 if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \
464 (d)->ma_keys->dk_lookup = lookdict_unicode; \
465 }
466
467 /* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
468 * (which cannot fail and thus can do no allocation).
469 */
470 static PyDictKeysObject empty_keys_struct = {
471 1, /* dk_refcnt */
472 1, /* dk_size */
473 lookdict_split, /* dk_lookup */
474 0, /* dk_usable (immutable) */
475 0, /* dk_nentries */
476 {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
477 DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
478 };
479
480 static PyObject *empty_values[1] = { NULL };
481
482 #define Py_EMPTY_KEYS &empty_keys_struct
483
484 /* Uncomment to check the dict content in _PyDict_CheckConsistency() */
485 /* #define DEBUG_PYDICT */
486
487 #ifdef DEBUG_PYDICT
488 # define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 1))
489 #else
490 # define ASSERT_CONSISTENT(op) assert(_PyDict_CheckConsistency((PyObject *)(op), 0))
491 #endif
492
493
494 int
_PyDict_CheckConsistency(PyObject * op,int check_content)495 _PyDict_CheckConsistency(PyObject *op, int check_content)
496 {
497 #define CHECK(expr) \
498 do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
499
500 assert(op != NULL);
501 CHECK(PyDict_Check(op));
502 PyDictObject *mp = (PyDictObject *)op;
503
504 PyDictKeysObject *keys = mp->ma_keys;
505 int splitted = _PyDict_HasSplitTable(mp);
506 Py_ssize_t usable = USABLE_FRACTION(keys->dk_size);
507
508 CHECK(0 <= mp->ma_used && mp->ma_used <= usable);
509 CHECK(IS_POWER_OF_2(keys->dk_size));
510 CHECK(0 <= keys->dk_usable && keys->dk_usable <= usable);
511 CHECK(0 <= keys->dk_nentries && keys->dk_nentries <= usable);
512 CHECK(keys->dk_usable + keys->dk_nentries <= usable);
513
514 if (!splitted) {
515 /* combined table */
516 CHECK(keys->dk_refcnt == 1);
517 }
518
519 if (check_content) {
520 PyDictKeyEntry *entries = DK_ENTRIES(keys);
521 Py_ssize_t i;
522
523 for (i=0; i < keys->dk_size; i++) {
524 Py_ssize_t ix = dictkeys_get_index(keys, i);
525 CHECK(DKIX_DUMMY <= ix && ix <= usable);
526 }
527
528 for (i=0; i < usable; i++) {
529 PyDictKeyEntry *entry = &entries[i];
530 PyObject *key = entry->me_key;
531
532 if (key != NULL) {
533 if (PyUnicode_CheckExact(key)) {
534 Py_hash_t hash = ((PyASCIIObject *)key)->hash;
535 CHECK(hash != -1);
536 CHECK(entry->me_hash == hash);
537 }
538 else {
539 /* test_dict fails if PyObject_Hash() is called again */
540 CHECK(entry->me_hash != -1);
541 }
542 if (!splitted) {
543 CHECK(entry->me_value != NULL);
544 }
545 }
546
547 if (splitted) {
548 CHECK(entry->me_value == NULL);
549 }
550 }
551
552 if (splitted) {
553 /* splitted table */
554 for (i=0; i < mp->ma_used; i++) {
555 CHECK(mp->ma_values[i] != NULL);
556 }
557 }
558 }
559 return 1;
560
561 #undef CHECK
562 }
563
564
565 static PyDictKeysObject*
new_keys_object(Py_ssize_t size)566 new_keys_object(Py_ssize_t size)
567 {
568 PyDictKeysObject *dk;
569 Py_ssize_t es, usable;
570
571 assert(size >= PyDict_MINSIZE);
572 assert(IS_POWER_OF_2(size));
573
574 usable = USABLE_FRACTION(size);
575 if (size <= 0xff) {
576 es = 1;
577 }
578 else if (size <= 0xffff) {
579 es = 2;
580 }
581 #if SIZEOF_VOID_P > 4
582 else if (size <= 0xffffffff) {
583 es = 4;
584 }
585 #endif
586 else {
587 es = sizeof(Py_ssize_t);
588 }
589
590 struct _Py_dict_state *state = get_dict_state();
591 #ifdef Py_DEBUG
592 // new_keys_object() must not be called after _PyDict_Fini()
593 assert(state->keys_numfree != -1);
594 #endif
595 if (size == PyDict_MINSIZE && state->keys_numfree > 0) {
596 dk = state->keys_free_list[--state->keys_numfree];
597 }
598 else
599 {
600 dk = PyObject_Malloc(sizeof(PyDictKeysObject)
601 + es * size
602 + sizeof(PyDictKeyEntry) * usable);
603 if (dk == NULL) {
604 PyErr_NoMemory();
605 return NULL;
606 }
607 }
608 #ifdef Py_REF_DEBUG
609 _Py_RefTotal++;
610 #endif
611 dk->dk_refcnt = 1;
612 dk->dk_size = size;
613 dk->dk_usable = usable;
614 dk->dk_lookup = lookdict_unicode_nodummy;
615 dk->dk_nentries = 0;
616 memset(&dk->dk_indices[0], 0xff, es * size);
617 memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
618 return dk;
619 }
620
621 static void
free_keys_object(PyDictKeysObject * keys)622 free_keys_object(PyDictKeysObject *keys)
623 {
624 PyDictKeyEntry *entries = DK_ENTRIES(keys);
625 Py_ssize_t i, n;
626 for (i = 0, n = keys->dk_nentries; i < n; i++) {
627 Py_XDECREF(entries[i].me_key);
628 Py_XDECREF(entries[i].me_value);
629 }
630 struct _Py_dict_state *state = get_dict_state();
631 #ifdef Py_DEBUG
632 // free_keys_object() must not be called after _PyDict_Fini()
633 assert(state->keys_numfree != -1);
634 #endif
635 if (keys->dk_size == PyDict_MINSIZE && state->keys_numfree < PyDict_MAXFREELIST) {
636 state->keys_free_list[state->keys_numfree++] = keys;
637 return;
638 }
639 PyObject_Free(keys);
640 }
641
642 #define new_values(size) PyMem_NEW(PyObject *, size)
643 #define free_values(values) PyMem_Free(values)
644
645 /* Consumes a reference to the keys object */
646 static PyObject *
new_dict(PyDictKeysObject * keys,PyObject ** values)647 new_dict(PyDictKeysObject *keys, PyObject **values)
648 {
649 PyDictObject *mp;
650 assert(keys != NULL);
651 struct _Py_dict_state *state = get_dict_state();
652 #ifdef Py_DEBUG
653 // new_dict() must not be called after _PyDict_Fini()
654 assert(state->numfree != -1);
655 #endif
656 if (state->numfree) {
657 mp = state->free_list[--state->numfree];
658 assert (mp != NULL);
659 assert (Py_IS_TYPE(mp, &PyDict_Type));
660 _Py_NewReference((PyObject *)mp);
661 }
662 else {
663 mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
664 if (mp == NULL) {
665 dictkeys_decref(keys);
666 if (values != empty_values) {
667 free_values(values);
668 }
669 return NULL;
670 }
671 }
672 mp->ma_keys = keys;
673 mp->ma_values = values;
674 mp->ma_used = 0;
675 mp->ma_version_tag = DICT_NEXT_VERSION();
676 ASSERT_CONSISTENT(mp);
677 return (PyObject *)mp;
678 }
679
680 /* Consumes a reference to the keys object */
681 static PyObject *
new_dict_with_shared_keys(PyDictKeysObject * keys)682 new_dict_with_shared_keys(PyDictKeysObject *keys)
683 {
684 PyObject **values;
685 Py_ssize_t i, size;
686
687 size = USABLE_FRACTION(DK_SIZE(keys));
688 values = new_values(size);
689 if (values == NULL) {
690 dictkeys_decref(keys);
691 return PyErr_NoMemory();
692 }
693 for (i = 0; i < size; i++) {
694 values[i] = NULL;
695 }
696 return new_dict(keys, values);
697 }
698
699
700 static PyDictKeysObject *
clone_combined_dict_keys(PyDictObject * orig)701 clone_combined_dict_keys(PyDictObject *orig)
702 {
703 assert(PyDict_Check(orig));
704 assert(Py_TYPE(orig)->tp_iter == (getiterfunc)dict_iter);
705 assert(orig->ma_values == NULL);
706 assert(orig->ma_keys->dk_refcnt == 1);
707
708 Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys);
709 PyDictKeysObject *keys = PyObject_Malloc(keys_size);
710 if (keys == NULL) {
711 PyErr_NoMemory();
712 return NULL;
713 }
714
715 memcpy(keys, orig->ma_keys, keys_size);
716
717 /* After copying key/value pairs, we need to incref all
718 keys and values and they are about to be co-owned by a
719 new dict object. */
720 PyDictKeyEntry *ep0 = DK_ENTRIES(keys);
721 Py_ssize_t n = keys->dk_nentries;
722 for (Py_ssize_t i = 0; i < n; i++) {
723 PyDictKeyEntry *entry = &ep0[i];
724 PyObject *value = entry->me_value;
725 if (value != NULL) {
726 Py_INCREF(value);
727 Py_INCREF(entry->me_key);
728 }
729 }
730
731 /* Since we copied the keys table we now have an extra reference
732 in the system. Manually call increment _Py_RefTotal to signal that
733 we have it now; calling dictkeys_incref would be an error as
734 keys->dk_refcnt is already set to 1 (after memcpy). */
735 #ifdef Py_REF_DEBUG
736 _Py_RefTotal++;
737 #endif
738 return keys;
739 }
740
741 PyObject *
PyDict_New(void)742 PyDict_New(void)
743 {
744 dictkeys_incref(Py_EMPTY_KEYS);
745 return new_dict(Py_EMPTY_KEYS, empty_values);
746 }
747
748 /* Search index of hash table from offset of entry table */
749 static Py_ssize_t
lookdict_index(PyDictKeysObject * k,Py_hash_t hash,Py_ssize_t index)750 lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
751 {
752 size_t mask = DK_MASK(k);
753 size_t perturb = (size_t)hash;
754 size_t i = (size_t)hash & mask;
755
756 for (;;) {
757 Py_ssize_t ix = dictkeys_get_index(k, i);
758 if (ix == index) {
759 return i;
760 }
761 if (ix == DKIX_EMPTY) {
762 return DKIX_EMPTY;
763 }
764 perturb >>= PERTURB_SHIFT;
765 i = mask & (i*5 + perturb + 1);
766 }
767 Py_UNREACHABLE();
768 }
769
770 /*
771 The basic lookup function used by all operations.
772 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
773 Open addressing is preferred over chaining since the link overhead for
774 chaining would be substantial (100% with typical malloc overhead).
775
776 The initial probe index is computed as hash mod the table size. Subsequent
777 probe indices are computed as explained earlier.
778
779 All arithmetic on hash should ignore overflow.
780
781 The details in this version are due to Tim Peters, building on many past
782 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
783 Christian Tismer.
784
785 lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
786 comparison raises an exception.
787 lookdict_unicode() below is specialized to string keys, comparison of which can
788 never raise an exception; that function can never return DKIX_ERROR when key
789 is string. Otherwise, it falls back to lookdict().
790 lookdict_unicode_nodummy is further specialized for string keys that cannot be
791 the <dummy> value.
792 For both, when the key isn't found a DKIX_EMPTY is returned.
793 */
794 static Py_ssize_t _Py_HOT_FUNCTION
lookdict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)795 lookdict(PyDictObject *mp, PyObject *key,
796 Py_hash_t hash, PyObject **value_addr)
797 {
798 size_t i, mask, perturb;
799 PyDictKeysObject *dk;
800 PyDictKeyEntry *ep0;
801
802 top:
803 dk = mp->ma_keys;
804 ep0 = DK_ENTRIES(dk);
805 mask = DK_MASK(dk);
806 perturb = hash;
807 i = (size_t)hash & mask;
808
809 for (;;) {
810 Py_ssize_t ix = dictkeys_get_index(dk, i);
811 if (ix == DKIX_EMPTY) {
812 *value_addr = NULL;
813 return ix;
814 }
815 if (ix >= 0) {
816 PyDictKeyEntry *ep = &ep0[ix];
817 assert(ep->me_key != NULL);
818 if (ep->me_key == key) {
819 *value_addr = ep->me_value;
820 return ix;
821 }
822 if (ep->me_hash == hash) {
823 PyObject *startkey = ep->me_key;
824 Py_INCREF(startkey);
825 int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
826 Py_DECREF(startkey);
827 if (cmp < 0) {
828 *value_addr = NULL;
829 return DKIX_ERROR;
830 }
831 if (dk == mp->ma_keys && ep->me_key == startkey) {
832 if (cmp > 0) {
833 *value_addr = ep->me_value;
834 return ix;
835 }
836 }
837 else {
838 /* The dict was mutated, restart */
839 goto top;
840 }
841 }
842 }
843 perturb >>= PERTURB_SHIFT;
844 i = (i*5 + perturb + 1) & mask;
845 }
846 Py_UNREACHABLE();
847 }
848
849 /* Specialized version for string-only keys */
850 static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)851 lookdict_unicode(PyDictObject *mp, PyObject *key,
852 Py_hash_t hash, PyObject **value_addr)
853 {
854 assert(mp->ma_values == NULL);
855 /* Make sure this function doesn't have to handle non-unicode keys,
856 including subclasses of str; e.g., one reason to subclass
857 unicodes is to override __eq__, and for speed we don't cater to
858 that here. */
859 if (!PyUnicode_CheckExact(key)) {
860 return lookdict(mp, key, hash, value_addr);
861 }
862
863 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
864 size_t mask = DK_MASK(mp->ma_keys);
865 size_t perturb = (size_t)hash;
866 size_t i = (size_t)hash & mask;
867
868 for (;;) {
869 Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
870 if (ix == DKIX_EMPTY) {
871 *value_addr = NULL;
872 return DKIX_EMPTY;
873 }
874 if (ix >= 0) {
875 PyDictKeyEntry *ep = &ep0[ix];
876 assert(ep->me_key != NULL);
877 assert(PyUnicode_CheckExact(ep->me_key));
878 if (ep->me_key == key ||
879 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
880 *value_addr = ep->me_value;
881 return ix;
882 }
883 }
884 perturb >>= PERTURB_SHIFT;
885 i = mask & (i*5 + perturb + 1);
886 }
887 Py_UNREACHABLE();
888 }
889
890 /* Faster version of lookdict_unicode when it is known that no <dummy> keys
891 * will be present. */
892 static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode_nodummy(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)893 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
894 Py_hash_t hash, PyObject **value_addr)
895 {
896 assert(mp->ma_values == NULL);
897 /* Make sure this function doesn't have to handle non-unicode keys,
898 including subclasses of str; e.g., one reason to subclass
899 unicodes is to override __eq__, and for speed we don't cater to
900 that here. */
901 if (!PyUnicode_CheckExact(key)) {
902 return lookdict(mp, key, hash, value_addr);
903 }
904
905 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
906 size_t mask = DK_MASK(mp->ma_keys);
907 size_t perturb = (size_t)hash;
908 size_t i = (size_t)hash & mask;
909
910 for (;;) {
911 Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
912 assert (ix != DKIX_DUMMY);
913 if (ix == DKIX_EMPTY) {
914 *value_addr = NULL;
915 return DKIX_EMPTY;
916 }
917 PyDictKeyEntry *ep = &ep0[ix];
918 assert(ep->me_key != NULL);
919 assert(PyUnicode_CheckExact(ep->me_key));
920 if (ep->me_key == key ||
921 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
922 *value_addr = ep->me_value;
923 return ix;
924 }
925 perturb >>= PERTURB_SHIFT;
926 i = mask & (i*5 + perturb + 1);
927 }
928 Py_UNREACHABLE();
929 }
930
931 /* Version of lookdict for split tables.
932 * All split tables and only split tables use this lookup function.
933 * Split tables only contain unicode keys and no dummy keys,
934 * so algorithm is the same as lookdict_unicode_nodummy.
935 */
936 static Py_ssize_t _Py_HOT_FUNCTION
lookdict_split(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)937 lookdict_split(PyDictObject *mp, PyObject *key,
938 Py_hash_t hash, PyObject **value_addr)
939 {
940 /* mp must split table */
941 assert(mp->ma_values != NULL);
942 if (!PyUnicode_CheckExact(key)) {
943 Py_ssize_t ix = lookdict(mp, key, hash, value_addr);
944 if (ix >= 0) {
945 *value_addr = mp->ma_values[ix];
946 }
947 return ix;
948 }
949
950 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
951 size_t mask = DK_MASK(mp->ma_keys);
952 size_t perturb = (size_t)hash;
953 size_t i = (size_t)hash & mask;
954
955 for (;;) {
956 Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
957 assert (ix != DKIX_DUMMY);
958 if (ix == DKIX_EMPTY) {
959 *value_addr = NULL;
960 return DKIX_EMPTY;
961 }
962 PyDictKeyEntry *ep = &ep0[ix];
963 assert(ep->me_key != NULL);
964 assert(PyUnicode_CheckExact(ep->me_key));
965 if (ep->me_key == key ||
966 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
967 *value_addr = mp->ma_values[ix];
968 return ix;
969 }
970 perturb >>= PERTURB_SHIFT;
971 i = mask & (i*5 + perturb + 1);
972 }
973 Py_UNREACHABLE();
974 }
975
976 int
_PyDict_HasOnlyStringKeys(PyObject * dict)977 _PyDict_HasOnlyStringKeys(PyObject *dict)
978 {
979 Py_ssize_t pos = 0;
980 PyObject *key, *value;
981 assert(PyDict_Check(dict));
982 /* Shortcut */
983 if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict)
984 return 1;
985 while (PyDict_Next(dict, &pos, &key, &value))
986 if (!PyUnicode_Check(key))
987 return 0;
988 return 1;
989 }
990
991 #define MAINTAIN_TRACKING(mp, key, value) \
992 do { \
993 if (!_PyObject_GC_IS_TRACKED(mp)) { \
994 if (_PyObject_GC_MAY_BE_TRACKED(key) || \
995 _PyObject_GC_MAY_BE_TRACKED(value)) { \
996 _PyObject_GC_TRACK(mp); \
997 } \
998 } \
999 } while(0)
1000
1001 void
_PyDict_MaybeUntrack(PyObject * op)1002 _PyDict_MaybeUntrack(PyObject *op)
1003 {
1004 PyDictObject *mp;
1005 PyObject *value;
1006 Py_ssize_t i, numentries;
1007 PyDictKeyEntry *ep0;
1008
1009 if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
1010 return;
1011
1012 mp = (PyDictObject *) op;
1013 ep0 = DK_ENTRIES(mp->ma_keys);
1014 numentries = mp->ma_keys->dk_nentries;
1015 if (_PyDict_HasSplitTable(mp)) {
1016 for (i = 0; i < numentries; i++) {
1017 if ((value = mp->ma_values[i]) == NULL)
1018 continue;
1019 if (_PyObject_GC_MAY_BE_TRACKED(value)) {
1020 assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
1021 return;
1022 }
1023 }
1024 }
1025 else {
1026 for (i = 0; i < numentries; i++) {
1027 if ((value = ep0[i].me_value) == NULL)
1028 continue;
1029 if (_PyObject_GC_MAY_BE_TRACKED(value) ||
1030 _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
1031 return;
1032 }
1033 }
1034 _PyObject_GC_UNTRACK(op);
1035 }
1036
1037 /* Internal function to find slot for an item from its hash
1038 when it is known that the key is not present in the dict.
1039
1040 The dict must be combined. */
1041 static Py_ssize_t
find_empty_slot(PyDictKeysObject * keys,Py_hash_t hash)1042 find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
1043 {
1044 assert(keys != NULL);
1045
1046 const size_t mask = DK_MASK(keys);
1047 size_t i = hash & mask;
1048 Py_ssize_t ix = dictkeys_get_index(keys, i);
1049 for (size_t perturb = hash; ix >= 0;) {
1050 perturb >>= PERTURB_SHIFT;
1051 i = (i*5 + perturb + 1) & mask;
1052 ix = dictkeys_get_index(keys, i);
1053 }
1054 return i;
1055 }
1056
1057 static int
insertion_resize(PyDictObject * mp)1058 insertion_resize(PyDictObject *mp)
1059 {
1060 return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));
1061 }
1062
1063 /*
1064 Internal routine to insert a new item into the table.
1065 Used both by the internal resize routine and by the public insert routine.
1066 Returns -1 if an error occurred, or 0 on success.
1067 */
1068 static int
insertdict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject * value)1069 insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
1070 {
1071 PyObject *old_value;
1072 PyDictKeyEntry *ep;
1073
1074 Py_INCREF(key);
1075 Py_INCREF(value);
1076 if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
1077 if (insertion_resize(mp) < 0)
1078 goto Fail;
1079 }
1080
1081 Py_ssize_t ix = mp->ma_keys->dk_lookup(mp, key, hash, &old_value);
1082 if (ix == DKIX_ERROR)
1083 goto Fail;
1084
1085 MAINTAIN_TRACKING(mp, key, value);
1086
1087 /* When insertion order is different from shared key, we can't share
1088 * the key anymore. Convert this instance to combine table.
1089 */
1090 if (_PyDict_HasSplitTable(mp) &&
1091 ((ix >= 0 && old_value == NULL && mp->ma_used != ix) ||
1092 (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
1093 if (insertion_resize(mp) < 0)
1094 goto Fail;
1095 ix = DKIX_EMPTY;
1096 }
1097
1098 if (ix == DKIX_EMPTY) {
1099 /* Insert into new slot. */
1100 assert(old_value == NULL);
1101 if (mp->ma_keys->dk_usable <= 0) {
1102 /* Need to resize. */
1103 if (insertion_resize(mp) < 0)
1104 goto Fail;
1105 }
1106 if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_lookup != lookdict) {
1107 mp->ma_keys->dk_lookup = lookdict;
1108 }
1109 Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
1110 ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
1111 dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
1112 ep->me_key = key;
1113 ep->me_hash = hash;
1114 if (mp->ma_values) {
1115 assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
1116 mp->ma_values[mp->ma_keys->dk_nentries] = value;
1117 }
1118 else {
1119 ep->me_value = value;
1120 }
1121 mp->ma_used++;
1122 mp->ma_version_tag = DICT_NEXT_VERSION();
1123 mp->ma_keys->dk_usable--;
1124 mp->ma_keys->dk_nentries++;
1125 assert(mp->ma_keys->dk_usable >= 0);
1126 ASSERT_CONSISTENT(mp);
1127 return 0;
1128 }
1129
1130 if (old_value != value) {
1131 if (_PyDict_HasSplitTable(mp)) {
1132 mp->ma_values[ix] = value;
1133 if (old_value == NULL) {
1134 /* pending state */
1135 assert(ix == mp->ma_used);
1136 mp->ma_used++;
1137 }
1138 }
1139 else {
1140 assert(old_value != NULL);
1141 DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
1142 }
1143 mp->ma_version_tag = DICT_NEXT_VERSION();
1144 }
1145 Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
1146 ASSERT_CONSISTENT(mp);
1147 Py_DECREF(key);
1148 return 0;
1149
1150 Fail:
1151 Py_DECREF(value);
1152 Py_DECREF(key);
1153 return -1;
1154 }
1155
1156 // Same to insertdict but specialized for ma_keys = Py_EMPTY_KEYS.
1157 static int
insert_to_emptydict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject * value)1158 insert_to_emptydict(PyDictObject *mp, PyObject *key, Py_hash_t hash,
1159 PyObject *value)
1160 {
1161 assert(mp->ma_keys == Py_EMPTY_KEYS);
1162
1163 PyDictKeysObject *newkeys = new_keys_object(PyDict_MINSIZE);
1164 if (newkeys == NULL) {
1165 return -1;
1166 }
1167 if (!PyUnicode_CheckExact(key)) {
1168 newkeys->dk_lookup = lookdict;
1169 }
1170 dictkeys_decref(Py_EMPTY_KEYS);
1171 mp->ma_keys = newkeys;
1172 mp->ma_values = NULL;
1173
1174 Py_INCREF(key);
1175 Py_INCREF(value);
1176 MAINTAIN_TRACKING(mp, key, value);
1177
1178 size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1);
1179 PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys);
1180 dictkeys_set_index(mp->ma_keys, hashpos, 0);
1181 ep->me_key = key;
1182 ep->me_hash = hash;
1183 ep->me_value = value;
1184 mp->ma_used++;
1185 mp->ma_version_tag = DICT_NEXT_VERSION();
1186 mp->ma_keys->dk_usable--;
1187 mp->ma_keys->dk_nentries++;
1188 return 0;
1189 }
1190
1191 /*
1192 Internal routine used by dictresize() to build a hashtable of entries.
1193 */
1194 static void
build_indices(PyDictKeysObject * keys,PyDictKeyEntry * ep,Py_ssize_t n)1195 build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
1196 {
1197 size_t mask = (size_t)DK_SIZE(keys) - 1;
1198 for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
1199 Py_hash_t hash = ep->me_hash;
1200 size_t i = hash & mask;
1201 for (size_t perturb = hash; dictkeys_get_index(keys, i) != DKIX_EMPTY;) {
1202 perturb >>= PERTURB_SHIFT;
1203 i = mask & (i*5 + perturb + 1);
1204 }
1205 dictkeys_set_index(keys, i, ix);
1206 }
1207 }
1208
1209 /*
1210 Restructure the table by allocating a new table and reinserting all
1211 items again. When entries have been deleted, the new table may
1212 actually be smaller than the old one.
1213 If a table is split (its keys and hashes are shared, its values are not),
1214 then the values are temporarily copied into the table, it is resized as
1215 a combined table, then the me_value slots in the old table are NULLed out.
1216 After resizing a table is always combined,
1217 but can be resplit by make_keys_shared().
1218 */
1219 static int
dictresize(PyDictObject * mp,Py_ssize_t newsize)1220 dictresize(PyDictObject *mp, Py_ssize_t newsize)
1221 {
1222 Py_ssize_t numentries;
1223 PyDictKeysObject *oldkeys;
1224 PyObject **oldvalues;
1225 PyDictKeyEntry *oldentries, *newentries;
1226
1227 if (newsize <= 0) {
1228 PyErr_NoMemory();
1229 return -1;
1230 }
1231 assert(IS_POWER_OF_2(newsize));
1232 assert(newsize >= PyDict_MINSIZE);
1233
1234 oldkeys = mp->ma_keys;
1235
1236 /* NOTE: Current odict checks mp->ma_keys to detect resize happen.
1237 * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
1238 * TODO: Try reusing oldkeys when reimplement odict.
1239 */
1240
1241 /* Allocate a new table. */
1242 mp->ma_keys = new_keys_object(newsize);
1243 if (mp->ma_keys == NULL) {
1244 mp->ma_keys = oldkeys;
1245 return -1;
1246 }
1247 // New table must be large enough.
1248 assert(mp->ma_keys->dk_usable >= mp->ma_used);
1249 if (oldkeys->dk_lookup == lookdict)
1250 mp->ma_keys->dk_lookup = lookdict;
1251
1252 numentries = mp->ma_used;
1253 oldentries = DK_ENTRIES(oldkeys);
1254 newentries = DK_ENTRIES(mp->ma_keys);
1255 oldvalues = mp->ma_values;
1256 if (oldvalues != NULL) {
1257 /* Convert split table into new combined table.
1258 * We must incref keys; we can transfer values.
1259 * Note that values of split table is always dense.
1260 */
1261 for (Py_ssize_t i = 0; i < numentries; i++) {
1262 assert(oldvalues[i] != NULL);
1263 PyDictKeyEntry *ep = &oldentries[i];
1264 PyObject *key = ep->me_key;
1265 Py_INCREF(key);
1266 newentries[i].me_key = key;
1267 newentries[i].me_hash = ep->me_hash;
1268 newentries[i].me_value = oldvalues[i];
1269 }
1270
1271 dictkeys_decref(oldkeys);
1272 mp->ma_values = NULL;
1273 if (oldvalues != empty_values) {
1274 free_values(oldvalues);
1275 }
1276 }
1277 else { // combined table.
1278 if (oldkeys->dk_nentries == numentries) {
1279 memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
1280 }
1281 else {
1282 PyDictKeyEntry *ep = oldentries;
1283 for (Py_ssize_t i = 0; i < numentries; i++) {
1284 while (ep->me_value == NULL)
1285 ep++;
1286 newentries[i] = *ep++;
1287 }
1288 }
1289
1290 assert(oldkeys->dk_lookup != lookdict_split);
1291 assert(oldkeys->dk_refcnt == 1);
1292 #ifdef Py_REF_DEBUG
1293 _Py_RefTotal--;
1294 #endif
1295 struct _Py_dict_state *state = get_dict_state();
1296 #ifdef Py_DEBUG
1297 // dictresize() must not be called after _PyDict_Fini()
1298 assert(state->keys_numfree != -1);
1299 #endif
1300 if (oldkeys->dk_size == PyDict_MINSIZE &&
1301 state->keys_numfree < PyDict_MAXFREELIST)
1302 {
1303 state->keys_free_list[state->keys_numfree++] = oldkeys;
1304 }
1305 else {
1306 PyObject_Free(oldkeys);
1307 }
1308 }
1309
1310 build_indices(mp->ma_keys, newentries, numentries);
1311 mp->ma_keys->dk_usable -= numentries;
1312 mp->ma_keys->dk_nentries = numentries;
1313 return 0;
1314 }
1315
1316 /* Returns NULL if unable to split table.
1317 * A NULL return does not necessarily indicate an error */
1318 static PyDictKeysObject *
make_keys_shared(PyObject * op)1319 make_keys_shared(PyObject *op)
1320 {
1321 Py_ssize_t i;
1322 Py_ssize_t size;
1323 PyDictObject *mp = (PyDictObject *)op;
1324
1325 if (!PyDict_CheckExact(op))
1326 return NULL;
1327 if (!_PyDict_HasSplitTable(mp)) {
1328 PyDictKeyEntry *ep0;
1329 PyObject **values;
1330 assert(mp->ma_keys->dk_refcnt == 1);
1331 if (mp->ma_keys->dk_lookup == lookdict) {
1332 return NULL;
1333 }
1334 else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
1335 /* Remove dummy keys */
1336 if (dictresize(mp, DK_SIZE(mp->ma_keys)))
1337 return NULL;
1338 }
1339 assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
1340 /* Copy values into a new array */
1341 ep0 = DK_ENTRIES(mp->ma_keys);
1342 size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
1343 values = new_values(size);
1344 if (values == NULL) {
1345 PyErr_SetString(PyExc_MemoryError,
1346 "Not enough memory to allocate new values array");
1347 return NULL;
1348 }
1349 for (i = 0; i < size; i++) {
1350 values[i] = ep0[i].me_value;
1351 ep0[i].me_value = NULL;
1352 }
1353 mp->ma_keys->dk_lookup = lookdict_split;
1354 mp->ma_values = values;
1355 }
1356 dictkeys_incref(mp->ma_keys);
1357 return mp->ma_keys;
1358 }
1359
1360 PyObject *
_PyDict_NewPresized(Py_ssize_t minused)1361 _PyDict_NewPresized(Py_ssize_t minused)
1362 {
1363 const Py_ssize_t max_presize = 128 * 1024;
1364 Py_ssize_t newsize;
1365 PyDictKeysObject *new_keys;
1366
1367 if (minused <= USABLE_FRACTION(PyDict_MINSIZE)) {
1368 return PyDict_New();
1369 }
1370 /* There are no strict guarantee that returned dict can contain minused
1371 * items without resize. So we create medium size dict instead of very
1372 * large dict or MemoryError.
1373 */
1374 if (minused > USABLE_FRACTION(max_presize)) {
1375 newsize = max_presize;
1376 }
1377 else {
1378 newsize = estimate_keysize(minused);
1379 }
1380
1381 new_keys = new_keys_object(newsize);
1382 if (new_keys == NULL)
1383 return NULL;
1384 return new_dict(new_keys, NULL);
1385 }
1386
1387 /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
1388 * that may occur (originally dicts supported only string keys, and exceptions
1389 * weren't possible). So, while the original intent was that a NULL return
1390 * meant the key wasn't present, in reality it can mean that, or that an error
1391 * (suppressed) occurred while computing the key's hash, or that some error
1392 * (suppressed) occurred when comparing keys in the dict's internal probe
1393 * sequence. A nasty example of the latter is when a Python-coded comparison
1394 * function hits a stack-depth error, which can cause this to return NULL
1395 * even if the key is present.
1396 */
1397 PyObject *
PyDict_GetItem(PyObject * op,PyObject * key)1398 PyDict_GetItem(PyObject *op, PyObject *key)
1399 {
1400 if (!PyDict_Check(op)) {
1401 return NULL;
1402 }
1403 PyDictObject *mp = (PyDictObject *)op;
1404
1405 Py_hash_t hash;
1406 if (!PyUnicode_CheckExact(key) ||
1407 (hash = ((PyASCIIObject *) key)->hash) == -1)
1408 {
1409 hash = PyObject_Hash(key);
1410 if (hash == -1) {
1411 PyErr_Clear();
1412 return NULL;
1413 }
1414 }
1415
1416 PyThreadState *tstate = _PyThreadState_GET();
1417 #ifdef Py_DEBUG
1418 // bpo-40839: Before Python 3.10, it was possible to call PyDict_GetItem()
1419 // with the GIL released.
1420 _Py_EnsureTstateNotNULL(tstate);
1421 #endif
1422
1423 /* Preserve the existing exception */
1424 PyObject *exc_type, *exc_value, *exc_tb;
1425 PyObject *value;
1426 Py_ssize_t ix;
1427
1428 _PyErr_Fetch(tstate, &exc_type, &exc_value, &exc_tb);
1429 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
1430
1431 /* Ignore any exception raised by the lookup */
1432 _PyErr_Restore(tstate, exc_type, exc_value, exc_tb);
1433
1434 if (ix < 0) {
1435 return NULL;
1436 }
1437 return value;
1438 }
1439
1440 Py_ssize_t
_PyDict_GetItemHint(PyDictObject * mp,PyObject * key,Py_ssize_t hint,PyObject ** value)1441 _PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
1442 Py_ssize_t hint, PyObject **value)
1443 {
1444 assert(*value == NULL);
1445 assert(PyDict_CheckExact((PyObject*)mp));
1446 assert(PyUnicode_CheckExact(key));
1447
1448 if (hint >= 0 && hint < mp->ma_keys->dk_nentries) {
1449 PyObject *res = NULL;
1450
1451 PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;
1452 if (ep->me_key == key) {
1453 if (mp->ma_keys->dk_lookup == lookdict_split) {
1454 assert(mp->ma_values != NULL);
1455 res = mp->ma_values[(size_t)hint];
1456 }
1457 else {
1458 res = ep->me_value;
1459 }
1460 if (res != NULL) {
1461 *value = res;
1462 return hint;
1463 }
1464 }
1465 }
1466
1467 Py_hash_t hash = ((PyASCIIObject *) key)->hash;
1468 if (hash == -1) {
1469 hash = PyObject_Hash(key);
1470 if (hash == -1) {
1471 return -1;
1472 }
1473 }
1474
1475 return (mp->ma_keys->dk_lookup)(mp, key, hash, value);
1476 }
1477
1478 /* Same as PyDict_GetItemWithError() but with hash supplied by caller.
1479 This returns NULL *with* an exception set if an exception occurred.
1480 It returns NULL *without* an exception set if the key wasn't present.
1481 */
1482 PyObject *
_PyDict_GetItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)1483 _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
1484 {
1485 Py_ssize_t ix;
1486 PyDictObject *mp = (PyDictObject *)op;
1487 PyObject *value;
1488
1489 if (!PyDict_Check(op)) {
1490 PyErr_BadInternalCall();
1491 return NULL;
1492 }
1493
1494 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
1495 if (ix < 0) {
1496 return NULL;
1497 }
1498 return value;
1499 }
1500
1501 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
1502 This returns NULL *with* an exception set if an exception occurred.
1503 It returns NULL *without* an exception set if the key wasn't present.
1504 */
1505 PyObject *
PyDict_GetItemWithError(PyObject * op,PyObject * key)1506 PyDict_GetItemWithError(PyObject *op, PyObject *key)
1507 {
1508 Py_ssize_t ix;
1509 Py_hash_t hash;
1510 PyDictObject*mp = (PyDictObject *)op;
1511 PyObject *value;
1512
1513 if (!PyDict_Check(op)) {
1514 PyErr_BadInternalCall();
1515 return NULL;
1516 }
1517 if (!PyUnicode_CheckExact(key) ||
1518 (hash = ((PyASCIIObject *) key)->hash) == -1)
1519 {
1520 hash = PyObject_Hash(key);
1521 if (hash == -1) {
1522 return NULL;
1523 }
1524 }
1525
1526 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
1527 if (ix < 0)
1528 return NULL;
1529 return value;
1530 }
1531
1532 PyObject *
_PyDict_GetItemIdWithError(PyObject * dp,struct _Py_Identifier * key)1533 _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
1534 {
1535 PyObject *kv;
1536 kv = _PyUnicode_FromId(key); /* borrowed */
1537 if (kv == NULL)
1538 return NULL;
1539 Py_hash_t hash = ((PyASCIIObject *) kv)->hash;
1540 assert (hash != -1); /* interned strings have their hash value initialised */
1541 return _PyDict_GetItem_KnownHash(dp, kv, hash);
1542 }
1543
1544 PyObject *
_PyDict_GetItemStringWithError(PyObject * v,const char * key)1545 _PyDict_GetItemStringWithError(PyObject *v, const char *key)
1546 {
1547 PyObject *kv, *rv;
1548 kv = PyUnicode_FromString(key);
1549 if (kv == NULL) {
1550 return NULL;
1551 }
1552 rv = PyDict_GetItemWithError(v, kv);
1553 Py_DECREF(kv);
1554 return rv;
1555 }
1556
1557 /* Fast version of global value lookup (LOAD_GLOBAL).
1558 * Lookup in globals, then builtins.
1559 *
1560 * Raise an exception and return NULL if an error occurred (ex: computing the
1561 * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
1562 * exist. Return the value if the key exists.
1563 */
1564 PyObject *
_PyDict_LoadGlobal(PyDictObject * globals,PyDictObject * builtins,PyObject * key)1565 _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
1566 {
1567 Py_ssize_t ix;
1568 Py_hash_t hash;
1569 PyObject *value;
1570
1571 if (!PyUnicode_CheckExact(key) ||
1572 (hash = ((PyASCIIObject *) key)->hash) == -1)
1573 {
1574 hash = PyObject_Hash(key);
1575 if (hash == -1)
1576 return NULL;
1577 }
1578
1579 /* namespace 1: globals */
1580 ix = globals->ma_keys->dk_lookup(globals, key, hash, &value);
1581 if (ix == DKIX_ERROR)
1582 return NULL;
1583 if (ix != DKIX_EMPTY && value != NULL)
1584 return value;
1585
1586 /* namespace 2: builtins */
1587 ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value);
1588 if (ix < 0)
1589 return NULL;
1590 return value;
1591 }
1592
1593 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
1594 * dictionary if it's merely replacing the value for an existing key.
1595 * This means that it's safe to loop over a dictionary with PyDict_Next()
1596 * and occasionally replace a value -- but you can't insert new keys or
1597 * remove them.
1598 */
1599 int
PyDict_SetItem(PyObject * op,PyObject * key,PyObject * value)1600 PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value)
1601 {
1602 PyDictObject *mp;
1603 Py_hash_t hash;
1604 if (!PyDict_Check(op)) {
1605 PyErr_BadInternalCall();
1606 return -1;
1607 }
1608 assert(key);
1609 assert(value);
1610 mp = (PyDictObject *)op;
1611 if (!PyUnicode_CheckExact(key) ||
1612 (hash = ((PyASCIIObject *) key)->hash) == -1)
1613 {
1614 hash = PyObject_Hash(key);
1615 if (hash == -1)
1616 return -1;
1617 }
1618
1619 if (mp->ma_keys == Py_EMPTY_KEYS) {
1620 return insert_to_emptydict(mp, key, hash, value);
1621 }
1622 /* insertdict() handles any resizing that might be necessary */
1623 return insertdict(mp, key, hash, value);
1624 }
1625
1626 int
_PyDict_SetItem_KnownHash(PyObject * op,PyObject * key,PyObject * value,Py_hash_t hash)1627 _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
1628 Py_hash_t hash)
1629 {
1630 PyDictObject *mp;
1631
1632 if (!PyDict_Check(op)) {
1633 PyErr_BadInternalCall();
1634 return -1;
1635 }
1636 assert(key);
1637 assert(value);
1638 assert(hash != -1);
1639 mp = (PyDictObject *)op;
1640
1641 if (mp->ma_keys == Py_EMPTY_KEYS) {
1642 return insert_to_emptydict(mp, key, hash, value);
1643 }
1644 /* insertdict() handles any resizing that might be necessary */
1645 return insertdict(mp, key, hash, value);
1646 }
1647
1648 static int
delitem_common(PyDictObject * mp,Py_hash_t hash,Py_ssize_t ix,PyObject * old_value)1649 delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
1650 PyObject *old_value)
1651 {
1652 PyObject *old_key;
1653 PyDictKeyEntry *ep;
1654
1655 Py_ssize_t hashpos = lookdict_index(mp->ma_keys, hash, ix);
1656 assert(hashpos >= 0);
1657
1658 mp->ma_used--;
1659 mp->ma_version_tag = DICT_NEXT_VERSION();
1660 ep = &DK_ENTRIES(mp->ma_keys)[ix];
1661 dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1662 ENSURE_ALLOWS_DELETIONS(mp);
1663 old_key = ep->me_key;
1664 ep->me_key = NULL;
1665 ep->me_value = NULL;
1666 Py_DECREF(old_key);
1667 Py_DECREF(old_value);
1668
1669 ASSERT_CONSISTENT(mp);
1670 return 0;
1671 }
1672
1673 int
PyDict_DelItem(PyObject * op,PyObject * key)1674 PyDict_DelItem(PyObject *op, PyObject *key)
1675 {
1676 Py_hash_t hash;
1677 assert(key);
1678 if (!PyUnicode_CheckExact(key) ||
1679 (hash = ((PyASCIIObject *) key)->hash) == -1) {
1680 hash = PyObject_Hash(key);
1681 if (hash == -1)
1682 return -1;
1683 }
1684
1685 return _PyDict_DelItem_KnownHash(op, key, hash);
1686 }
1687
1688 int
_PyDict_DelItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)1689 _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
1690 {
1691 Py_ssize_t ix;
1692 PyDictObject *mp;
1693 PyObject *old_value;
1694
1695 if (!PyDict_Check(op)) {
1696 PyErr_BadInternalCall();
1697 return -1;
1698 }
1699 assert(key);
1700 assert(hash != -1);
1701 mp = (PyDictObject *)op;
1702 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1703 if (ix == DKIX_ERROR)
1704 return -1;
1705 if (ix == DKIX_EMPTY || old_value == NULL) {
1706 _PyErr_SetKeyError(key);
1707 return -1;
1708 }
1709
1710 // Split table doesn't allow deletion. Combine it.
1711 if (_PyDict_HasSplitTable(mp)) {
1712 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1713 return -1;
1714 }
1715 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1716 assert(ix >= 0);
1717 }
1718
1719 return delitem_common(mp, hash, ix, old_value);
1720 }
1721
1722 /* This function promises that the predicate -> deletion sequence is atomic
1723 * (i.e. protected by the GIL), assuming the predicate itself doesn't
1724 * release the GIL.
1725 */
1726 int
_PyDict_DelItemIf(PyObject * op,PyObject * key,int (* predicate)(PyObject * value))1727 _PyDict_DelItemIf(PyObject *op, PyObject *key,
1728 int (*predicate)(PyObject *value))
1729 {
1730 Py_ssize_t hashpos, ix;
1731 PyDictObject *mp;
1732 Py_hash_t hash;
1733 PyObject *old_value;
1734 int res;
1735
1736 if (!PyDict_Check(op)) {
1737 PyErr_BadInternalCall();
1738 return -1;
1739 }
1740 assert(key);
1741 hash = PyObject_Hash(key);
1742 if (hash == -1)
1743 return -1;
1744 mp = (PyDictObject *)op;
1745 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1746 if (ix == DKIX_ERROR)
1747 return -1;
1748 if (ix == DKIX_EMPTY || old_value == NULL) {
1749 _PyErr_SetKeyError(key);
1750 return -1;
1751 }
1752
1753 // Split table doesn't allow deletion. Combine it.
1754 if (_PyDict_HasSplitTable(mp)) {
1755 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1756 return -1;
1757 }
1758 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1759 assert(ix >= 0);
1760 }
1761
1762 res = predicate(old_value);
1763 if (res == -1)
1764 return -1;
1765
1766 hashpos = lookdict_index(mp->ma_keys, hash, ix);
1767 assert(hashpos >= 0);
1768
1769 if (res > 0)
1770 return delitem_common(mp, hashpos, ix, old_value);
1771 else
1772 return 0;
1773 }
1774
1775
1776 void
PyDict_Clear(PyObject * op)1777 PyDict_Clear(PyObject *op)
1778 {
1779 PyDictObject *mp;
1780 PyDictKeysObject *oldkeys;
1781 PyObject **oldvalues;
1782 Py_ssize_t i, n;
1783
1784 if (!PyDict_Check(op))
1785 return;
1786 mp = ((PyDictObject *)op);
1787 oldkeys = mp->ma_keys;
1788 oldvalues = mp->ma_values;
1789 if (oldvalues == empty_values)
1790 return;
1791 /* Empty the dict... */
1792 dictkeys_incref(Py_EMPTY_KEYS);
1793 mp->ma_keys = Py_EMPTY_KEYS;
1794 mp->ma_values = empty_values;
1795 mp->ma_used = 0;
1796 mp->ma_version_tag = DICT_NEXT_VERSION();
1797 /* ...then clear the keys and values */
1798 if (oldvalues != NULL) {
1799 n = oldkeys->dk_nentries;
1800 for (i = 0; i < n; i++)
1801 Py_CLEAR(oldvalues[i]);
1802 free_values(oldvalues);
1803 dictkeys_decref(oldkeys);
1804 }
1805 else {
1806 assert(oldkeys->dk_refcnt == 1);
1807 dictkeys_decref(oldkeys);
1808 }
1809 ASSERT_CONSISTENT(mp);
1810 }
1811
1812 /* Internal version of PyDict_Next that returns a hash value in addition
1813 * to the key and value.
1814 * Return 1 on success, return 0 when the reached the end of the dictionary
1815 * (or if op is not a dictionary)
1816 */
1817 int
_PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue,Py_hash_t * phash)1818 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
1819 PyObject **pvalue, Py_hash_t *phash)
1820 {
1821 Py_ssize_t i;
1822 PyDictObject *mp;
1823 PyDictKeyEntry *entry_ptr;
1824 PyObject *value;
1825
1826 if (!PyDict_Check(op))
1827 return 0;
1828 mp = (PyDictObject *)op;
1829 i = *ppos;
1830 if (mp->ma_values) {
1831 if (i < 0 || i >= mp->ma_used)
1832 return 0;
1833 /* values of split table is always dense */
1834 entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
1835 value = mp->ma_values[i];
1836 assert(value != NULL);
1837 }
1838 else {
1839 Py_ssize_t n = mp->ma_keys->dk_nentries;
1840 if (i < 0 || i >= n)
1841 return 0;
1842 entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
1843 while (i < n && entry_ptr->me_value == NULL) {
1844 entry_ptr++;
1845 i++;
1846 }
1847 if (i >= n)
1848 return 0;
1849 value = entry_ptr->me_value;
1850 }
1851 *ppos = i+1;
1852 if (pkey)
1853 *pkey = entry_ptr->me_key;
1854 if (phash)
1855 *phash = entry_ptr->me_hash;
1856 if (pvalue)
1857 *pvalue = value;
1858 return 1;
1859 }
1860
1861 /*
1862 * Iterate over a dict. Use like so:
1863 *
1864 * Py_ssize_t i;
1865 * PyObject *key, *value;
1866 * i = 0; # important! i should not otherwise be changed by you
1867 * while (PyDict_Next(yourdict, &i, &key, &value)) {
1868 * Refer to borrowed references in key and value.
1869 * }
1870 *
1871 * Return 1 on success, return 0 when the reached the end of the dictionary
1872 * (or if op is not a dictionary)
1873 *
1874 * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
1875 * mutates the dict. One exception: it is safe if the loop merely changes
1876 * the values associated with the keys (but doesn't insert new keys or
1877 * delete keys), via PyDict_SetItem().
1878 */
1879 int
PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue)1880 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
1881 {
1882 return _PyDict_Next(op, ppos, pkey, pvalue, NULL);
1883 }
1884
1885 /* Internal version of dict.pop(). */
1886 PyObject *
_PyDict_Pop_KnownHash(PyObject * dict,PyObject * key,Py_hash_t hash,PyObject * deflt)1887 _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
1888 {
1889 Py_ssize_t ix, hashpos;
1890 PyObject *old_value, *old_key;
1891 PyDictKeyEntry *ep;
1892 PyDictObject *mp;
1893
1894 assert(PyDict_Check(dict));
1895 mp = (PyDictObject *)dict;
1896
1897 if (mp->ma_used == 0) {
1898 if (deflt) {
1899 Py_INCREF(deflt);
1900 return deflt;
1901 }
1902 _PyErr_SetKeyError(key);
1903 return NULL;
1904 }
1905 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1906 if (ix == DKIX_ERROR)
1907 return NULL;
1908 if (ix == DKIX_EMPTY || old_value == NULL) {
1909 if (deflt) {
1910 Py_INCREF(deflt);
1911 return deflt;
1912 }
1913 _PyErr_SetKeyError(key);
1914 return NULL;
1915 }
1916
1917 // Split table doesn't allow deletion. Combine it.
1918 if (_PyDict_HasSplitTable(mp)) {
1919 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1920 return NULL;
1921 }
1922 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1923 assert(ix >= 0);
1924 }
1925
1926 hashpos = lookdict_index(mp->ma_keys, hash, ix);
1927 assert(hashpos >= 0);
1928 assert(old_value != NULL);
1929 mp->ma_used--;
1930 mp->ma_version_tag = DICT_NEXT_VERSION();
1931 dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1932 ep = &DK_ENTRIES(mp->ma_keys)[ix];
1933 ENSURE_ALLOWS_DELETIONS(mp);
1934 old_key = ep->me_key;
1935 ep->me_key = NULL;
1936 ep->me_value = NULL;
1937 Py_DECREF(old_key);
1938
1939 ASSERT_CONSISTENT(mp);
1940 return old_value;
1941 }
1942
1943 PyObject *
_PyDict_Pop(PyObject * dict,PyObject * key,PyObject * deflt)1944 _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
1945 {
1946 Py_hash_t hash;
1947
1948 if (((PyDictObject *)dict)->ma_used == 0) {
1949 if (deflt) {
1950 Py_INCREF(deflt);
1951 return deflt;
1952 }
1953 _PyErr_SetKeyError(key);
1954 return NULL;
1955 }
1956 if (!PyUnicode_CheckExact(key) ||
1957 (hash = ((PyASCIIObject *) key)->hash) == -1) {
1958 hash = PyObject_Hash(key);
1959 if (hash == -1)
1960 return NULL;
1961 }
1962 return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
1963 }
1964
1965 /* Internal version of dict.from_keys(). It is subclass-friendly. */
1966 PyObject *
_PyDict_FromKeys(PyObject * cls,PyObject * iterable,PyObject * value)1967 _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
1968 {
1969 PyObject *it; /* iter(iterable) */
1970 PyObject *key;
1971 PyObject *d;
1972 int status;
1973
1974 d = _PyObject_CallNoArg(cls);
1975 if (d == NULL)
1976 return NULL;
1977
1978 if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
1979 if (PyDict_CheckExact(iterable)) {
1980 PyDictObject *mp = (PyDictObject *)d;
1981 PyObject *oldvalue;
1982 Py_ssize_t pos = 0;
1983 PyObject *key;
1984 Py_hash_t hash;
1985
1986 if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {
1987 Py_DECREF(d);
1988 return NULL;
1989 }
1990
1991 while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) {
1992 if (insertdict(mp, key, hash, value)) {
1993 Py_DECREF(d);
1994 return NULL;
1995 }
1996 }
1997 return d;
1998 }
1999 if (PyAnySet_CheckExact(iterable)) {
2000 PyDictObject *mp = (PyDictObject *)d;
2001 Py_ssize_t pos = 0;
2002 PyObject *key;
2003 Py_hash_t hash;
2004
2005 if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {
2006 Py_DECREF(d);
2007 return NULL;
2008 }
2009
2010 while (_PySet_NextEntry(iterable, &pos, &key, &hash)) {
2011 if (insertdict(mp, key, hash, value)) {
2012 Py_DECREF(d);
2013 return NULL;
2014 }
2015 }
2016 return d;
2017 }
2018 }
2019
2020 it = PyObject_GetIter(iterable);
2021 if (it == NULL){
2022 Py_DECREF(d);
2023 return NULL;
2024 }
2025
2026 if (PyDict_CheckExact(d)) {
2027 while ((key = PyIter_Next(it)) != NULL) {
2028 status = PyDict_SetItem(d, key, value);
2029 Py_DECREF(key);
2030 if (status < 0)
2031 goto Fail;
2032 }
2033 } else {
2034 while ((key = PyIter_Next(it)) != NULL) {
2035 status = PyObject_SetItem(d, key, value);
2036 Py_DECREF(key);
2037 if (status < 0)
2038 goto Fail;
2039 }
2040 }
2041
2042 if (PyErr_Occurred())
2043 goto Fail;
2044 Py_DECREF(it);
2045 return d;
2046
2047 Fail:
2048 Py_DECREF(it);
2049 Py_DECREF(d);
2050 return NULL;
2051 }
2052
2053 /* Methods */
2054
2055 static void
dict_dealloc(PyDictObject * mp)2056 dict_dealloc(PyDictObject *mp)
2057 {
2058 PyObject **values = mp->ma_values;
2059 PyDictKeysObject *keys = mp->ma_keys;
2060 Py_ssize_t i, n;
2061
2062 /* bpo-31095: UnTrack is needed before calling any callbacks */
2063 PyObject_GC_UnTrack(mp);
2064 Py_TRASHCAN_BEGIN(mp, dict_dealloc)
2065 if (values != NULL) {
2066 if (values != empty_values) {
2067 for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
2068 Py_XDECREF(values[i]);
2069 }
2070 free_values(values);
2071 }
2072 dictkeys_decref(keys);
2073 }
2074 else if (keys != NULL) {
2075 assert(keys->dk_refcnt == 1);
2076 dictkeys_decref(keys);
2077 }
2078 struct _Py_dict_state *state = get_dict_state();
2079 #ifdef Py_DEBUG
2080 // new_dict() must not be called after _PyDict_Fini()
2081 assert(state->numfree != -1);
2082 #endif
2083 if (state->numfree < PyDict_MAXFREELIST && Py_IS_TYPE(mp, &PyDict_Type)) {
2084 state->free_list[state->numfree++] = mp;
2085 }
2086 else {
2087 Py_TYPE(mp)->tp_free((PyObject *)mp);
2088 }
2089 Py_TRASHCAN_END
2090 }
2091
2092
2093 static PyObject *
dict_repr(PyDictObject * mp)2094 dict_repr(PyDictObject *mp)
2095 {
2096 Py_ssize_t i;
2097 PyObject *key = NULL, *value = NULL;
2098 _PyUnicodeWriter writer;
2099 int first;
2100
2101 i = Py_ReprEnter((PyObject *)mp);
2102 if (i != 0) {
2103 return i > 0 ? PyUnicode_FromString("{...}") : NULL;
2104 }
2105
2106 if (mp->ma_used == 0) {
2107 Py_ReprLeave((PyObject *)mp);
2108 return PyUnicode_FromString("{}");
2109 }
2110
2111 _PyUnicodeWriter_Init(&writer);
2112 writer.overallocate = 1;
2113 /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */
2114 writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1;
2115
2116 if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0)
2117 goto error;
2118
2119 /* Do repr() on each key+value pair, and insert ": " between them.
2120 Note that repr may mutate the dict. */
2121 i = 0;
2122 first = 1;
2123 while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
2124 PyObject *s;
2125 int res;
2126
2127 /* Prevent repr from deleting key or value during key format. */
2128 Py_INCREF(key);
2129 Py_INCREF(value);
2130
2131 if (!first) {
2132 if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
2133 goto error;
2134 }
2135 first = 0;
2136
2137 s = PyObject_Repr(key);
2138 if (s == NULL)
2139 goto error;
2140 res = _PyUnicodeWriter_WriteStr(&writer, s);
2141 Py_DECREF(s);
2142 if (res < 0)
2143 goto error;
2144
2145 if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0)
2146 goto error;
2147
2148 s = PyObject_Repr(value);
2149 if (s == NULL)
2150 goto error;
2151 res = _PyUnicodeWriter_WriteStr(&writer, s);
2152 Py_DECREF(s);
2153 if (res < 0)
2154 goto error;
2155
2156 Py_CLEAR(key);
2157 Py_CLEAR(value);
2158 }
2159
2160 writer.overallocate = 0;
2161 if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0)
2162 goto error;
2163
2164 Py_ReprLeave((PyObject *)mp);
2165
2166 return _PyUnicodeWriter_Finish(&writer);
2167
2168 error:
2169 Py_ReprLeave((PyObject *)mp);
2170 _PyUnicodeWriter_Dealloc(&writer);
2171 Py_XDECREF(key);
2172 Py_XDECREF(value);
2173 return NULL;
2174 }
2175
2176 static Py_ssize_t
dict_length(PyDictObject * mp)2177 dict_length(PyDictObject *mp)
2178 {
2179 return mp->ma_used;
2180 }
2181
2182 static PyObject *
dict_subscript(PyDictObject * mp,PyObject * key)2183 dict_subscript(PyDictObject *mp, PyObject *key)
2184 {
2185 Py_ssize_t ix;
2186 Py_hash_t hash;
2187 PyObject *value;
2188
2189 if (!PyUnicode_CheckExact(key) ||
2190 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2191 hash = PyObject_Hash(key);
2192 if (hash == -1)
2193 return NULL;
2194 }
2195 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
2196 if (ix == DKIX_ERROR)
2197 return NULL;
2198 if (ix == DKIX_EMPTY || value == NULL) {
2199 if (!PyDict_CheckExact(mp)) {
2200 /* Look up __missing__ method if we're a subclass. */
2201 PyObject *missing, *res;
2202 _Py_IDENTIFIER(__missing__);
2203 missing = _PyObject_LookupSpecial((PyObject *)mp, &PyId___missing__);
2204 if (missing != NULL) {
2205 res = PyObject_CallOneArg(missing, key);
2206 Py_DECREF(missing);
2207 return res;
2208 }
2209 else if (PyErr_Occurred())
2210 return NULL;
2211 }
2212 _PyErr_SetKeyError(key);
2213 return NULL;
2214 }
2215 Py_INCREF(value);
2216 return value;
2217 }
2218
2219 static int
dict_ass_sub(PyDictObject * mp,PyObject * v,PyObject * w)2220 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
2221 {
2222 if (w == NULL)
2223 return PyDict_DelItem((PyObject *)mp, v);
2224 else
2225 return PyDict_SetItem((PyObject *)mp, v, w);
2226 }
2227
2228 static PyMappingMethods dict_as_mapping = {
2229 (lenfunc)dict_length, /*mp_length*/
2230 (binaryfunc)dict_subscript, /*mp_subscript*/
2231 (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
2232 };
2233
2234 static PyObject *
dict_keys(PyDictObject * mp)2235 dict_keys(PyDictObject *mp)
2236 {
2237 PyObject *v;
2238 Py_ssize_t i, j;
2239 PyDictKeyEntry *ep;
2240 Py_ssize_t n, offset;
2241 PyObject **value_ptr;
2242
2243 again:
2244 n = mp->ma_used;
2245 v = PyList_New(n);
2246 if (v == NULL)
2247 return NULL;
2248 if (n != mp->ma_used) {
2249 /* Durnit. The allocations caused the dict to resize.
2250 * Just start over, this shouldn't normally happen.
2251 */
2252 Py_DECREF(v);
2253 goto again;
2254 }
2255 ep = DK_ENTRIES(mp->ma_keys);
2256 if (mp->ma_values) {
2257 value_ptr = mp->ma_values;
2258 offset = sizeof(PyObject *);
2259 }
2260 else {
2261 value_ptr = &ep[0].me_value;
2262 offset = sizeof(PyDictKeyEntry);
2263 }
2264 for (i = 0, j = 0; j < n; i++) {
2265 if (*value_ptr != NULL) {
2266 PyObject *key = ep[i].me_key;
2267 Py_INCREF(key);
2268 PyList_SET_ITEM(v, j, key);
2269 j++;
2270 }
2271 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2272 }
2273 assert(j == n);
2274 return v;
2275 }
2276
2277 static PyObject *
dict_values(PyDictObject * mp)2278 dict_values(PyDictObject *mp)
2279 {
2280 PyObject *v;
2281 Py_ssize_t i, j;
2282 PyDictKeyEntry *ep;
2283 Py_ssize_t n, offset;
2284 PyObject **value_ptr;
2285
2286 again:
2287 n = mp->ma_used;
2288 v = PyList_New(n);
2289 if (v == NULL)
2290 return NULL;
2291 if (n != mp->ma_used) {
2292 /* Durnit. The allocations caused the dict to resize.
2293 * Just start over, this shouldn't normally happen.
2294 */
2295 Py_DECREF(v);
2296 goto again;
2297 }
2298 ep = DK_ENTRIES(mp->ma_keys);
2299 if (mp->ma_values) {
2300 value_ptr = mp->ma_values;
2301 offset = sizeof(PyObject *);
2302 }
2303 else {
2304 value_ptr = &ep[0].me_value;
2305 offset = sizeof(PyDictKeyEntry);
2306 }
2307 for (i = 0, j = 0; j < n; i++) {
2308 PyObject *value = *value_ptr;
2309 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2310 if (value != NULL) {
2311 Py_INCREF(value);
2312 PyList_SET_ITEM(v, j, value);
2313 j++;
2314 }
2315 }
2316 assert(j == n);
2317 return v;
2318 }
2319
2320 static PyObject *
dict_items(PyDictObject * mp)2321 dict_items(PyDictObject *mp)
2322 {
2323 PyObject *v;
2324 Py_ssize_t i, j, n;
2325 Py_ssize_t offset;
2326 PyObject *item, *key;
2327 PyDictKeyEntry *ep;
2328 PyObject **value_ptr;
2329
2330 /* Preallocate the list of tuples, to avoid allocations during
2331 * the loop over the items, which could trigger GC, which
2332 * could resize the dict. :-(
2333 */
2334 again:
2335 n = mp->ma_used;
2336 v = PyList_New(n);
2337 if (v == NULL)
2338 return NULL;
2339 for (i = 0; i < n; i++) {
2340 item = PyTuple_New(2);
2341 if (item == NULL) {
2342 Py_DECREF(v);
2343 return NULL;
2344 }
2345 PyList_SET_ITEM(v, i, item);
2346 }
2347 if (n != mp->ma_used) {
2348 /* Durnit. The allocations caused the dict to resize.
2349 * Just start over, this shouldn't normally happen.
2350 */
2351 Py_DECREF(v);
2352 goto again;
2353 }
2354 /* Nothing we do below makes any function calls. */
2355 ep = DK_ENTRIES(mp->ma_keys);
2356 if (mp->ma_values) {
2357 value_ptr = mp->ma_values;
2358 offset = sizeof(PyObject *);
2359 }
2360 else {
2361 value_ptr = &ep[0].me_value;
2362 offset = sizeof(PyDictKeyEntry);
2363 }
2364 for (i = 0, j = 0; j < n; i++) {
2365 PyObject *value = *value_ptr;
2366 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2367 if (value != NULL) {
2368 key = ep[i].me_key;
2369 item = PyList_GET_ITEM(v, j);
2370 Py_INCREF(key);
2371 PyTuple_SET_ITEM(item, 0, key);
2372 Py_INCREF(value);
2373 PyTuple_SET_ITEM(item, 1, value);
2374 j++;
2375 }
2376 }
2377 assert(j == n);
2378 return v;
2379 }
2380
2381 /*[clinic input]
2382 @classmethod
2383 dict.fromkeys
2384 iterable: object
2385 value: object=None
2386 /
2387
2388 Create a new dictionary with keys from iterable and values set to value.
2389 [clinic start generated code]*/
2390
2391 static PyObject *
dict_fromkeys_impl(PyTypeObject * type,PyObject * iterable,PyObject * value)2392 dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
2393 /*[clinic end generated code: output=8fb98e4b10384999 input=382ba4855d0f74c3]*/
2394 {
2395 return _PyDict_FromKeys((PyObject *)type, iterable, value);
2396 }
2397
2398 /* Single-arg dict update; used by dict_update_common and operators. */
2399 static int
dict_update_arg(PyObject * self,PyObject * arg)2400 dict_update_arg(PyObject *self, PyObject *arg)
2401 {
2402 if (PyDict_CheckExact(arg)) {
2403 return PyDict_Merge(self, arg, 1);
2404 }
2405 _Py_IDENTIFIER(keys);
2406 PyObject *func;
2407 if (_PyObject_LookupAttrId(arg, &PyId_keys, &func) < 0) {
2408 return -1;
2409 }
2410 if (func != NULL) {
2411 Py_DECREF(func);
2412 return PyDict_Merge(self, arg, 1);
2413 }
2414 return PyDict_MergeFromSeq2(self, arg, 1);
2415 }
2416
2417 static int
dict_update_common(PyObject * self,PyObject * args,PyObject * kwds,const char * methname)2418 dict_update_common(PyObject *self, PyObject *args, PyObject *kwds,
2419 const char *methname)
2420 {
2421 PyObject *arg = NULL;
2422 int result = 0;
2423
2424 if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg)) {
2425 result = -1;
2426 }
2427 else if (arg != NULL) {
2428 result = dict_update_arg(self, arg);
2429 }
2430
2431 if (result == 0 && kwds != NULL) {
2432 if (PyArg_ValidateKeywordArguments(kwds))
2433 result = PyDict_Merge(self, kwds, 1);
2434 else
2435 result = -1;
2436 }
2437 return result;
2438 }
2439
2440 /* Note: dict.update() uses the METH_VARARGS|METH_KEYWORDS calling convention.
2441 Using METH_FASTCALL|METH_KEYWORDS would make dict.update(**dict2) calls
2442 slower, see the issue #29312. */
2443 static PyObject *
dict_update(PyObject * self,PyObject * args,PyObject * kwds)2444 dict_update(PyObject *self, PyObject *args, PyObject *kwds)
2445 {
2446 if (dict_update_common(self, args, kwds, "update") != -1)
2447 Py_RETURN_NONE;
2448 return NULL;
2449 }
2450
2451 /* Update unconditionally replaces existing items.
2452 Merge has a 3rd argument 'override'; if set, it acts like Update,
2453 otherwise it leaves existing items unchanged.
2454
2455 PyDict_{Update,Merge} update/merge from a mapping object.
2456
2457 PyDict_MergeFromSeq2 updates/merges from any iterable object
2458 producing iterable objects of length 2.
2459 */
2460
2461 int
PyDict_MergeFromSeq2(PyObject * d,PyObject * seq2,int override)2462 PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
2463 {
2464 PyObject *it; /* iter(seq2) */
2465 Py_ssize_t i; /* index into seq2 of current element */
2466 PyObject *item; /* seq2[i] */
2467 PyObject *fast; /* item as a 2-tuple or 2-list */
2468
2469 assert(d != NULL);
2470 assert(PyDict_Check(d));
2471 assert(seq2 != NULL);
2472
2473 it = PyObject_GetIter(seq2);
2474 if (it == NULL)
2475 return -1;
2476
2477 for (i = 0; ; ++i) {
2478 PyObject *key, *value;
2479 Py_ssize_t n;
2480
2481 fast = NULL;
2482 item = PyIter_Next(it);
2483 if (item == NULL) {
2484 if (PyErr_Occurred())
2485 goto Fail;
2486 break;
2487 }
2488
2489 /* Convert item to sequence, and verify length 2. */
2490 fast = PySequence_Fast(item, "");
2491 if (fast == NULL) {
2492 if (PyErr_ExceptionMatches(PyExc_TypeError))
2493 PyErr_Format(PyExc_TypeError,
2494 "cannot convert dictionary update "
2495 "sequence element #%zd to a sequence",
2496 i);
2497 goto Fail;
2498 }
2499 n = PySequence_Fast_GET_SIZE(fast);
2500 if (n != 2) {
2501 PyErr_Format(PyExc_ValueError,
2502 "dictionary update sequence element #%zd "
2503 "has length %zd; 2 is required",
2504 i, n);
2505 goto Fail;
2506 }
2507
2508 /* Update/merge with this (key, value) pair. */
2509 key = PySequence_Fast_GET_ITEM(fast, 0);
2510 value = PySequence_Fast_GET_ITEM(fast, 1);
2511 Py_INCREF(key);
2512 Py_INCREF(value);
2513 if (override) {
2514 if (PyDict_SetItem(d, key, value) < 0) {
2515 Py_DECREF(key);
2516 Py_DECREF(value);
2517 goto Fail;
2518 }
2519 }
2520 else {
2521 if (PyDict_SetDefault(d, key, value) == NULL) {
2522 Py_DECREF(key);
2523 Py_DECREF(value);
2524 goto Fail;
2525 }
2526 }
2527
2528 Py_DECREF(key);
2529 Py_DECREF(value);
2530 Py_DECREF(fast);
2531 Py_DECREF(item);
2532 }
2533
2534 i = 0;
2535 ASSERT_CONSISTENT(d);
2536 goto Return;
2537 Fail:
2538 Py_XDECREF(item);
2539 Py_XDECREF(fast);
2540 i = -1;
2541 Return:
2542 Py_DECREF(it);
2543 return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
2544 }
2545
2546 static int
dict_merge(PyObject * a,PyObject * b,int override)2547 dict_merge(PyObject *a, PyObject *b, int override)
2548 {
2549 PyDictObject *mp, *other;
2550 Py_ssize_t i, n;
2551 PyDictKeyEntry *entry, *ep0;
2552
2553 assert(0 <= override && override <= 2);
2554
2555 /* We accept for the argument either a concrete dictionary object,
2556 * or an abstract "mapping" object. For the former, we can do
2557 * things quite efficiently. For the latter, we only require that
2558 * PyMapping_Keys() and PyObject_GetItem() be supported.
2559 */
2560 if (a == NULL || !PyDict_Check(a) || b == NULL) {
2561 PyErr_BadInternalCall();
2562 return -1;
2563 }
2564 mp = (PyDictObject*)a;
2565 if (PyDict_Check(b) && (Py_TYPE(b)->tp_iter == (getiterfunc)dict_iter)) {
2566 other = (PyDictObject*)b;
2567 if (other == mp || other->ma_used == 0)
2568 /* a.update(a) or a.update({}); nothing to do */
2569 return 0;
2570 if (mp->ma_used == 0) {
2571 /* Since the target dict is empty, PyDict_GetItem()
2572 * always returns NULL. Setting override to 1
2573 * skips the unnecessary test.
2574 */
2575 override = 1;
2576 PyDictKeysObject *okeys = other->ma_keys;
2577
2578 // If other is clean, combined, and just allocated, just clone it.
2579 if (other->ma_values == NULL &&
2580 other->ma_used == okeys->dk_nentries &&
2581 (okeys->dk_size == PyDict_MINSIZE ||
2582 USABLE_FRACTION(okeys->dk_size/2) < other->ma_used)) {
2583 PyDictKeysObject *keys = clone_combined_dict_keys(other);
2584 if (keys == NULL) {
2585 return -1;
2586 }
2587
2588 dictkeys_decref(mp->ma_keys);
2589 mp->ma_keys = keys;
2590 if (mp->ma_values != NULL) {
2591 if (mp->ma_values != empty_values) {
2592 free_values(mp->ma_values);
2593 }
2594 mp->ma_values = NULL;
2595 }
2596
2597 mp->ma_used = other->ma_used;
2598 mp->ma_version_tag = DICT_NEXT_VERSION();
2599 ASSERT_CONSISTENT(mp);
2600
2601 if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) {
2602 /* Maintain tracking. */
2603 _PyObject_GC_TRACK(mp);
2604 }
2605
2606 return 0;
2607 }
2608 }
2609 /* Do one big resize at the start, rather than
2610 * incrementally resizing as we insert new items. Expect
2611 * that there will be no (or few) overlapping keys.
2612 */
2613 if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
2614 if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {
2615 return -1;
2616 }
2617 }
2618 ep0 = DK_ENTRIES(other->ma_keys);
2619 for (i = 0, n = other->ma_keys->dk_nentries; i < n; i++) {
2620 PyObject *key, *value;
2621 Py_hash_t hash;
2622 entry = &ep0[i];
2623 key = entry->me_key;
2624 hash = entry->me_hash;
2625 if (other->ma_values)
2626 value = other->ma_values[i];
2627 else
2628 value = entry->me_value;
2629
2630 if (value != NULL) {
2631 int err = 0;
2632 Py_INCREF(key);
2633 Py_INCREF(value);
2634 if (override == 1)
2635 err = insertdict(mp, key, hash, value);
2636 else {
2637 err = _PyDict_Contains_KnownHash(a, key, hash);
2638 if (err == 0) {
2639 err = insertdict(mp, key, hash, value);
2640 }
2641 else if (err > 0) {
2642 if (override != 0) {
2643 _PyErr_SetKeyError(key);
2644 Py_DECREF(value);
2645 Py_DECREF(key);
2646 return -1;
2647 }
2648 err = 0;
2649 }
2650 }
2651 Py_DECREF(value);
2652 Py_DECREF(key);
2653 if (err != 0)
2654 return -1;
2655
2656 if (n != other->ma_keys->dk_nentries) {
2657 PyErr_SetString(PyExc_RuntimeError,
2658 "dict mutated during update");
2659 return -1;
2660 }
2661 }
2662 }
2663 }
2664 else {
2665 /* Do it the generic, slower way */
2666 PyObject *keys = PyMapping_Keys(b);
2667 PyObject *iter;
2668 PyObject *key, *value;
2669 int status;
2670
2671 if (keys == NULL)
2672 /* Docstring says this is equivalent to E.keys() so
2673 * if E doesn't have a .keys() method we want
2674 * AttributeError to percolate up. Might as well
2675 * do the same for any other error.
2676 */
2677 return -1;
2678
2679 iter = PyObject_GetIter(keys);
2680 Py_DECREF(keys);
2681 if (iter == NULL)
2682 return -1;
2683
2684 for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
2685 if (override != 1) {
2686 status = PyDict_Contains(a, key);
2687 if (status != 0) {
2688 if (status > 0) {
2689 if (override == 0) {
2690 Py_DECREF(key);
2691 continue;
2692 }
2693 _PyErr_SetKeyError(key);
2694 }
2695 Py_DECREF(key);
2696 Py_DECREF(iter);
2697 return -1;
2698 }
2699 }
2700 value = PyObject_GetItem(b, key);
2701 if (value == NULL) {
2702 Py_DECREF(iter);
2703 Py_DECREF(key);
2704 return -1;
2705 }
2706 status = PyDict_SetItem(a, key, value);
2707 Py_DECREF(key);
2708 Py_DECREF(value);
2709 if (status < 0) {
2710 Py_DECREF(iter);
2711 return -1;
2712 }
2713 }
2714 Py_DECREF(iter);
2715 if (PyErr_Occurred())
2716 /* Iterator completed, via error */
2717 return -1;
2718 }
2719 ASSERT_CONSISTENT(a);
2720 return 0;
2721 }
2722
2723 int
PyDict_Update(PyObject * a,PyObject * b)2724 PyDict_Update(PyObject *a, PyObject *b)
2725 {
2726 return dict_merge(a, b, 1);
2727 }
2728
2729 int
PyDict_Merge(PyObject * a,PyObject * b,int override)2730 PyDict_Merge(PyObject *a, PyObject *b, int override)
2731 {
2732 /* XXX Deprecate override not in (0, 1). */
2733 return dict_merge(a, b, override != 0);
2734 }
2735
2736 int
_PyDict_MergeEx(PyObject * a,PyObject * b,int override)2737 _PyDict_MergeEx(PyObject *a, PyObject *b, int override)
2738 {
2739 return dict_merge(a, b, override);
2740 }
2741
2742 static PyObject *
dict_copy(PyDictObject * mp,PyObject * Py_UNUSED (ignored))2743 dict_copy(PyDictObject *mp, PyObject *Py_UNUSED(ignored))
2744 {
2745 return PyDict_Copy((PyObject*)mp);
2746 }
2747
2748 PyObject *
PyDict_Copy(PyObject * o)2749 PyDict_Copy(PyObject *o)
2750 {
2751 PyObject *copy;
2752 PyDictObject *mp;
2753 Py_ssize_t i, n;
2754
2755 if (o == NULL || !PyDict_Check(o)) {
2756 PyErr_BadInternalCall();
2757 return NULL;
2758 }
2759
2760 mp = (PyDictObject *)o;
2761 if (mp->ma_used == 0) {
2762 /* The dict is empty; just return a new dict. */
2763 return PyDict_New();
2764 }
2765
2766 if (_PyDict_HasSplitTable(mp)) {
2767 PyDictObject *split_copy;
2768 Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
2769 PyObject **newvalues;
2770 newvalues = new_values(size);
2771 if (newvalues == NULL)
2772 return PyErr_NoMemory();
2773 split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
2774 if (split_copy == NULL) {
2775 free_values(newvalues);
2776 return NULL;
2777 }
2778 split_copy->ma_values = newvalues;
2779 split_copy->ma_keys = mp->ma_keys;
2780 split_copy->ma_used = mp->ma_used;
2781 split_copy->ma_version_tag = DICT_NEXT_VERSION();
2782 dictkeys_incref(mp->ma_keys);
2783 for (i = 0, n = size; i < n; i++) {
2784 PyObject *value = mp->ma_values[i];
2785 Py_XINCREF(value);
2786 split_copy->ma_values[i] = value;
2787 }
2788 if (_PyObject_GC_IS_TRACKED(mp))
2789 _PyObject_GC_TRACK(split_copy);
2790 return (PyObject *)split_copy;
2791 }
2792
2793 if (Py_TYPE(mp)->tp_iter == (getiterfunc)dict_iter &&
2794 mp->ma_values == NULL &&
2795 (mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
2796 {
2797 /* Use fast-copy if:
2798
2799 (1) type(mp) doesn't override tp_iter; and
2800
2801 (2) 'mp' is not a split-dict; and
2802
2803 (3) if 'mp' is non-compact ('del' operation does not resize dicts),
2804 do fast-copy only if it has at most 1/3 non-used keys.
2805
2806 The last condition (3) is important to guard against a pathological
2807 case when a large dict is almost emptied with multiple del/pop
2808 operations and copied after that. In cases like this, we defer to
2809 PyDict_Merge, which produces a compacted copy.
2810 */
2811 PyDictKeysObject *keys = clone_combined_dict_keys(mp);
2812 if (keys == NULL) {
2813 return NULL;
2814 }
2815 PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
2816 if (new == NULL) {
2817 /* In case of an error, `new_dict()` takes care of
2818 cleaning up `keys`. */
2819 return NULL;
2820 }
2821
2822 new->ma_used = mp->ma_used;
2823 ASSERT_CONSISTENT(new);
2824 if (_PyObject_GC_IS_TRACKED(mp)) {
2825 /* Maintain tracking. */
2826 _PyObject_GC_TRACK(new);
2827 }
2828
2829 return (PyObject *)new;
2830 }
2831
2832 copy = PyDict_New();
2833 if (copy == NULL)
2834 return NULL;
2835 if (dict_merge(copy, o, 1) == 0)
2836 return copy;
2837 Py_DECREF(copy);
2838 return NULL;
2839 }
2840
2841 Py_ssize_t
PyDict_Size(PyObject * mp)2842 PyDict_Size(PyObject *mp)
2843 {
2844 if (mp == NULL || !PyDict_Check(mp)) {
2845 PyErr_BadInternalCall();
2846 return -1;
2847 }
2848 return ((PyDictObject *)mp)->ma_used;
2849 }
2850
2851 PyObject *
PyDict_Keys(PyObject * mp)2852 PyDict_Keys(PyObject *mp)
2853 {
2854 if (mp == NULL || !PyDict_Check(mp)) {
2855 PyErr_BadInternalCall();
2856 return NULL;
2857 }
2858 return dict_keys((PyDictObject *)mp);
2859 }
2860
2861 PyObject *
PyDict_Values(PyObject * mp)2862 PyDict_Values(PyObject *mp)
2863 {
2864 if (mp == NULL || !PyDict_Check(mp)) {
2865 PyErr_BadInternalCall();
2866 return NULL;
2867 }
2868 return dict_values((PyDictObject *)mp);
2869 }
2870
2871 PyObject *
PyDict_Items(PyObject * mp)2872 PyDict_Items(PyObject *mp)
2873 {
2874 if (mp == NULL || !PyDict_Check(mp)) {
2875 PyErr_BadInternalCall();
2876 return NULL;
2877 }
2878 return dict_items((PyDictObject *)mp);
2879 }
2880
2881 /* Return 1 if dicts equal, 0 if not, -1 if error.
2882 * Gets out as soon as any difference is detected.
2883 * Uses only Py_EQ comparison.
2884 */
2885 static int
dict_equal(PyDictObject * a,PyDictObject * b)2886 dict_equal(PyDictObject *a, PyDictObject *b)
2887 {
2888 Py_ssize_t i;
2889
2890 if (a->ma_used != b->ma_used)
2891 /* can't be equal if # of entries differ */
2892 return 0;
2893 /* Same # of entries -- check all of 'em. Exit early on any diff. */
2894 for (i = 0; i < a->ma_keys->dk_nentries; i++) {
2895 PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
2896 PyObject *aval;
2897 if (a->ma_values)
2898 aval = a->ma_values[i];
2899 else
2900 aval = ep->me_value;
2901 if (aval != NULL) {
2902 int cmp;
2903 PyObject *bval;
2904 PyObject *key = ep->me_key;
2905 /* temporarily bump aval's refcount to ensure it stays
2906 alive until we're done with it */
2907 Py_INCREF(aval);
2908 /* ditto for key */
2909 Py_INCREF(key);
2910 /* reuse the known hash value */
2911 b->ma_keys->dk_lookup(b, key, ep->me_hash, &bval);
2912 if (bval == NULL) {
2913 Py_DECREF(key);
2914 Py_DECREF(aval);
2915 if (PyErr_Occurred())
2916 return -1;
2917 return 0;
2918 }
2919 Py_INCREF(bval);
2920 cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
2921 Py_DECREF(key);
2922 Py_DECREF(aval);
2923 Py_DECREF(bval);
2924 if (cmp <= 0) /* error or not equal */
2925 return cmp;
2926 }
2927 }
2928 return 1;
2929 }
2930
2931 static PyObject *
dict_richcompare(PyObject * v,PyObject * w,int op)2932 dict_richcompare(PyObject *v, PyObject *w, int op)
2933 {
2934 int cmp;
2935 PyObject *res;
2936
2937 if (!PyDict_Check(v) || !PyDict_Check(w)) {
2938 res = Py_NotImplemented;
2939 }
2940 else if (op == Py_EQ || op == Py_NE) {
2941 cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
2942 if (cmp < 0)
2943 return NULL;
2944 res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
2945 }
2946 else
2947 res = Py_NotImplemented;
2948 Py_INCREF(res);
2949 return res;
2950 }
2951
2952 /*[clinic input]
2953
2954 @coexist
2955 dict.__contains__
2956
2957 key: object
2958 /
2959
2960 True if the dictionary has the specified key, else False.
2961 [clinic start generated code]*/
2962
2963 static PyObject *
dict___contains__(PyDictObject * self,PyObject * key)2964 dict___contains__(PyDictObject *self, PyObject *key)
2965 /*[clinic end generated code: output=a3d03db709ed6e6b input=fe1cb42ad831e820]*/
2966 {
2967 register PyDictObject *mp = self;
2968 Py_hash_t hash;
2969 Py_ssize_t ix;
2970 PyObject *value;
2971
2972 if (!PyUnicode_CheckExact(key) ||
2973 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2974 hash = PyObject_Hash(key);
2975 if (hash == -1)
2976 return NULL;
2977 }
2978 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
2979 if (ix == DKIX_ERROR)
2980 return NULL;
2981 if (ix == DKIX_EMPTY || value == NULL)
2982 Py_RETURN_FALSE;
2983 Py_RETURN_TRUE;
2984 }
2985
2986 /*[clinic input]
2987 dict.get
2988
2989 key: object
2990 default: object = None
2991 /
2992
2993 Return the value for key if key is in the dictionary, else default.
2994 [clinic start generated code]*/
2995
2996 static PyObject *
dict_get_impl(PyDictObject * self,PyObject * key,PyObject * default_value)2997 dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
2998 /*[clinic end generated code: output=bba707729dee05bf input=279ddb5790b6b107]*/
2999 {
3000 PyObject *val = NULL;
3001 Py_hash_t hash;
3002 Py_ssize_t ix;
3003
3004 if (!PyUnicode_CheckExact(key) ||
3005 (hash = ((PyASCIIObject *) key)->hash) == -1) {
3006 hash = PyObject_Hash(key);
3007 if (hash == -1)
3008 return NULL;
3009 }
3010 ix = (self->ma_keys->dk_lookup) (self, key, hash, &val);
3011 if (ix == DKIX_ERROR)
3012 return NULL;
3013 if (ix == DKIX_EMPTY || val == NULL) {
3014 val = default_value;
3015 }
3016 Py_INCREF(val);
3017 return val;
3018 }
3019
3020 PyObject *
PyDict_SetDefault(PyObject * d,PyObject * key,PyObject * defaultobj)3021 PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
3022 {
3023 PyDictObject *mp = (PyDictObject *)d;
3024 PyObject *value;
3025 Py_hash_t hash;
3026
3027 if (!PyDict_Check(d)) {
3028 PyErr_BadInternalCall();
3029 return NULL;
3030 }
3031
3032 if (!PyUnicode_CheckExact(key) ||
3033 (hash = ((PyASCIIObject *) key)->hash) == -1) {
3034 hash = PyObject_Hash(key);
3035 if (hash == -1)
3036 return NULL;
3037 }
3038 if (mp->ma_keys == Py_EMPTY_KEYS) {
3039 if (insert_to_emptydict(mp, key, hash, defaultobj) < 0) {
3040 return NULL;
3041 }
3042 return defaultobj;
3043 }
3044
3045 if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
3046 if (insertion_resize(mp) < 0)
3047 return NULL;
3048 }
3049
3050 Py_ssize_t ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
3051 if (ix == DKIX_ERROR)
3052 return NULL;
3053
3054 if (_PyDict_HasSplitTable(mp) &&
3055 ((ix >= 0 && value == NULL && mp->ma_used != ix) ||
3056 (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
3057 if (insertion_resize(mp) < 0) {
3058 return NULL;
3059 }
3060 ix = DKIX_EMPTY;
3061 }
3062
3063 if (ix == DKIX_EMPTY) {
3064 PyDictKeyEntry *ep, *ep0;
3065 value = defaultobj;
3066 if (mp->ma_keys->dk_usable <= 0) {
3067 if (insertion_resize(mp) < 0) {
3068 return NULL;
3069 }
3070 }
3071 if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_lookup != lookdict) {
3072 mp->ma_keys->dk_lookup = lookdict;
3073 }
3074 Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
3075 ep0 = DK_ENTRIES(mp->ma_keys);
3076 ep = &ep0[mp->ma_keys->dk_nentries];
3077 dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
3078 Py_INCREF(key);
3079 Py_INCREF(value);
3080 MAINTAIN_TRACKING(mp, key, value);
3081 ep->me_key = key;
3082 ep->me_hash = hash;
3083 if (_PyDict_HasSplitTable(mp)) {
3084 assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
3085 mp->ma_values[mp->ma_keys->dk_nentries] = value;
3086 }
3087 else {
3088 ep->me_value = value;
3089 }
3090 mp->ma_used++;
3091 mp->ma_version_tag = DICT_NEXT_VERSION();
3092 mp->ma_keys->dk_usable--;
3093 mp->ma_keys->dk_nentries++;
3094 assert(mp->ma_keys->dk_usable >= 0);
3095 }
3096 else if (value == NULL) {
3097 value = defaultobj;
3098 assert(_PyDict_HasSplitTable(mp));
3099 assert(ix == mp->ma_used);
3100 Py_INCREF(value);
3101 MAINTAIN_TRACKING(mp, key, value);
3102 mp->ma_values[ix] = value;
3103 mp->ma_used++;
3104 mp->ma_version_tag = DICT_NEXT_VERSION();
3105 }
3106
3107 ASSERT_CONSISTENT(mp);
3108 return value;
3109 }
3110
3111 /*[clinic input]
3112 dict.setdefault
3113
3114 key: object
3115 default: object = None
3116 /
3117
3118 Insert key with a value of default if key is not in the dictionary.
3119
3120 Return the value for key if key is in the dictionary, else default.
3121 [clinic start generated code]*/
3122
3123 static PyObject *
dict_setdefault_impl(PyDictObject * self,PyObject * key,PyObject * default_value)3124 dict_setdefault_impl(PyDictObject *self, PyObject *key,
3125 PyObject *default_value)
3126 /*[clinic end generated code: output=f8c1101ebf69e220 input=0f063756e815fd9d]*/
3127 {
3128 PyObject *val;
3129
3130 val = PyDict_SetDefault((PyObject *)self, key, default_value);
3131 Py_XINCREF(val);
3132 return val;
3133 }
3134
3135 static PyObject *
dict_clear(PyDictObject * mp,PyObject * Py_UNUSED (ignored))3136 dict_clear(PyDictObject *mp, PyObject *Py_UNUSED(ignored))
3137 {
3138 PyDict_Clear((PyObject *)mp);
3139 Py_RETURN_NONE;
3140 }
3141
3142 /*[clinic input]
3143 dict.pop
3144
3145 key: object
3146 default: object = NULL
3147 /
3148
3149 D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
3150
3151 If the key is not found, return the default if given; otherwise,
3152 raise a KeyError.
3153 [clinic start generated code]*/
3154
3155 static PyObject *
dict_pop_impl(PyDictObject * self,PyObject * key,PyObject * default_value)3156 dict_pop_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
3157 /*[clinic end generated code: output=3abb47b89f24c21c input=e221baa01044c44c]*/
3158 {
3159 return _PyDict_Pop((PyObject*)self, key, default_value);
3160 }
3161
3162 /*[clinic input]
3163 dict.popitem
3164
3165 Remove and return a (key, value) pair as a 2-tuple.
3166
3167 Pairs are returned in LIFO (last-in, first-out) order.
3168 Raises KeyError if the dict is empty.
3169 [clinic start generated code]*/
3170
3171 static PyObject *
dict_popitem_impl(PyDictObject * self)3172 dict_popitem_impl(PyDictObject *self)
3173 /*[clinic end generated code: output=e65fcb04420d230d input=1c38a49f21f64941]*/
3174 {
3175 Py_ssize_t i, j;
3176 PyDictKeyEntry *ep0, *ep;
3177 PyObject *res;
3178
3179 /* Allocate the result tuple before checking the size. Believe it
3180 * or not, this allocation could trigger a garbage collection which
3181 * could empty the dict, so if we checked the size first and that
3182 * happened, the result would be an infinite loop (searching for an
3183 * entry that no longer exists). Note that the usual popitem()
3184 * idiom is "while d: k, v = d.popitem()". so needing to throw the
3185 * tuple away if the dict *is* empty isn't a significant
3186 * inefficiency -- possible, but unlikely in practice.
3187 */
3188 res = PyTuple_New(2);
3189 if (res == NULL)
3190 return NULL;
3191 if (self->ma_used == 0) {
3192 Py_DECREF(res);
3193 PyErr_SetString(PyExc_KeyError, "popitem(): dictionary is empty");
3194 return NULL;
3195 }
3196 /* Convert split table to combined table */
3197 if (self->ma_keys->dk_lookup == lookdict_split) {
3198 if (dictresize(self, DK_SIZE(self->ma_keys))) {
3199 Py_DECREF(res);
3200 return NULL;
3201 }
3202 }
3203 ENSURE_ALLOWS_DELETIONS(self);
3204
3205 /* Pop last item */
3206 ep0 = DK_ENTRIES(self->ma_keys);
3207 i = self->ma_keys->dk_nentries - 1;
3208 while (i >= 0 && ep0[i].me_value == NULL) {
3209 i--;
3210 }
3211 assert(i >= 0);
3212
3213 ep = &ep0[i];
3214 j = lookdict_index(self->ma_keys, ep->me_hash, i);
3215 assert(j >= 0);
3216 assert(dictkeys_get_index(self->ma_keys, j) == i);
3217 dictkeys_set_index(self->ma_keys, j, DKIX_DUMMY);
3218
3219 PyTuple_SET_ITEM(res, 0, ep->me_key);
3220 PyTuple_SET_ITEM(res, 1, ep->me_value);
3221 ep->me_key = NULL;
3222 ep->me_value = NULL;
3223 /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
3224 self->ma_keys->dk_nentries = i;
3225 self->ma_used--;
3226 self->ma_version_tag = DICT_NEXT_VERSION();
3227 ASSERT_CONSISTENT(self);
3228 return res;
3229 }
3230
3231 static int
dict_traverse(PyObject * op,visitproc visit,void * arg)3232 dict_traverse(PyObject *op, visitproc visit, void *arg)
3233 {
3234 PyDictObject *mp = (PyDictObject *)op;
3235 PyDictKeysObject *keys = mp->ma_keys;
3236 PyDictKeyEntry *entries = DK_ENTRIES(keys);
3237 Py_ssize_t i, n = keys->dk_nentries;
3238
3239 if (keys->dk_lookup == lookdict) {
3240 for (i = 0; i < n; i++) {
3241 if (entries[i].me_value != NULL) {
3242 Py_VISIT(entries[i].me_value);
3243 Py_VISIT(entries[i].me_key);
3244 }
3245 }
3246 }
3247 else {
3248 if (mp->ma_values != NULL) {
3249 for (i = 0; i < n; i++) {
3250 Py_VISIT(mp->ma_values[i]);
3251 }
3252 }
3253 else {
3254 for (i = 0; i < n; i++) {
3255 Py_VISIT(entries[i].me_value);
3256 }
3257 }
3258 }
3259 return 0;
3260 }
3261
3262 static int
dict_tp_clear(PyObject * op)3263 dict_tp_clear(PyObject *op)
3264 {
3265 PyDict_Clear(op);
3266 return 0;
3267 }
3268
3269 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
3270
3271 Py_ssize_t
_PyDict_SizeOf(PyDictObject * mp)3272 _PyDict_SizeOf(PyDictObject *mp)
3273 {
3274 Py_ssize_t size, usable, res;
3275
3276 size = DK_SIZE(mp->ma_keys);
3277 usable = USABLE_FRACTION(size);
3278
3279 res = _PyObject_SIZE(Py_TYPE(mp));
3280 if (mp->ma_values)
3281 res += usable * sizeof(PyObject*);
3282 /* If the dictionary is split, the keys portion is accounted-for
3283 in the type object. */
3284 if (mp->ma_keys->dk_refcnt == 1)
3285 res += (sizeof(PyDictKeysObject)
3286 + DK_IXSIZE(mp->ma_keys) * size
3287 + sizeof(PyDictKeyEntry) * usable);
3288 return res;
3289 }
3290
3291 Py_ssize_t
_PyDict_KeysSize(PyDictKeysObject * keys)3292 _PyDict_KeysSize(PyDictKeysObject *keys)
3293 {
3294 return (sizeof(PyDictKeysObject)
3295 + DK_IXSIZE(keys) * DK_SIZE(keys)
3296 + USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry));
3297 }
3298
3299 static PyObject *
dict_sizeof(PyDictObject * mp,PyObject * Py_UNUSED (ignored))3300 dict_sizeof(PyDictObject *mp, PyObject *Py_UNUSED(ignored))
3301 {
3302 return PyLong_FromSsize_t(_PyDict_SizeOf(mp));
3303 }
3304
3305 static PyObject *
dict_or(PyObject * self,PyObject * other)3306 dict_or(PyObject *self, PyObject *other)
3307 {
3308 if (!PyDict_Check(self) || !PyDict_Check(other)) {
3309 Py_RETURN_NOTIMPLEMENTED;
3310 }
3311 PyObject *new = PyDict_Copy(self);
3312 if (new == NULL) {
3313 return NULL;
3314 }
3315 if (dict_update_arg(new, other)) {
3316 Py_DECREF(new);
3317 return NULL;
3318 }
3319 return new;
3320 }
3321
3322 static PyObject *
dict_ior(PyObject * self,PyObject * other)3323 dict_ior(PyObject *self, PyObject *other)
3324 {
3325 if (dict_update_arg(self, other)) {
3326 return NULL;
3327 }
3328 Py_INCREF(self);
3329 return self;
3330 }
3331
3332 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
3333
3334 PyDoc_STRVAR(sizeof__doc__,
3335 "D.__sizeof__() -> size of D in memory, in bytes");
3336
3337 PyDoc_STRVAR(update__doc__,
3338 "D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n\
3339 If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]\n\
3340 If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v\n\
3341 In either case, this is followed by: for k in F: D[k] = F[k]");
3342
3343 PyDoc_STRVAR(clear__doc__,
3344 "D.clear() -> None. Remove all items from D.");
3345
3346 PyDoc_STRVAR(copy__doc__,
3347 "D.copy() -> a shallow copy of D");
3348
3349 /* Forward */
3350 static PyObject *dictkeys_new(PyObject *, PyObject *);
3351 static PyObject *dictitems_new(PyObject *, PyObject *);
3352 static PyObject *dictvalues_new(PyObject *, PyObject *);
3353
3354 PyDoc_STRVAR(keys__doc__,
3355 "D.keys() -> a set-like object providing a view on D's keys");
3356 PyDoc_STRVAR(items__doc__,
3357 "D.items() -> a set-like object providing a view on D's items");
3358 PyDoc_STRVAR(values__doc__,
3359 "D.values() -> an object providing a view on D's values");
3360
3361 static PyMethodDef mapp_methods[] = {
3362 DICT___CONTAINS___METHODDEF
3363 {"__getitem__", (PyCFunction)(void(*)(void))dict_subscript, METH_O | METH_COEXIST,
3364 getitem__doc__},
3365 {"__sizeof__", (PyCFunction)(void(*)(void))dict_sizeof, METH_NOARGS,
3366 sizeof__doc__},
3367 DICT_GET_METHODDEF
3368 DICT_SETDEFAULT_METHODDEF
3369 DICT_POP_METHODDEF
3370 DICT_POPITEM_METHODDEF
3371 {"keys", dictkeys_new, METH_NOARGS,
3372 keys__doc__},
3373 {"items", dictitems_new, METH_NOARGS,
3374 items__doc__},
3375 {"values", dictvalues_new, METH_NOARGS,
3376 values__doc__},
3377 {"update", (PyCFunction)(void(*)(void))dict_update, METH_VARARGS | METH_KEYWORDS,
3378 update__doc__},
3379 DICT_FROMKEYS_METHODDEF
3380 {"clear", (PyCFunction)dict_clear, METH_NOARGS,
3381 clear__doc__},
3382 {"copy", (PyCFunction)dict_copy, METH_NOARGS,
3383 copy__doc__},
3384 DICT___REVERSED___METHODDEF
3385 {"__class_getitem__", (PyCFunction)Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")},
3386 {NULL, NULL} /* sentinel */
3387 };
3388
3389 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
3390 int
PyDict_Contains(PyObject * op,PyObject * key)3391 PyDict_Contains(PyObject *op, PyObject *key)
3392 {
3393 Py_hash_t hash;
3394 Py_ssize_t ix;
3395 PyDictObject *mp = (PyDictObject *)op;
3396 PyObject *value;
3397
3398 if (!PyUnicode_CheckExact(key) ||
3399 (hash = ((PyASCIIObject *) key)->hash) == -1) {
3400 hash = PyObject_Hash(key);
3401 if (hash == -1)
3402 return -1;
3403 }
3404 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
3405 if (ix == DKIX_ERROR)
3406 return -1;
3407 return (ix != DKIX_EMPTY && value != NULL);
3408 }
3409
3410 /* Internal version of PyDict_Contains used when the hash value is already known */
3411 int
_PyDict_Contains_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)3412 _PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
3413 {
3414 PyDictObject *mp = (PyDictObject *)op;
3415 PyObject *value;
3416 Py_ssize_t ix;
3417
3418 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
3419 if (ix == DKIX_ERROR)
3420 return -1;
3421 return (ix != DKIX_EMPTY && value != NULL);
3422 }
3423
3424 int
_PyDict_ContainsId(PyObject * op,struct _Py_Identifier * key)3425 _PyDict_ContainsId(PyObject *op, struct _Py_Identifier *key)
3426 {
3427 PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
3428 if (kv == NULL) {
3429 return -1;
3430 }
3431 return PyDict_Contains(op, kv);
3432 }
3433
3434 /* Hack to implement "key in dict" */
3435 static PySequenceMethods dict_as_sequence = {
3436 0, /* sq_length */
3437 0, /* sq_concat */
3438 0, /* sq_repeat */
3439 0, /* sq_item */
3440 0, /* sq_slice */
3441 0, /* sq_ass_item */
3442 0, /* sq_ass_slice */
3443 PyDict_Contains, /* sq_contains */
3444 0, /* sq_inplace_concat */
3445 0, /* sq_inplace_repeat */
3446 };
3447
3448 static PyNumberMethods dict_as_number = {
3449 .nb_or = dict_or,
3450 .nb_inplace_or = dict_ior,
3451 };
3452
3453 static PyObject *
dict_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3454 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3455 {
3456 PyObject *self;
3457 PyDictObject *d;
3458
3459 assert(type != NULL && type->tp_alloc != NULL);
3460 self = type->tp_alloc(type, 0);
3461 if (self == NULL)
3462 return NULL;
3463 d = (PyDictObject *)self;
3464
3465 /* The object has been implicitly tracked by tp_alloc */
3466 if (type == &PyDict_Type) {
3467 _PyObject_GC_UNTRACK(d);
3468 }
3469
3470 d->ma_used = 0;
3471 d->ma_version_tag = DICT_NEXT_VERSION();
3472 dictkeys_incref(Py_EMPTY_KEYS);
3473 d->ma_keys = Py_EMPTY_KEYS;
3474 d->ma_values = empty_values;
3475 ASSERT_CONSISTENT(d);
3476 return self;
3477 }
3478
3479 static int
dict_init(PyObject * self,PyObject * args,PyObject * kwds)3480 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
3481 {
3482 return dict_update_common(self, args, kwds, "dict");
3483 }
3484
3485 static PyObject *
dict_vectorcall(PyObject * type,PyObject * const * args,size_t nargsf,PyObject * kwnames)3486 dict_vectorcall(PyObject *type, PyObject * const*args,
3487 size_t nargsf, PyObject *kwnames)
3488 {
3489 assert(PyType_Check(type));
3490 Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
3491 if (!_PyArg_CheckPositional("dict", nargs, 0, 1)) {
3492 return NULL;
3493 }
3494
3495 PyObject *self = dict_new((PyTypeObject *)type, NULL, NULL);
3496 if (self == NULL) {
3497 return NULL;
3498 }
3499 if (nargs == 1) {
3500 if (dict_update_arg(self, args[0]) < 0) {
3501 Py_DECREF(self);
3502 return NULL;
3503 }
3504 args++;
3505 }
3506 if (kwnames != NULL) {
3507 for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(kwnames); i++) {
3508 if (PyDict_SetItem(self, PyTuple_GET_ITEM(kwnames, i), args[i]) < 0) {
3509 Py_DECREF(self);
3510 return NULL;
3511 }
3512 }
3513 }
3514 return self;
3515 }
3516
3517 static PyObject *
dict_iter(PyDictObject * dict)3518 dict_iter(PyDictObject *dict)
3519 {
3520 return dictiter_new(dict, &PyDictIterKey_Type);
3521 }
3522
3523 PyDoc_STRVAR(dictionary_doc,
3524 "dict() -> new empty dictionary\n"
3525 "dict(mapping) -> new dictionary initialized from a mapping object's\n"
3526 " (key, value) pairs\n"
3527 "dict(iterable) -> new dictionary initialized as if via:\n"
3528 " d = {}\n"
3529 " for k, v in iterable:\n"
3530 " d[k] = v\n"
3531 "dict(**kwargs) -> new dictionary initialized with the name=value pairs\n"
3532 " in the keyword argument list. For example: dict(one=1, two=2)");
3533
3534 PyTypeObject PyDict_Type = {
3535 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3536 "dict",
3537 sizeof(PyDictObject),
3538 0,
3539 (destructor)dict_dealloc, /* tp_dealloc */
3540 0, /* tp_vectorcall_offset */
3541 0, /* tp_getattr */
3542 0, /* tp_setattr */
3543 0, /* tp_as_async */
3544 (reprfunc)dict_repr, /* tp_repr */
3545 &dict_as_number, /* tp_as_number */
3546 &dict_as_sequence, /* tp_as_sequence */
3547 &dict_as_mapping, /* tp_as_mapping */
3548 PyObject_HashNotImplemented, /* tp_hash */
3549 0, /* tp_call */
3550 0, /* tp_str */
3551 PyObject_GenericGetAttr, /* tp_getattro */
3552 0, /* tp_setattro */
3553 0, /* tp_as_buffer */
3554 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3555 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DICT_SUBCLASS |
3556 _Py_TPFLAGS_MATCH_SELF | Py_TPFLAGS_MAPPING, /* tp_flags */
3557 dictionary_doc, /* tp_doc */
3558 dict_traverse, /* tp_traverse */
3559 dict_tp_clear, /* tp_clear */
3560 dict_richcompare, /* tp_richcompare */
3561 0, /* tp_weaklistoffset */
3562 (getiterfunc)dict_iter, /* tp_iter */
3563 0, /* tp_iternext */
3564 mapp_methods, /* tp_methods */
3565 0, /* tp_members */
3566 0, /* tp_getset */
3567 0, /* tp_base */
3568 0, /* tp_dict */
3569 0, /* tp_descr_get */
3570 0, /* tp_descr_set */
3571 0, /* tp_dictoffset */
3572 dict_init, /* tp_init */
3573 PyType_GenericAlloc, /* tp_alloc */
3574 dict_new, /* tp_new */
3575 PyObject_GC_Del, /* tp_free */
3576 .tp_vectorcall = dict_vectorcall,
3577 };
3578
3579 /* For backward compatibility with old dictionary interface */
3580
3581 PyObject *
PyDict_GetItemString(PyObject * v,const char * key)3582 PyDict_GetItemString(PyObject *v, const char *key)
3583 {
3584 PyObject *kv, *rv;
3585 kv = PyUnicode_FromString(key);
3586 if (kv == NULL) {
3587 PyErr_Clear();
3588 return NULL;
3589 }
3590 rv = PyDict_GetItem(v, kv);
3591 Py_DECREF(kv);
3592 return rv;
3593 }
3594
3595 int
_PyDict_SetItemId(PyObject * v,struct _Py_Identifier * key,PyObject * item)3596 _PyDict_SetItemId(PyObject *v, struct _Py_Identifier *key, PyObject *item)
3597 {
3598 PyObject *kv;
3599 kv = _PyUnicode_FromId(key); /* borrowed */
3600 if (kv == NULL)
3601 return -1;
3602 return PyDict_SetItem(v, kv, item);
3603 }
3604
3605 int
PyDict_SetItemString(PyObject * v,const char * key,PyObject * item)3606 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
3607 {
3608 PyObject *kv;
3609 int err;
3610 kv = PyUnicode_FromString(key);
3611 if (kv == NULL)
3612 return -1;
3613 PyUnicode_InternInPlace(&kv); /* XXX Should we really? */
3614 err = PyDict_SetItem(v, kv, item);
3615 Py_DECREF(kv);
3616 return err;
3617 }
3618
3619 int
_PyDict_DelItemId(PyObject * v,_Py_Identifier * key)3620 _PyDict_DelItemId(PyObject *v, _Py_Identifier *key)
3621 {
3622 PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
3623 if (kv == NULL)
3624 return -1;
3625 return PyDict_DelItem(v, kv);
3626 }
3627
3628 int
PyDict_DelItemString(PyObject * v,const char * key)3629 PyDict_DelItemString(PyObject *v, const char *key)
3630 {
3631 PyObject *kv;
3632 int err;
3633 kv = PyUnicode_FromString(key);
3634 if (kv == NULL)
3635 return -1;
3636 err = PyDict_DelItem(v, kv);
3637 Py_DECREF(kv);
3638 return err;
3639 }
3640
3641 /* Dictionary iterator types */
3642
3643 typedef struct {
3644 PyObject_HEAD
3645 PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
3646 Py_ssize_t di_used;
3647 Py_ssize_t di_pos;
3648 PyObject* di_result; /* reusable result tuple for iteritems */
3649 Py_ssize_t len;
3650 } dictiterobject;
3651
3652 static PyObject *
dictiter_new(PyDictObject * dict,PyTypeObject * itertype)3653 dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
3654 {
3655 dictiterobject *di;
3656 di = PyObject_GC_New(dictiterobject, itertype);
3657 if (di == NULL) {
3658 return NULL;
3659 }
3660 Py_INCREF(dict);
3661 di->di_dict = dict;
3662 di->di_used = dict->ma_used;
3663 di->len = dict->ma_used;
3664 if (itertype == &PyDictRevIterKey_Type ||
3665 itertype == &PyDictRevIterItem_Type ||
3666 itertype == &PyDictRevIterValue_Type) {
3667 if (dict->ma_values) {
3668 di->di_pos = dict->ma_used - 1;
3669 }
3670 else {
3671 di->di_pos = dict->ma_keys->dk_nentries - 1;
3672 }
3673 }
3674 else {
3675 di->di_pos = 0;
3676 }
3677 if (itertype == &PyDictIterItem_Type ||
3678 itertype == &PyDictRevIterItem_Type) {
3679 di->di_result = PyTuple_Pack(2, Py_None, Py_None);
3680 if (di->di_result == NULL) {
3681 Py_DECREF(di);
3682 return NULL;
3683 }
3684 }
3685 else {
3686 di->di_result = NULL;
3687 }
3688 _PyObject_GC_TRACK(di);
3689 return (PyObject *)di;
3690 }
3691
3692 static void
dictiter_dealloc(dictiterobject * di)3693 dictiter_dealloc(dictiterobject *di)
3694 {
3695 /* bpo-31095: UnTrack is needed before calling any callbacks */
3696 _PyObject_GC_UNTRACK(di);
3697 Py_XDECREF(di->di_dict);
3698 Py_XDECREF(di->di_result);
3699 PyObject_GC_Del(di);
3700 }
3701
3702 static int
dictiter_traverse(dictiterobject * di,visitproc visit,void * arg)3703 dictiter_traverse(dictiterobject *di, visitproc visit, void *arg)
3704 {
3705 Py_VISIT(di->di_dict);
3706 Py_VISIT(di->di_result);
3707 return 0;
3708 }
3709
3710 static PyObject *
dictiter_len(dictiterobject * di,PyObject * Py_UNUSED (ignored))3711 dictiter_len(dictiterobject *di, PyObject *Py_UNUSED(ignored))
3712 {
3713 Py_ssize_t len = 0;
3714 if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
3715 len = di->len;
3716 return PyLong_FromSize_t(len);
3717 }
3718
3719 PyDoc_STRVAR(length_hint_doc,
3720 "Private method returning an estimate of len(list(it)).");
3721
3722 static PyObject *
3723 dictiter_reduce(dictiterobject *di, PyObject *Py_UNUSED(ignored));
3724
3725 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3726
3727 static PyMethodDef dictiter_methods[] = {
3728 {"__length_hint__", (PyCFunction)(void(*)(void))dictiter_len, METH_NOARGS,
3729 length_hint_doc},
3730 {"__reduce__", (PyCFunction)(void(*)(void))dictiter_reduce, METH_NOARGS,
3731 reduce_doc},
3732 {NULL, NULL} /* sentinel */
3733 };
3734
3735 static PyObject*
dictiter_iternextkey(dictiterobject * di)3736 dictiter_iternextkey(dictiterobject *di)
3737 {
3738 PyObject *key;
3739 Py_ssize_t i;
3740 PyDictKeysObject *k;
3741 PyDictObject *d = di->di_dict;
3742
3743 if (d == NULL)
3744 return NULL;
3745 assert (PyDict_Check(d));
3746
3747 if (di->di_used != d->ma_used) {
3748 PyErr_SetString(PyExc_RuntimeError,
3749 "dictionary changed size during iteration");
3750 di->di_used = -1; /* Make this state sticky */
3751 return NULL;
3752 }
3753
3754 i = di->di_pos;
3755 k = d->ma_keys;
3756 assert(i >= 0);
3757 if (d->ma_values) {
3758 if (i >= d->ma_used)
3759 goto fail;
3760 key = DK_ENTRIES(k)[i].me_key;
3761 assert(d->ma_values[i] != NULL);
3762 }
3763 else {
3764 Py_ssize_t n = k->dk_nentries;
3765 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
3766 while (i < n && entry_ptr->me_value == NULL) {
3767 entry_ptr++;
3768 i++;
3769 }
3770 if (i >= n)
3771 goto fail;
3772 key = entry_ptr->me_key;
3773 }
3774 // We found an element (key), but did not expect it
3775 if (di->len == 0) {
3776 PyErr_SetString(PyExc_RuntimeError,
3777 "dictionary keys changed during iteration");
3778 goto fail;
3779 }
3780 di->di_pos = i+1;
3781 di->len--;
3782 Py_INCREF(key);
3783 return key;
3784
3785 fail:
3786 di->di_dict = NULL;
3787 Py_DECREF(d);
3788 return NULL;
3789 }
3790
3791 PyTypeObject PyDictIterKey_Type = {
3792 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3793 "dict_keyiterator", /* tp_name */
3794 sizeof(dictiterobject), /* tp_basicsize */
3795 0, /* tp_itemsize */
3796 /* methods */
3797 (destructor)dictiter_dealloc, /* tp_dealloc */
3798 0, /* tp_vectorcall_offset */
3799 0, /* tp_getattr */
3800 0, /* tp_setattr */
3801 0, /* tp_as_async */
3802 0, /* tp_repr */
3803 0, /* tp_as_number */
3804 0, /* tp_as_sequence */
3805 0, /* tp_as_mapping */
3806 0, /* tp_hash */
3807 0, /* tp_call */
3808 0, /* tp_str */
3809 PyObject_GenericGetAttr, /* tp_getattro */
3810 0, /* tp_setattro */
3811 0, /* tp_as_buffer */
3812 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3813 0, /* tp_doc */
3814 (traverseproc)dictiter_traverse, /* tp_traverse */
3815 0, /* tp_clear */
3816 0, /* tp_richcompare */
3817 0, /* tp_weaklistoffset */
3818 PyObject_SelfIter, /* tp_iter */
3819 (iternextfunc)dictiter_iternextkey, /* tp_iternext */
3820 dictiter_methods, /* tp_methods */
3821 0,
3822 };
3823
3824 static PyObject *
dictiter_iternextvalue(dictiterobject * di)3825 dictiter_iternextvalue(dictiterobject *di)
3826 {
3827 PyObject *value;
3828 Py_ssize_t i;
3829 PyDictObject *d = di->di_dict;
3830
3831 if (d == NULL)
3832 return NULL;
3833 assert (PyDict_Check(d));
3834
3835 if (di->di_used != d->ma_used) {
3836 PyErr_SetString(PyExc_RuntimeError,
3837 "dictionary changed size during iteration");
3838 di->di_used = -1; /* Make this state sticky */
3839 return NULL;
3840 }
3841
3842 i = di->di_pos;
3843 assert(i >= 0);
3844 if (d->ma_values) {
3845 if (i >= d->ma_used)
3846 goto fail;
3847 value = d->ma_values[i];
3848 assert(value != NULL);
3849 }
3850 else {
3851 Py_ssize_t n = d->ma_keys->dk_nentries;
3852 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
3853 while (i < n && entry_ptr->me_value == NULL) {
3854 entry_ptr++;
3855 i++;
3856 }
3857 if (i >= n)
3858 goto fail;
3859 value = entry_ptr->me_value;
3860 }
3861 // We found an element, but did not expect it
3862 if (di->len == 0) {
3863 PyErr_SetString(PyExc_RuntimeError,
3864 "dictionary keys changed during iteration");
3865 goto fail;
3866 }
3867 di->di_pos = i+1;
3868 di->len--;
3869 Py_INCREF(value);
3870 return value;
3871
3872 fail:
3873 di->di_dict = NULL;
3874 Py_DECREF(d);
3875 return NULL;
3876 }
3877
3878 PyTypeObject PyDictIterValue_Type = {
3879 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3880 "dict_valueiterator", /* tp_name */
3881 sizeof(dictiterobject), /* tp_basicsize */
3882 0, /* tp_itemsize */
3883 /* methods */
3884 (destructor)dictiter_dealloc, /* tp_dealloc */
3885 0, /* tp_vectorcall_offset */
3886 0, /* tp_getattr */
3887 0, /* tp_setattr */
3888 0, /* tp_as_async */
3889 0, /* tp_repr */
3890 0, /* tp_as_number */
3891 0, /* tp_as_sequence */
3892 0, /* tp_as_mapping */
3893 0, /* tp_hash */
3894 0, /* tp_call */
3895 0, /* tp_str */
3896 PyObject_GenericGetAttr, /* tp_getattro */
3897 0, /* tp_setattro */
3898 0, /* tp_as_buffer */
3899 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3900 0, /* tp_doc */
3901 (traverseproc)dictiter_traverse, /* tp_traverse */
3902 0, /* tp_clear */
3903 0, /* tp_richcompare */
3904 0, /* tp_weaklistoffset */
3905 PyObject_SelfIter, /* tp_iter */
3906 (iternextfunc)dictiter_iternextvalue, /* tp_iternext */
3907 dictiter_methods, /* tp_methods */
3908 0,
3909 };
3910
3911 static PyObject *
dictiter_iternextitem(dictiterobject * di)3912 dictiter_iternextitem(dictiterobject *di)
3913 {
3914 PyObject *key, *value, *result;
3915 Py_ssize_t i;
3916 PyDictObject *d = di->di_dict;
3917
3918 if (d == NULL)
3919 return NULL;
3920 assert (PyDict_Check(d));
3921
3922 if (di->di_used != d->ma_used) {
3923 PyErr_SetString(PyExc_RuntimeError,
3924 "dictionary changed size during iteration");
3925 di->di_used = -1; /* Make this state sticky */
3926 return NULL;
3927 }
3928
3929 i = di->di_pos;
3930 assert(i >= 0);
3931 if (d->ma_values) {
3932 if (i >= d->ma_used)
3933 goto fail;
3934 key = DK_ENTRIES(d->ma_keys)[i].me_key;
3935 value = d->ma_values[i];
3936 assert(value != NULL);
3937 }
3938 else {
3939 Py_ssize_t n = d->ma_keys->dk_nentries;
3940 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
3941 while (i < n && entry_ptr->me_value == NULL) {
3942 entry_ptr++;
3943 i++;
3944 }
3945 if (i >= n)
3946 goto fail;
3947 key = entry_ptr->me_key;
3948 value = entry_ptr->me_value;
3949 }
3950 // We found an element, but did not expect it
3951 if (di->len == 0) {
3952 PyErr_SetString(PyExc_RuntimeError,
3953 "dictionary keys changed during iteration");
3954 goto fail;
3955 }
3956 di->di_pos = i+1;
3957 di->len--;
3958 Py_INCREF(key);
3959 Py_INCREF(value);
3960 result = di->di_result;
3961 if (Py_REFCNT(result) == 1) {
3962 PyObject *oldkey = PyTuple_GET_ITEM(result, 0);
3963 PyObject *oldvalue = PyTuple_GET_ITEM(result, 1);
3964 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
3965 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
3966 Py_INCREF(result);
3967 Py_DECREF(oldkey);
3968 Py_DECREF(oldvalue);
3969 // bpo-42536: The GC may have untracked this result tuple. Since we're
3970 // recycling it, make sure it's tracked again:
3971 if (!_PyObject_GC_IS_TRACKED(result)) {
3972 _PyObject_GC_TRACK(result);
3973 }
3974 }
3975 else {
3976 result = PyTuple_New(2);
3977 if (result == NULL)
3978 return NULL;
3979 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
3980 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
3981 }
3982 return result;
3983
3984 fail:
3985 di->di_dict = NULL;
3986 Py_DECREF(d);
3987 return NULL;
3988 }
3989
3990 PyTypeObject PyDictIterItem_Type = {
3991 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3992 "dict_itemiterator", /* tp_name */
3993 sizeof(dictiterobject), /* tp_basicsize */
3994 0, /* tp_itemsize */
3995 /* methods */
3996 (destructor)dictiter_dealloc, /* tp_dealloc */
3997 0, /* tp_vectorcall_offset */
3998 0, /* tp_getattr */
3999 0, /* tp_setattr */
4000 0, /* tp_as_async */
4001 0, /* tp_repr */
4002 0, /* tp_as_number */
4003 0, /* tp_as_sequence */
4004 0, /* tp_as_mapping */
4005 0, /* tp_hash */
4006 0, /* tp_call */
4007 0, /* tp_str */
4008 PyObject_GenericGetAttr, /* tp_getattro */
4009 0, /* tp_setattro */
4010 0, /* tp_as_buffer */
4011 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4012 0, /* tp_doc */
4013 (traverseproc)dictiter_traverse, /* tp_traverse */
4014 0, /* tp_clear */
4015 0, /* tp_richcompare */
4016 0, /* tp_weaklistoffset */
4017 PyObject_SelfIter, /* tp_iter */
4018 (iternextfunc)dictiter_iternextitem, /* tp_iternext */
4019 dictiter_methods, /* tp_methods */
4020 0,
4021 };
4022
4023
4024 /* dictreviter */
4025
4026 static PyObject *
dictreviter_iternext(dictiterobject * di)4027 dictreviter_iternext(dictiterobject *di)
4028 {
4029 PyDictObject *d = di->di_dict;
4030
4031 if (d == NULL) {
4032 return NULL;
4033 }
4034 assert (PyDict_Check(d));
4035
4036 if (di->di_used != d->ma_used) {
4037 PyErr_SetString(PyExc_RuntimeError,
4038 "dictionary changed size during iteration");
4039 di->di_used = -1; /* Make this state sticky */
4040 return NULL;
4041 }
4042
4043 Py_ssize_t i = di->di_pos;
4044 PyDictKeysObject *k = d->ma_keys;
4045 PyObject *key, *value, *result;
4046
4047 if (i < 0) {
4048 goto fail;
4049 }
4050 if (d->ma_values) {
4051 key = DK_ENTRIES(k)[i].me_key;
4052 value = d->ma_values[i];
4053 assert (value != NULL);
4054 }
4055 else {
4056 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
4057 while (entry_ptr->me_value == NULL) {
4058 if (--i < 0) {
4059 goto fail;
4060 }
4061 entry_ptr--;
4062 }
4063 key = entry_ptr->me_key;
4064 value = entry_ptr->me_value;
4065 }
4066 di->di_pos = i-1;
4067 di->len--;
4068
4069 if (Py_IS_TYPE(di, &PyDictRevIterKey_Type)) {
4070 Py_INCREF(key);
4071 return key;
4072 }
4073 else if (Py_IS_TYPE(di, &PyDictRevIterValue_Type)) {
4074 Py_INCREF(value);
4075 return value;
4076 }
4077 else if (Py_IS_TYPE(di, &PyDictRevIterItem_Type)) {
4078 Py_INCREF(key);
4079 Py_INCREF(value);
4080 result = di->di_result;
4081 if (Py_REFCNT(result) == 1) {
4082 PyObject *oldkey = PyTuple_GET_ITEM(result, 0);
4083 PyObject *oldvalue = PyTuple_GET_ITEM(result, 1);
4084 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
4085 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
4086 Py_INCREF(result);
4087 Py_DECREF(oldkey);
4088 Py_DECREF(oldvalue);
4089 // bpo-42536: The GC may have untracked this result tuple. Since
4090 // we're recycling it, make sure it's tracked again:
4091 if (!_PyObject_GC_IS_TRACKED(result)) {
4092 _PyObject_GC_TRACK(result);
4093 }
4094 }
4095 else {
4096 result = PyTuple_New(2);
4097 if (result == NULL) {
4098 return NULL;
4099 }
4100 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
4101 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
4102 }
4103 return result;
4104 }
4105 else {
4106 Py_UNREACHABLE();
4107 }
4108
4109 fail:
4110 di->di_dict = NULL;
4111 Py_DECREF(d);
4112 return NULL;
4113 }
4114
4115 PyTypeObject PyDictRevIterKey_Type = {
4116 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4117 "dict_reversekeyiterator",
4118 sizeof(dictiterobject),
4119 .tp_dealloc = (destructor)dictiter_dealloc,
4120 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
4121 .tp_traverse = (traverseproc)dictiter_traverse,
4122 .tp_iter = PyObject_SelfIter,
4123 .tp_iternext = (iternextfunc)dictreviter_iternext,
4124 .tp_methods = dictiter_methods
4125 };
4126
4127
4128 /*[clinic input]
4129 dict.__reversed__
4130
4131 Return a reverse iterator over the dict keys.
4132 [clinic start generated code]*/
4133
4134 static PyObject *
dict___reversed___impl(PyDictObject * self)4135 dict___reversed___impl(PyDictObject *self)
4136 /*[clinic end generated code: output=e674483336d1ed51 input=23210ef3477d8c4d]*/
4137 {
4138 assert (PyDict_Check(self));
4139 return dictiter_new(self, &PyDictRevIterKey_Type);
4140 }
4141
4142 static PyObject *
dictiter_reduce(dictiterobject * di,PyObject * Py_UNUSED (ignored))4143 dictiter_reduce(dictiterobject *di, PyObject *Py_UNUSED(ignored))
4144 {
4145 _Py_IDENTIFIER(iter);
4146 /* copy the iterator state */
4147 dictiterobject tmp = *di;
4148 Py_XINCREF(tmp.di_dict);
4149
4150 PyObject *list = PySequence_List((PyObject*)&tmp);
4151 Py_XDECREF(tmp.di_dict);
4152 if (list == NULL) {
4153 return NULL;
4154 }
4155 return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), list);
4156 }
4157
4158 PyTypeObject PyDictRevIterItem_Type = {
4159 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4160 "dict_reverseitemiterator",
4161 sizeof(dictiterobject),
4162 .tp_dealloc = (destructor)dictiter_dealloc,
4163 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
4164 .tp_traverse = (traverseproc)dictiter_traverse,
4165 .tp_iter = PyObject_SelfIter,
4166 .tp_iternext = (iternextfunc)dictreviter_iternext,
4167 .tp_methods = dictiter_methods
4168 };
4169
4170 PyTypeObject PyDictRevIterValue_Type = {
4171 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4172 "dict_reversevalueiterator",
4173 sizeof(dictiterobject),
4174 .tp_dealloc = (destructor)dictiter_dealloc,
4175 .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
4176 .tp_traverse = (traverseproc)dictiter_traverse,
4177 .tp_iter = PyObject_SelfIter,
4178 .tp_iternext = (iternextfunc)dictreviter_iternext,
4179 .tp_methods = dictiter_methods
4180 };
4181
4182 /***********************************************/
4183 /* View objects for keys(), items(), values(). */
4184 /***********************************************/
4185
4186 /* The instance lay-out is the same for all three; but the type differs. */
4187
4188 static void
dictview_dealloc(_PyDictViewObject * dv)4189 dictview_dealloc(_PyDictViewObject *dv)
4190 {
4191 /* bpo-31095: UnTrack is needed before calling any callbacks */
4192 _PyObject_GC_UNTRACK(dv);
4193 Py_XDECREF(dv->dv_dict);
4194 PyObject_GC_Del(dv);
4195 }
4196
4197 static int
dictview_traverse(_PyDictViewObject * dv,visitproc visit,void * arg)4198 dictview_traverse(_PyDictViewObject *dv, visitproc visit, void *arg)
4199 {
4200 Py_VISIT(dv->dv_dict);
4201 return 0;
4202 }
4203
4204 static Py_ssize_t
dictview_len(_PyDictViewObject * dv)4205 dictview_len(_PyDictViewObject *dv)
4206 {
4207 Py_ssize_t len = 0;
4208 if (dv->dv_dict != NULL)
4209 len = dv->dv_dict->ma_used;
4210 return len;
4211 }
4212
4213 PyObject *
_PyDictView_New(PyObject * dict,PyTypeObject * type)4214 _PyDictView_New(PyObject *dict, PyTypeObject *type)
4215 {
4216 _PyDictViewObject *dv;
4217 if (dict == NULL) {
4218 PyErr_BadInternalCall();
4219 return NULL;
4220 }
4221 if (!PyDict_Check(dict)) {
4222 /* XXX Get rid of this restriction later */
4223 PyErr_Format(PyExc_TypeError,
4224 "%s() requires a dict argument, not '%s'",
4225 type->tp_name, Py_TYPE(dict)->tp_name);
4226 return NULL;
4227 }
4228 dv = PyObject_GC_New(_PyDictViewObject, type);
4229 if (dv == NULL)
4230 return NULL;
4231 Py_INCREF(dict);
4232 dv->dv_dict = (PyDictObject *)dict;
4233 _PyObject_GC_TRACK(dv);
4234 return (PyObject *)dv;
4235 }
4236
4237 static PyObject *
dictview_mapping(PyObject * view,void * Py_UNUSED (ignored))4238 dictview_mapping(PyObject *view, void *Py_UNUSED(ignored)) {
4239 assert(view != NULL);
4240 assert(PyDictKeys_Check(view)
4241 || PyDictValues_Check(view)
4242 || PyDictItems_Check(view));
4243 PyObject *mapping = (PyObject *)((_PyDictViewObject *)view)->dv_dict;
4244 return PyDictProxy_New(mapping);
4245 }
4246
4247 static PyGetSetDef dictview_getset[] = {
4248 {"mapping", dictview_mapping, (setter)NULL,
4249 "dictionary that this view refers to", NULL},
4250 {0}
4251 };
4252
4253 /* TODO(guido): The views objects are not complete:
4254
4255 * support more set operations
4256 * support arbitrary mappings?
4257 - either these should be static or exported in dictobject.h
4258 - if public then they should probably be in builtins
4259 */
4260
4261 /* Return 1 if self is a subset of other, iterating over self;
4262 0 if not; -1 if an error occurred. */
4263 static int
all_contained_in(PyObject * self,PyObject * other)4264 all_contained_in(PyObject *self, PyObject *other)
4265 {
4266 PyObject *iter = PyObject_GetIter(self);
4267 int ok = 1;
4268
4269 if (iter == NULL)
4270 return -1;
4271 for (;;) {
4272 PyObject *next = PyIter_Next(iter);
4273 if (next == NULL) {
4274 if (PyErr_Occurred())
4275 ok = -1;
4276 break;
4277 }
4278 ok = PySequence_Contains(other, next);
4279 Py_DECREF(next);
4280 if (ok <= 0)
4281 break;
4282 }
4283 Py_DECREF(iter);
4284 return ok;
4285 }
4286
4287 static PyObject *
dictview_richcompare(PyObject * self,PyObject * other,int op)4288 dictview_richcompare(PyObject *self, PyObject *other, int op)
4289 {
4290 Py_ssize_t len_self, len_other;
4291 int ok;
4292 PyObject *result;
4293
4294 assert(self != NULL);
4295 assert(PyDictViewSet_Check(self));
4296 assert(other != NULL);
4297
4298 if (!PyAnySet_Check(other) && !PyDictViewSet_Check(other))
4299 Py_RETURN_NOTIMPLEMENTED;
4300
4301 len_self = PyObject_Size(self);
4302 if (len_self < 0)
4303 return NULL;
4304 len_other = PyObject_Size(other);
4305 if (len_other < 0)
4306 return NULL;
4307
4308 ok = 0;
4309 switch(op) {
4310
4311 case Py_NE:
4312 case Py_EQ:
4313 if (len_self == len_other)
4314 ok = all_contained_in(self, other);
4315 if (op == Py_NE && ok >= 0)
4316 ok = !ok;
4317 break;
4318
4319 case Py_LT:
4320 if (len_self < len_other)
4321 ok = all_contained_in(self, other);
4322 break;
4323
4324 case Py_LE:
4325 if (len_self <= len_other)
4326 ok = all_contained_in(self, other);
4327 break;
4328
4329 case Py_GT:
4330 if (len_self > len_other)
4331 ok = all_contained_in(other, self);
4332 break;
4333
4334 case Py_GE:
4335 if (len_self >= len_other)
4336 ok = all_contained_in(other, self);
4337 break;
4338
4339 }
4340 if (ok < 0)
4341 return NULL;
4342 result = ok ? Py_True : Py_False;
4343 Py_INCREF(result);
4344 return result;
4345 }
4346
4347 static PyObject *
dictview_repr(_PyDictViewObject * dv)4348 dictview_repr(_PyDictViewObject *dv)
4349 {
4350 PyObject *seq;
4351 PyObject *result = NULL;
4352 Py_ssize_t rc;
4353
4354 rc = Py_ReprEnter((PyObject *)dv);
4355 if (rc != 0) {
4356 return rc > 0 ? PyUnicode_FromString("...") : NULL;
4357 }
4358 seq = PySequence_List((PyObject *)dv);
4359 if (seq == NULL) {
4360 goto Done;
4361 }
4362 result = PyUnicode_FromFormat("%s(%R)", Py_TYPE(dv)->tp_name, seq);
4363 Py_DECREF(seq);
4364
4365 Done:
4366 Py_ReprLeave((PyObject *)dv);
4367 return result;
4368 }
4369
4370 /*** dict_keys ***/
4371
4372 static PyObject *
dictkeys_iter(_PyDictViewObject * dv)4373 dictkeys_iter(_PyDictViewObject *dv)
4374 {
4375 if (dv->dv_dict == NULL) {
4376 Py_RETURN_NONE;
4377 }
4378 return dictiter_new(dv->dv_dict, &PyDictIterKey_Type);
4379 }
4380
4381 static int
dictkeys_contains(_PyDictViewObject * dv,PyObject * obj)4382 dictkeys_contains(_PyDictViewObject *dv, PyObject *obj)
4383 {
4384 if (dv->dv_dict == NULL)
4385 return 0;
4386 return PyDict_Contains((PyObject *)dv->dv_dict, obj);
4387 }
4388
4389 static PySequenceMethods dictkeys_as_sequence = {
4390 (lenfunc)dictview_len, /* sq_length */
4391 0, /* sq_concat */
4392 0, /* sq_repeat */
4393 0, /* sq_item */
4394 0, /* sq_slice */
4395 0, /* sq_ass_item */
4396 0, /* sq_ass_slice */
4397 (objobjproc)dictkeys_contains, /* sq_contains */
4398 };
4399
4400 // Create an set object from dictviews object.
4401 // Returns a new reference.
4402 // This utility function is used by set operations.
4403 static PyObject*
dictviews_to_set(PyObject * self)4404 dictviews_to_set(PyObject *self)
4405 {
4406 PyObject *left = self;
4407 if (PyDictKeys_Check(self)) {
4408 // PySet_New() has fast path for the dict object.
4409 PyObject *dict = (PyObject *)((_PyDictViewObject *)self)->dv_dict;
4410 if (PyDict_CheckExact(dict)) {
4411 left = dict;
4412 }
4413 }
4414 return PySet_New(left);
4415 }
4416
4417 static PyObject*
dictviews_sub(PyObject * self,PyObject * other)4418 dictviews_sub(PyObject *self, PyObject *other)
4419 {
4420 PyObject *result = dictviews_to_set(self);
4421 if (result == NULL) {
4422 return NULL;
4423 }
4424
4425 _Py_IDENTIFIER(difference_update);
4426 PyObject *tmp = _PyObject_CallMethodIdOneArg(
4427 result, &PyId_difference_update, other);
4428 if (tmp == NULL) {
4429 Py_DECREF(result);
4430 return NULL;
4431 }
4432
4433 Py_DECREF(tmp);
4434 return result;
4435 }
4436
4437 static int
4438 dictitems_contains(_PyDictViewObject *dv, PyObject *obj);
4439
4440 PyObject *
_PyDictView_Intersect(PyObject * self,PyObject * other)4441 _PyDictView_Intersect(PyObject* self, PyObject *other)
4442 {
4443 PyObject *result;
4444 PyObject *it;
4445 PyObject *key;
4446 Py_ssize_t len_self;
4447 int rv;
4448 int (*dict_contains)(_PyDictViewObject *, PyObject *);
4449
4450 /* Python interpreter swaps parameters when dict view
4451 is on right side of & */
4452 if (!PyDictViewSet_Check(self)) {
4453 PyObject *tmp = other;
4454 other = self;
4455 self = tmp;
4456 }
4457
4458 len_self = dictview_len((_PyDictViewObject *)self);
4459
4460 /* if other is a set and self is smaller than other,
4461 reuse set intersection logic */
4462 if (PySet_CheckExact(other) && len_self <= PyObject_Size(other)) {
4463 _Py_IDENTIFIER(intersection);
4464 return _PyObject_CallMethodIdObjArgs(other, &PyId_intersection, self, NULL);
4465 }
4466
4467 /* if other is another dict view, and it is bigger than self,
4468 swap them */
4469 if (PyDictViewSet_Check(other)) {
4470 Py_ssize_t len_other = dictview_len((_PyDictViewObject *)other);
4471 if (len_other > len_self) {
4472 PyObject *tmp = other;
4473 other = self;
4474 self = tmp;
4475 }
4476 }
4477
4478 /* at this point, two things should be true
4479 1. self is a dictview
4480 2. if other is a dictview then it is smaller than self */
4481 result = PySet_New(NULL);
4482 if (result == NULL)
4483 return NULL;
4484
4485 it = PyObject_GetIter(other);
4486 if (it == NULL) {
4487 Py_DECREF(result);
4488 return NULL;
4489 }
4490
4491 if (PyDictKeys_Check(self)) {
4492 dict_contains = dictkeys_contains;
4493 }
4494 /* else PyDictItems_Check(self) */
4495 else {
4496 dict_contains = dictitems_contains;
4497 }
4498
4499 while ((key = PyIter_Next(it)) != NULL) {
4500 rv = dict_contains((_PyDictViewObject *)self, key);
4501 if (rv < 0) {
4502 goto error;
4503 }
4504 if (rv) {
4505 if (PySet_Add(result, key)) {
4506 goto error;
4507 }
4508 }
4509 Py_DECREF(key);
4510 }
4511 Py_DECREF(it);
4512 if (PyErr_Occurred()) {
4513 Py_DECREF(result);
4514 return NULL;
4515 }
4516 return result;
4517
4518 error:
4519 Py_DECREF(it);
4520 Py_DECREF(result);
4521 Py_DECREF(key);
4522 return NULL;
4523 }
4524
4525 static PyObject*
dictviews_or(PyObject * self,PyObject * other)4526 dictviews_or(PyObject* self, PyObject *other)
4527 {
4528 PyObject *result = dictviews_to_set(self);
4529 if (result == NULL) {
4530 return NULL;
4531 }
4532
4533 if (_PySet_Update(result, other) < 0) {
4534 Py_DECREF(result);
4535 return NULL;
4536 }
4537 return result;
4538 }
4539
4540 static PyObject *
dictitems_xor(PyObject * self,PyObject * other)4541 dictitems_xor(PyObject *self, PyObject *other)
4542 {
4543 assert(PyDictItems_Check(self));
4544 assert(PyDictItems_Check(other));
4545 PyObject *d1 = (PyObject *)((_PyDictViewObject *)self)->dv_dict;
4546 PyObject *d2 = (PyObject *)((_PyDictViewObject *)other)->dv_dict;
4547
4548 PyObject *temp_dict = PyDict_Copy(d1);
4549 if (temp_dict == NULL) {
4550 return NULL;
4551 }
4552 PyObject *result_set = PySet_New(NULL);
4553 if (result_set == NULL) {
4554 Py_CLEAR(temp_dict);
4555 return NULL;
4556 }
4557
4558 PyObject *key = NULL, *val1 = NULL, *val2 = NULL;
4559 Py_ssize_t pos = 0;
4560 Py_hash_t hash;
4561
4562 while (_PyDict_Next(d2, &pos, &key, &val2, &hash)) {
4563 Py_INCREF(key);
4564 Py_INCREF(val2);
4565 val1 = _PyDict_GetItem_KnownHash(temp_dict, key, hash);
4566
4567 int to_delete;
4568 if (val1 == NULL) {
4569 if (PyErr_Occurred()) {
4570 goto error;
4571 }
4572 to_delete = 0;
4573 }
4574 else {
4575 Py_INCREF(val1);
4576 to_delete = PyObject_RichCompareBool(val1, val2, Py_EQ);
4577 if (to_delete < 0) {
4578 goto error;
4579 }
4580 }
4581
4582 if (to_delete) {
4583 if (_PyDict_DelItem_KnownHash(temp_dict, key, hash) < 0) {
4584 goto error;
4585 }
4586 }
4587 else {
4588 PyObject *pair = PyTuple_Pack(2, key, val2);
4589 if (pair == NULL) {
4590 goto error;
4591 }
4592 if (PySet_Add(result_set, pair) < 0) {
4593 Py_DECREF(pair);
4594 goto error;
4595 }
4596 Py_DECREF(pair);
4597 }
4598 Py_DECREF(key);
4599 Py_XDECREF(val1);
4600 Py_DECREF(val2);
4601 }
4602 key = val1 = val2 = NULL;
4603
4604 _Py_IDENTIFIER(items);
4605 PyObject *remaining_pairs = _PyObject_CallMethodIdNoArgs(temp_dict,
4606 &PyId_items);
4607 if (remaining_pairs == NULL) {
4608 goto error;
4609 }
4610 if (_PySet_Update(result_set, remaining_pairs) < 0) {
4611 Py_DECREF(remaining_pairs);
4612 goto error;
4613 }
4614 Py_DECREF(temp_dict);
4615 Py_DECREF(remaining_pairs);
4616 return result_set;
4617
4618 error:
4619 Py_XDECREF(temp_dict);
4620 Py_XDECREF(result_set);
4621 Py_XDECREF(key);
4622 Py_XDECREF(val1);
4623 Py_XDECREF(val2);
4624 return NULL;
4625 }
4626
4627 static PyObject*
dictviews_xor(PyObject * self,PyObject * other)4628 dictviews_xor(PyObject* self, PyObject *other)
4629 {
4630 if (PyDictItems_Check(self) && PyDictItems_Check(other)) {
4631 return dictitems_xor(self, other);
4632 }
4633 PyObject *result = dictviews_to_set(self);
4634 if (result == NULL) {
4635 return NULL;
4636 }
4637
4638 _Py_IDENTIFIER(symmetric_difference_update);
4639 PyObject *tmp = _PyObject_CallMethodIdOneArg(
4640 result, &PyId_symmetric_difference_update, other);
4641 if (tmp == NULL) {
4642 Py_DECREF(result);
4643 return NULL;
4644 }
4645
4646 Py_DECREF(tmp);
4647 return result;
4648 }
4649
4650 static PyNumberMethods dictviews_as_number = {
4651 0, /*nb_add*/
4652 (binaryfunc)dictviews_sub, /*nb_subtract*/
4653 0, /*nb_multiply*/
4654 0, /*nb_remainder*/
4655 0, /*nb_divmod*/
4656 0, /*nb_power*/
4657 0, /*nb_negative*/
4658 0, /*nb_positive*/
4659 0, /*nb_absolute*/
4660 0, /*nb_bool*/
4661 0, /*nb_invert*/
4662 0, /*nb_lshift*/
4663 0, /*nb_rshift*/
4664 (binaryfunc)_PyDictView_Intersect, /*nb_and*/
4665 (binaryfunc)dictviews_xor, /*nb_xor*/
4666 (binaryfunc)dictviews_or, /*nb_or*/
4667 };
4668
4669 static PyObject*
dictviews_isdisjoint(PyObject * self,PyObject * other)4670 dictviews_isdisjoint(PyObject *self, PyObject *other)
4671 {
4672 PyObject *it;
4673 PyObject *item = NULL;
4674
4675 if (self == other) {
4676 if (dictview_len((_PyDictViewObject *)self) == 0)
4677 Py_RETURN_TRUE;
4678 else
4679 Py_RETURN_FALSE;
4680 }
4681
4682 /* Iterate over the shorter object (only if other is a set,
4683 * because PySequence_Contains may be expensive otherwise): */
4684 if (PyAnySet_Check(other) || PyDictViewSet_Check(other)) {
4685 Py_ssize_t len_self = dictview_len((_PyDictViewObject *)self);
4686 Py_ssize_t len_other = PyObject_Size(other);
4687 if (len_other == -1)
4688 return NULL;
4689
4690 if ((len_other > len_self)) {
4691 PyObject *tmp = other;
4692 other = self;
4693 self = tmp;
4694 }
4695 }
4696
4697 it = PyObject_GetIter(other);
4698 if (it == NULL)
4699 return NULL;
4700
4701 while ((item = PyIter_Next(it)) != NULL) {
4702 int contains = PySequence_Contains(self, item);
4703 Py_DECREF(item);
4704 if (contains == -1) {
4705 Py_DECREF(it);
4706 return NULL;
4707 }
4708
4709 if (contains) {
4710 Py_DECREF(it);
4711 Py_RETURN_FALSE;
4712 }
4713 }
4714 Py_DECREF(it);
4715 if (PyErr_Occurred())
4716 return NULL; /* PyIter_Next raised an exception. */
4717 Py_RETURN_TRUE;
4718 }
4719
4720 PyDoc_STRVAR(isdisjoint_doc,
4721 "Return True if the view and the given iterable have a null intersection.");
4722
4723 static PyObject* dictkeys_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored));
4724
4725 PyDoc_STRVAR(reversed_keys_doc,
4726 "Return a reverse iterator over the dict keys.");
4727
4728 static PyMethodDef dictkeys_methods[] = {
4729 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
4730 isdisjoint_doc},
4731 {"__reversed__", (PyCFunction)(void(*)(void))dictkeys_reversed, METH_NOARGS,
4732 reversed_keys_doc},
4733 {NULL, NULL} /* sentinel */
4734 };
4735
4736 PyTypeObject PyDictKeys_Type = {
4737 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4738 "dict_keys", /* tp_name */
4739 sizeof(_PyDictViewObject), /* tp_basicsize */
4740 0, /* tp_itemsize */
4741 /* methods */
4742 (destructor)dictview_dealloc, /* tp_dealloc */
4743 0, /* tp_vectorcall_offset */
4744 0, /* tp_getattr */
4745 0, /* tp_setattr */
4746 0, /* tp_as_async */
4747 (reprfunc)dictview_repr, /* tp_repr */
4748 &dictviews_as_number, /* tp_as_number */
4749 &dictkeys_as_sequence, /* tp_as_sequence */
4750 0, /* tp_as_mapping */
4751 0, /* tp_hash */
4752 0, /* tp_call */
4753 0, /* tp_str */
4754 PyObject_GenericGetAttr, /* tp_getattro */
4755 0, /* tp_setattro */
4756 0, /* tp_as_buffer */
4757 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4758 0, /* tp_doc */
4759 (traverseproc)dictview_traverse, /* tp_traverse */
4760 0, /* tp_clear */
4761 dictview_richcompare, /* tp_richcompare */
4762 0, /* tp_weaklistoffset */
4763 (getiterfunc)dictkeys_iter, /* tp_iter */
4764 0, /* tp_iternext */
4765 dictkeys_methods, /* tp_methods */
4766 .tp_getset = dictview_getset,
4767 };
4768
4769 static PyObject *
dictkeys_new(PyObject * dict,PyObject * Py_UNUSED (ignored))4770 dictkeys_new(PyObject *dict, PyObject *Py_UNUSED(ignored))
4771 {
4772 return _PyDictView_New(dict, &PyDictKeys_Type);
4773 }
4774
4775 static PyObject *
dictkeys_reversed(_PyDictViewObject * dv,PyObject * Py_UNUSED (ignored))4776 dictkeys_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
4777 {
4778 if (dv->dv_dict == NULL) {
4779 Py_RETURN_NONE;
4780 }
4781 return dictiter_new(dv->dv_dict, &PyDictRevIterKey_Type);
4782 }
4783
4784 /*** dict_items ***/
4785
4786 static PyObject *
dictitems_iter(_PyDictViewObject * dv)4787 dictitems_iter(_PyDictViewObject *dv)
4788 {
4789 if (dv->dv_dict == NULL) {
4790 Py_RETURN_NONE;
4791 }
4792 return dictiter_new(dv->dv_dict, &PyDictIterItem_Type);
4793 }
4794
4795 static int
dictitems_contains(_PyDictViewObject * dv,PyObject * obj)4796 dictitems_contains(_PyDictViewObject *dv, PyObject *obj)
4797 {
4798 int result;
4799 PyObject *key, *value, *found;
4800 if (dv->dv_dict == NULL)
4801 return 0;
4802 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 2)
4803 return 0;
4804 key = PyTuple_GET_ITEM(obj, 0);
4805 value = PyTuple_GET_ITEM(obj, 1);
4806 found = PyDict_GetItemWithError((PyObject *)dv->dv_dict, key);
4807 if (found == NULL) {
4808 if (PyErr_Occurred())
4809 return -1;
4810 return 0;
4811 }
4812 Py_INCREF(found);
4813 result = PyObject_RichCompareBool(found, value, Py_EQ);
4814 Py_DECREF(found);
4815 return result;
4816 }
4817
4818 static PySequenceMethods dictitems_as_sequence = {
4819 (lenfunc)dictview_len, /* sq_length */
4820 0, /* sq_concat */
4821 0, /* sq_repeat */
4822 0, /* sq_item */
4823 0, /* sq_slice */
4824 0, /* sq_ass_item */
4825 0, /* sq_ass_slice */
4826 (objobjproc)dictitems_contains, /* sq_contains */
4827 };
4828
4829 static PyObject* dictitems_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored));
4830
4831 PyDoc_STRVAR(reversed_items_doc,
4832 "Return a reverse iterator over the dict items.");
4833
4834 static PyMethodDef dictitems_methods[] = {
4835 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
4836 isdisjoint_doc},
4837 {"__reversed__", (PyCFunction)dictitems_reversed, METH_NOARGS,
4838 reversed_items_doc},
4839 {NULL, NULL} /* sentinel */
4840 };
4841
4842 PyTypeObject PyDictItems_Type = {
4843 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4844 "dict_items", /* tp_name */
4845 sizeof(_PyDictViewObject), /* tp_basicsize */
4846 0, /* tp_itemsize */
4847 /* methods */
4848 (destructor)dictview_dealloc, /* tp_dealloc */
4849 0, /* tp_vectorcall_offset */
4850 0, /* tp_getattr */
4851 0, /* tp_setattr */
4852 0, /* tp_as_async */
4853 (reprfunc)dictview_repr, /* tp_repr */
4854 &dictviews_as_number, /* tp_as_number */
4855 &dictitems_as_sequence, /* tp_as_sequence */
4856 0, /* tp_as_mapping */
4857 0, /* tp_hash */
4858 0, /* tp_call */
4859 0, /* tp_str */
4860 PyObject_GenericGetAttr, /* tp_getattro */
4861 0, /* tp_setattro */
4862 0, /* tp_as_buffer */
4863 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4864 0, /* tp_doc */
4865 (traverseproc)dictview_traverse, /* tp_traverse */
4866 0, /* tp_clear */
4867 dictview_richcompare, /* tp_richcompare */
4868 0, /* tp_weaklistoffset */
4869 (getiterfunc)dictitems_iter, /* tp_iter */
4870 0, /* tp_iternext */
4871 dictitems_methods, /* tp_methods */
4872 .tp_getset = dictview_getset,
4873 };
4874
4875 static PyObject *
dictitems_new(PyObject * dict,PyObject * Py_UNUSED (ignored))4876 dictitems_new(PyObject *dict, PyObject *Py_UNUSED(ignored))
4877 {
4878 return _PyDictView_New(dict, &PyDictItems_Type);
4879 }
4880
4881 static PyObject *
dictitems_reversed(_PyDictViewObject * dv,PyObject * Py_UNUSED (ignored))4882 dictitems_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
4883 {
4884 if (dv->dv_dict == NULL) {
4885 Py_RETURN_NONE;
4886 }
4887 return dictiter_new(dv->dv_dict, &PyDictRevIterItem_Type);
4888 }
4889
4890 /*** dict_values ***/
4891
4892 static PyObject *
dictvalues_iter(_PyDictViewObject * dv)4893 dictvalues_iter(_PyDictViewObject *dv)
4894 {
4895 if (dv->dv_dict == NULL) {
4896 Py_RETURN_NONE;
4897 }
4898 return dictiter_new(dv->dv_dict, &PyDictIterValue_Type);
4899 }
4900
4901 static PySequenceMethods dictvalues_as_sequence = {
4902 (lenfunc)dictview_len, /* sq_length */
4903 0, /* sq_concat */
4904 0, /* sq_repeat */
4905 0, /* sq_item */
4906 0, /* sq_slice */
4907 0, /* sq_ass_item */
4908 0, /* sq_ass_slice */
4909 (objobjproc)0, /* sq_contains */
4910 };
4911
4912 static PyObject* dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored));
4913
4914 PyDoc_STRVAR(reversed_values_doc,
4915 "Return a reverse iterator over the dict values.");
4916
4917 static PyMethodDef dictvalues_methods[] = {
4918 {"__reversed__", (PyCFunction)dictvalues_reversed, METH_NOARGS,
4919 reversed_values_doc},
4920 {NULL, NULL} /* sentinel */
4921 };
4922
4923 PyTypeObject PyDictValues_Type = {
4924 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4925 "dict_values", /* tp_name */
4926 sizeof(_PyDictViewObject), /* tp_basicsize */
4927 0, /* tp_itemsize */
4928 /* methods */
4929 (destructor)dictview_dealloc, /* tp_dealloc */
4930 0, /* tp_vectorcall_offset */
4931 0, /* tp_getattr */
4932 0, /* tp_setattr */
4933 0, /* tp_as_async */
4934 (reprfunc)dictview_repr, /* tp_repr */
4935 0, /* tp_as_number */
4936 &dictvalues_as_sequence, /* tp_as_sequence */
4937 0, /* tp_as_mapping */
4938 0, /* tp_hash */
4939 0, /* tp_call */
4940 0, /* tp_str */
4941 PyObject_GenericGetAttr, /* tp_getattro */
4942 0, /* tp_setattro */
4943 0, /* tp_as_buffer */
4944 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4945 0, /* tp_doc */
4946 (traverseproc)dictview_traverse, /* tp_traverse */
4947 0, /* tp_clear */
4948 0, /* tp_richcompare */
4949 0, /* tp_weaklistoffset */
4950 (getiterfunc)dictvalues_iter, /* tp_iter */
4951 0, /* tp_iternext */
4952 dictvalues_methods, /* tp_methods */
4953 .tp_getset = dictview_getset,
4954 };
4955
4956 static PyObject *
dictvalues_new(PyObject * dict,PyObject * Py_UNUSED (ignored))4957 dictvalues_new(PyObject *dict, PyObject *Py_UNUSED(ignored))
4958 {
4959 return _PyDictView_New(dict, &PyDictValues_Type);
4960 }
4961
4962 static PyObject *
dictvalues_reversed(_PyDictViewObject * dv,PyObject * Py_UNUSED (ignored))4963 dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
4964 {
4965 if (dv->dv_dict == NULL) {
4966 Py_RETURN_NONE;
4967 }
4968 return dictiter_new(dv->dv_dict, &PyDictRevIterValue_Type);
4969 }
4970
4971
4972 /* Returns NULL if cannot allocate a new PyDictKeysObject,
4973 but does not set an error */
4974 PyDictKeysObject *
_PyDict_NewKeysForClass(void)4975 _PyDict_NewKeysForClass(void)
4976 {
4977 PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
4978 if (keys == NULL) {
4979 PyErr_Clear();
4980 }
4981 else {
4982 keys->dk_lookup = lookdict_split;
4983 }
4984 return keys;
4985 }
4986
4987 #define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys)
4988
4989 PyObject *
PyObject_GenericGetDict(PyObject * obj,void * context)4990 PyObject_GenericGetDict(PyObject *obj, void *context)
4991 {
4992 PyObject *dict, **dictptr = _PyObject_GetDictPtr(obj);
4993 if (dictptr == NULL) {
4994 PyErr_SetString(PyExc_AttributeError,
4995 "This object has no __dict__");
4996 return NULL;
4997 }
4998 dict = *dictptr;
4999 if (dict == NULL) {
5000 PyTypeObject *tp = Py_TYPE(obj);
5001 if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) {
5002 dictkeys_incref(CACHED_KEYS(tp));
5003 *dictptr = dict = new_dict_with_shared_keys(CACHED_KEYS(tp));
5004 }
5005 else {
5006 *dictptr = dict = PyDict_New();
5007 }
5008 }
5009 Py_XINCREF(dict);
5010 return dict;
5011 }
5012
5013 int
_PyObjectDict_SetItem(PyTypeObject * tp,PyObject ** dictptr,PyObject * key,PyObject * value)5014 _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
5015 PyObject *key, PyObject *value)
5016 {
5017 PyObject *dict;
5018 int res;
5019 PyDictKeysObject *cached;
5020
5021 assert(dictptr != NULL);
5022 if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) {
5023 assert(dictptr != NULL);
5024 dict = *dictptr;
5025 if (dict == NULL) {
5026 dictkeys_incref(cached);
5027 dict = new_dict_with_shared_keys(cached);
5028 if (dict == NULL)
5029 return -1;
5030 *dictptr = dict;
5031 }
5032 if (value == NULL) {
5033 res = PyDict_DelItem(dict, key);
5034 // Since key sharing dict doesn't allow deletion, PyDict_DelItem()
5035 // always converts dict to combined form.
5036 if ((cached = CACHED_KEYS(tp)) != NULL) {
5037 CACHED_KEYS(tp) = NULL;
5038 dictkeys_decref(cached);
5039 }
5040 }
5041 else {
5042 int was_shared = (cached == ((PyDictObject *)dict)->ma_keys);
5043 res = PyDict_SetItem(dict, key, value);
5044 if (was_shared &&
5045 (cached = CACHED_KEYS(tp)) != NULL &&
5046 cached != ((PyDictObject *)dict)->ma_keys) {
5047 /* PyDict_SetItem() may call dictresize and convert split table
5048 * into combined table. In such case, convert it to split
5049 * table again and update type's shared key only when this is
5050 * the only dict sharing key with the type.
5051 *
5052 * This is to allow using shared key in class like this:
5053 *
5054 * class C:
5055 * def __init__(self):
5056 * # one dict resize happens
5057 * self.a, self.b, self.c = 1, 2, 3
5058 * self.d, self.e, self.f = 4, 5, 6
5059 * a = C()
5060 */
5061 if (cached->dk_refcnt == 1) {
5062 CACHED_KEYS(tp) = make_keys_shared(dict);
5063 }
5064 else {
5065 CACHED_KEYS(tp) = NULL;
5066 }
5067 dictkeys_decref(cached);
5068 if (CACHED_KEYS(tp) == NULL && PyErr_Occurred())
5069 return -1;
5070 }
5071 }
5072 } else {
5073 dict = *dictptr;
5074 if (dict == NULL) {
5075 dict = PyDict_New();
5076 if (dict == NULL)
5077 return -1;
5078 *dictptr = dict;
5079 }
5080 if (value == NULL) {
5081 res = PyDict_DelItem(dict, key);
5082 } else {
5083 res = PyDict_SetItem(dict, key, value);
5084 }
5085 }
5086 return res;
5087 }
5088
5089 void
_PyDictKeys_DecRef(PyDictKeysObject * keys)5090 _PyDictKeys_DecRef(PyDictKeysObject *keys)
5091 {
5092 dictkeys_decref(keys);
5093 }
5094