1 /* Dictionary object implementation using a hash table */
2
3 /* The distribution includes a separate file, Objects/dictnotes.txt,
4 describing explorations into dictionary design and optimization.
5 It covers typical dictionary use patterns, the parameters for
6 tuning dictionaries, and several ideas for possible optimizations.
7 */
8
9 /* PyDictKeysObject
10
11 This implements the dictionary's hashtable.
12
13 As of Python 3.6, this is compact and ordered. Basic idea is described here:
14 * https://mail.python.org/pipermail/python-dev/2012-December/123028.html
15 * https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
16
17 layout:
18
19 +---------------+
20 | dk_refcnt |
21 | dk_size |
22 | dk_lookup |
23 | dk_usable |
24 | dk_nentries |
25 +---------------+
26 | dk_indices |
27 | |
28 +---------------+
29 | dk_entries |
30 | |
31 +---------------+
32
33 dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1)
34 or DKIX_DUMMY(-2).
35 Size of indices is dk_size. Type of each index in indices is vary on dk_size:
36
37 * int8 for dk_size <= 128
38 * int16 for 256 <= dk_size <= 2**15
39 * int32 for 2**16 <= dk_size <= 2**31
40 * int64 for 2**32 <= dk_size
41
42 dk_entries is array of PyDictKeyEntry. It's size is USABLE_FRACTION(dk_size).
43 DK_ENTRIES(dk) can be used to get pointer to entries.
44
45 NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of
46 dk_indices entry is signed integer and int16 is used for table which
47 dk_size == 256.
48 */
49
50
51 /*
52 The DictObject can be in one of two forms.
53
54 Either:
55 A combined table:
56 ma_values == NULL, dk_refcnt == 1.
57 Values are stored in the me_value field of the PyDictKeysObject.
58 Or:
59 A split table:
60 ma_values != NULL, dk_refcnt >= 1
61 Values are stored in the ma_values array.
62 Only string (unicode) keys are allowed.
63 All dicts sharing same key must have same insertion order.
64
65 There are four kinds of slots in the table (slot is index, and
66 DK_ENTRIES(keys)[index] if index >= 0):
67
68 1. Unused. index == DKIX_EMPTY
69 Does not hold an active (key, value) pair now and never did. Unused can
70 transition to Active upon key insertion. This is each slot's initial state.
71
72 2. Active. index >= 0, me_key != NULL and me_value != NULL
73 Holds an active (key, value) pair. Active can transition to Dummy or
74 Pending upon key deletion (for combined and split tables respectively).
75 This is the only case in which me_value != NULL.
76
77 3. Dummy. index == DKIX_DUMMY (combined only)
78 Previously held an active (key, value) pair, but that was deleted and an
79 active pair has not yet overwritten the slot. Dummy can transition to
80 Active upon key insertion. Dummy slots cannot be made Unused again
81 else the probe sequence in case of collision would have no way to know
82 they were once active.
83
84 4. Pending. index >= 0, key != NULL, and value == NULL (split only)
85 Not yet inserted in split-table.
86 */
87
88 /*
89 Preserving insertion order
90
91 It's simple for combined table. Since dk_entries is mostly append only, we can
92 get insertion order by just iterating dk_entries.
93
94 One exception is .popitem(). It removes last item in dk_entries and decrement
95 dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in
96 dk_indices, we can't increment dk_usable even though dk_nentries is
97 decremented.
98
99 In split table, inserting into pending entry is allowed only for dk_entries[ix]
100 where ix == mp->ma_used. Inserting into other index and deleting item cause
101 converting the dict to the combined table.
102 */
103
104 /* PyDict_MINSIZE is the starting size for any new dict.
105 * 8 allows dicts with no more than 5 active entries; experiments suggested
106 * this suffices for the majority of dicts (consisting mostly of usually-small
107 * dicts created to pass keyword arguments).
108 * Making this 8, rather than 4 reduces the number of resizes for most
109 * dictionaries, without any significant extra memory use.
110 */
111 #define PyDict_MINSIZE 8
112
113 #include "Python.h"
114 #include "internal/pystate.h"
115 #include "dict-common.h"
116 #include "stringlib/eq.h" /* to get unicode_eq() */
117
118 /*[clinic input]
119 class dict "PyDictObject *" "&PyDict_Type"
120 [clinic start generated code]*/
121 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/
122
123
124 /*
125 To ensure the lookup algorithm terminates, there must be at least one Unused
126 slot (NULL key) in the table.
127 To avoid slowing down lookups on a near-full table, we resize the table when
128 it's USABLE_FRACTION (currently two-thirds) full.
129 */
130
131 #define PERTURB_SHIFT 5
132
133 /*
134 Major subtleties ahead: Most hash schemes depend on having a "good" hash
135 function, in the sense of simulating randomness. Python doesn't: its most
136 important hash functions (for ints) are very regular in common
137 cases:
138
139 >>>[hash(i) for i in range(4)]
140 [0, 1, 2, 3]
141
142 This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
143 the low-order i bits as the initial table index is extremely fast, and there
144 are no collisions at all for dicts indexed by a contiguous range of ints. So
145 this gives better-than-random behavior in common cases, and that's very
146 desirable.
147
148 OTOH, when collisions occur, the tendency to fill contiguous slices of the
149 hash table makes a good collision resolution strategy crucial. Taking only
150 the last i bits of the hash code is also vulnerable: for example, consider
151 the list [i << 16 for i in range(20000)] as a set of keys. Since ints are
152 their own hash codes, and this fits in a dict of size 2**15, the last 15 bits
153 of every hash code are all 0: they *all* map to the same table index.
154
155 But catering to unusual cases should not slow the usual ones, so we just take
156 the last i bits anyway. It's up to collision resolution to do the rest. If
157 we *usually* find the key we're looking for on the first try (and, it turns
158 out, we usually do -- the table load factor is kept under 2/3, so the odds
159 are solidly in our favor), then it makes best sense to keep the initial index
160 computation dirt cheap.
161
162 The first half of collision resolution is to visit table indices via this
163 recurrence:
164
165 j = ((5*j) + 1) mod 2**i
166
167 For any initial j in range(2**i), repeating that 2**i times generates each
168 int in range(2**i) exactly once (see any text on random-number generation for
169 proof). By itself, this doesn't help much: like linear probing (setting
170 j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
171 order. This would be bad, except that's not the only thing we do, and it's
172 actually *good* in the common cases where hash keys are consecutive. In an
173 example that's really too small to make this entirely clear, for a table of
174 size 2**3 the order of indices is:
175
176 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
177
178 If two things come in at index 5, the first place we look after is index 2,
179 not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
180 Linear probing is deadly in this case because there the fixed probe order
181 is the *same* as the order consecutive keys are likely to arrive. But it's
182 extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
183 and certain that consecutive hash codes do not.
184
185 The other half of the strategy is to get the other bits of the hash code
186 into play. This is done by initializing a (unsigned) vrbl "perturb" to the
187 full hash code, and changing the recurrence to:
188
189 perturb >>= PERTURB_SHIFT;
190 j = (5*j) + 1 + perturb;
191 use j % 2**i as the next table index;
192
193 Now the probe sequence depends (eventually) on every bit in the hash code,
194 and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
195 because it quickly magnifies small differences in the bits that didn't affect
196 the initial index. Note that because perturb is unsigned, if the recurrence
197 is executed often enough perturb eventually becomes and remains 0. At that
198 point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
199 that's certain to find an empty slot eventually (since it generates every int
200 in range(2**i), and we make sure there's always at least one empty slot).
201
202 Selecting a good value for PERTURB_SHIFT is a balancing act. You want it
203 small so that the high bits of the hash code continue to affect the probe
204 sequence across iterations; but you want it large so that in really bad cases
205 the high-order hash bits have an effect on early iterations. 5 was "the
206 best" in minimizing total collisions across experiments Tim Peters ran (on
207 both normal and pathological cases), but 4 and 6 weren't significantly worse.
208
209 Historical: Reimer Behrends contributed the idea of using a polynomial-based
210 approach, using repeated multiplication by x in GF(2**n) where an irreducible
211 polynomial for each table size was chosen such that x was a primitive root.
212 Christian Tismer later extended that to use division by x instead, as an
213 efficient way to get the high bits of the hash code into play. This scheme
214 also gave excellent collision statistics, but was more expensive: two
215 if-tests were required inside the loop; computing "the next" index took about
216 the same number of operations but without as much potential parallelism
217 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
218 above, and then shifting perturb can be done while the table index is being
219 masked); and the PyDictObject struct required a member to hold the table's
220 polynomial. In Tim's experiments the current scheme ran faster, produced
221 equally good collision statistics, needed less code & used less memory.
222
223 */
224
225 /* forward declarations */
226 static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
227 Py_hash_t hash, PyObject **value_addr);
228 static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
229 Py_hash_t hash, PyObject **value_addr);
230 static Py_ssize_t
231 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
232 Py_hash_t hash, PyObject **value_addr);
233 static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
234 Py_hash_t hash, PyObject **value_addr);
235
236 static int dictresize(PyDictObject *mp, Py_ssize_t minused);
237
238 static PyObject* dict_iter(PyDictObject *dict);
239
240 /*Global counter used to set ma_version_tag field of dictionary.
241 * It is incremented each time that a dictionary is created and each
242 * time that a dictionary is modified. */
243 static uint64_t pydict_global_version = 0;
244
245 #define DICT_NEXT_VERSION() (++pydict_global_version)
246
247 /* Dictionary reuse scheme to save calls to malloc and free */
248 #ifndef PyDict_MAXFREELIST
249 #define PyDict_MAXFREELIST 80
250 #endif
251 static PyDictObject *free_list[PyDict_MAXFREELIST];
252 static int numfree = 0;
253 static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST];
254 static int numfreekeys = 0;
255
256 #include "clinic/dictobject.c.h"
257
258 int
PyDict_ClearFreeList(void)259 PyDict_ClearFreeList(void)
260 {
261 PyDictObject *op;
262 int ret = numfree + numfreekeys;
263 while (numfree) {
264 op = free_list[--numfree];
265 assert(PyDict_CheckExact(op));
266 PyObject_GC_Del(op);
267 }
268 while (numfreekeys) {
269 PyObject_FREE(keys_free_list[--numfreekeys]);
270 }
271 return ret;
272 }
273
274 /* Print summary info about the state of the optimized allocator */
275 void
_PyDict_DebugMallocStats(FILE * out)276 _PyDict_DebugMallocStats(FILE *out)
277 {
278 _PyDebugAllocatorStats(out,
279 "free PyDictObject", numfree, sizeof(PyDictObject));
280 }
281
282
283 void
PyDict_Fini(void)284 PyDict_Fini(void)
285 {
286 PyDict_ClearFreeList();
287 }
288
289 #define DK_SIZE(dk) ((dk)->dk_size)
290 #if SIZEOF_VOID_P > 4
291 #define DK_IXSIZE(dk) \
292 (DK_SIZE(dk) <= 0xff ? \
293 1 : DK_SIZE(dk) <= 0xffff ? \
294 2 : DK_SIZE(dk) <= 0xffffffff ? \
295 4 : sizeof(int64_t))
296 #else
297 #define DK_IXSIZE(dk) \
298 (DK_SIZE(dk) <= 0xff ? \
299 1 : DK_SIZE(dk) <= 0xffff ? \
300 2 : sizeof(int32_t))
301 #endif
302 #define DK_ENTRIES(dk) \
303 ((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
304
305 #define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA
306 #define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA
307
308 #define DK_INCREF(dk) (DK_DEBUG_INCREF ++(dk)->dk_refcnt)
309 #define DK_DECREF(dk) if (DK_DEBUG_DECREF (--(dk)->dk_refcnt) == 0) free_keys_object(dk)
310 #define DK_MASK(dk) (((dk)->dk_size)-1)
311 #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
312
313 /* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
314 static inline Py_ssize_t
dk_get_index(PyDictKeysObject * keys,Py_ssize_t i)315 dk_get_index(PyDictKeysObject *keys, Py_ssize_t i)
316 {
317 Py_ssize_t s = DK_SIZE(keys);
318 Py_ssize_t ix;
319
320 if (s <= 0xff) {
321 int8_t *indices = (int8_t*)(keys->dk_indices);
322 ix = indices[i];
323 }
324 else if (s <= 0xffff) {
325 int16_t *indices = (int16_t*)(keys->dk_indices);
326 ix = indices[i];
327 }
328 #if SIZEOF_VOID_P > 4
329 else if (s > 0xffffffff) {
330 int64_t *indices = (int64_t*)(keys->dk_indices);
331 ix = indices[i];
332 }
333 #endif
334 else {
335 int32_t *indices = (int32_t*)(keys->dk_indices);
336 ix = indices[i];
337 }
338 assert(ix >= DKIX_DUMMY);
339 return ix;
340 }
341
342 /* write to indices. */
343 static inline void
dk_set_index(PyDictKeysObject * keys,Py_ssize_t i,Py_ssize_t ix)344 dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
345 {
346 Py_ssize_t s = DK_SIZE(keys);
347
348 assert(ix >= DKIX_DUMMY);
349
350 if (s <= 0xff) {
351 int8_t *indices = (int8_t*)(keys->dk_indices);
352 assert(ix <= 0x7f);
353 indices[i] = (char)ix;
354 }
355 else if (s <= 0xffff) {
356 int16_t *indices = (int16_t*)(keys->dk_indices);
357 assert(ix <= 0x7fff);
358 indices[i] = (int16_t)ix;
359 }
360 #if SIZEOF_VOID_P > 4
361 else if (s > 0xffffffff) {
362 int64_t *indices = (int64_t*)(keys->dk_indices);
363 indices[i] = ix;
364 }
365 #endif
366 else {
367 int32_t *indices = (int32_t*)(keys->dk_indices);
368 assert(ix <= 0x7fffffff);
369 indices[i] = (int32_t)ix;
370 }
371 }
372
373
374 /* USABLE_FRACTION is the maximum dictionary load.
375 * Increasing this ratio makes dictionaries more dense resulting in more
376 * collisions. Decreasing it improves sparseness at the expense of spreading
377 * indices over more cache lines and at the cost of total memory consumed.
378 *
379 * USABLE_FRACTION must obey the following:
380 * (0 < USABLE_FRACTION(n) < n) for all n >= 2
381 *
382 * USABLE_FRACTION should be quick to calculate.
383 * Fractions around 1/2 to 2/3 seem to work well in practice.
384 */
385 #define USABLE_FRACTION(n) (((n) << 1)/3)
386
387 /* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
388 * This can be used to reserve enough size to insert n entries without
389 * resizing.
390 */
391 #define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
392
393 /* Alternative fraction that is otherwise close enough to 2n/3 to make
394 * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
395 * 32 * 2/3 = 21, 32 * 5/8 = 20.
396 * Its advantage is that it is faster to compute on machines with slow division.
397 * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
398 */
399
400 /* GROWTH_RATE. Growth rate upon hitting maximum load.
401 * Currently set to used*3.
402 * This means that dicts double in size when growing without deletions,
403 * but have more head room when the number of deletions is on a par with the
404 * number of insertions. See also bpo-17563 and bpo-33205.
405 *
406 * GROWTH_RATE was set to used*4 up to version 3.2.
407 * GROWTH_RATE was set to used*2 in version 3.3.0
408 * GROWTH_RATE was set to used*2 + capacity/2 in 3.4.0-3.6.0.
409 */
410 #define GROWTH_RATE(d) ((d)->ma_used*3)
411
412 #define ENSURE_ALLOWS_DELETIONS(d) \
413 if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \
414 (d)->ma_keys->dk_lookup = lookdict_unicode; \
415 }
416
417 /* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
418 * (which cannot fail and thus can do no allocation).
419 */
420 static PyDictKeysObject empty_keys_struct = {
421 1, /* dk_refcnt */
422 1, /* dk_size */
423 lookdict_split, /* dk_lookup */
424 0, /* dk_usable (immutable) */
425 0, /* dk_nentries */
426 {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
427 DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
428 };
429
430 static PyObject *empty_values[1] = { NULL };
431
432 #define Py_EMPTY_KEYS &empty_keys_struct
433
434 /* Uncomment to check the dict content in _PyDict_CheckConsistency() */
435 /* #define DEBUG_PYDICT */
436
437
438 #ifndef NDEBUG
439 static int
_PyDict_CheckConsistency(PyDictObject * mp)440 _PyDict_CheckConsistency(PyDictObject *mp)
441 {
442 PyDictKeysObject *keys = mp->ma_keys;
443 int splitted = _PyDict_HasSplitTable(mp);
444 Py_ssize_t usable = USABLE_FRACTION(keys->dk_size);
445 #ifdef DEBUG_PYDICT
446 PyDictKeyEntry *entries = DK_ENTRIES(keys);
447 Py_ssize_t i;
448 #endif
449
450 assert(0 <= mp->ma_used && mp->ma_used <= usable);
451 assert(IS_POWER_OF_2(keys->dk_size));
452 assert(0 <= keys->dk_usable
453 && keys->dk_usable <= usable);
454 assert(0 <= keys->dk_nentries
455 && keys->dk_nentries <= usable);
456 assert(keys->dk_usable + keys->dk_nentries <= usable);
457
458 if (!splitted) {
459 /* combined table */
460 assert(keys->dk_refcnt == 1);
461 }
462
463 #ifdef DEBUG_PYDICT
464 for (i=0; i < keys->dk_size; i++) {
465 Py_ssize_t ix = dk_get_index(keys, i);
466 assert(DKIX_DUMMY <= ix && ix <= usable);
467 }
468
469 for (i=0; i < usable; i++) {
470 PyDictKeyEntry *entry = &entries[i];
471 PyObject *key = entry->me_key;
472
473 if (key != NULL) {
474 if (PyUnicode_CheckExact(key)) {
475 Py_hash_t hash = ((PyASCIIObject *)key)->hash;
476 assert(hash != -1);
477 assert(entry->me_hash == hash);
478 }
479 else {
480 /* test_dict fails if PyObject_Hash() is called again */
481 assert(entry->me_hash != -1);
482 }
483 if (!splitted) {
484 assert(entry->me_value != NULL);
485 }
486 }
487
488 if (splitted) {
489 assert(entry->me_value == NULL);
490 }
491 }
492
493 if (splitted) {
494 /* splitted table */
495 for (i=0; i < mp->ma_used; i++) {
496 assert(mp->ma_values[i] != NULL);
497 }
498 }
499 #endif
500
501 return 1;
502 }
503 #endif
504
505
new_keys_object(Py_ssize_t size)506 static PyDictKeysObject *new_keys_object(Py_ssize_t size)
507 {
508 PyDictKeysObject *dk;
509 Py_ssize_t es, usable;
510
511 assert(size >= PyDict_MINSIZE);
512 assert(IS_POWER_OF_2(size));
513
514 usable = USABLE_FRACTION(size);
515 if (size <= 0xff) {
516 es = 1;
517 }
518 else if (size <= 0xffff) {
519 es = 2;
520 }
521 #if SIZEOF_VOID_P > 4
522 else if (size <= 0xffffffff) {
523 es = 4;
524 }
525 #endif
526 else {
527 es = sizeof(Py_ssize_t);
528 }
529
530 if (size == PyDict_MINSIZE && numfreekeys > 0) {
531 dk = keys_free_list[--numfreekeys];
532 }
533 else {
534 dk = PyObject_MALLOC(sizeof(PyDictKeysObject)
535 + es * size
536 + sizeof(PyDictKeyEntry) * usable);
537 if (dk == NULL) {
538 PyErr_NoMemory();
539 return NULL;
540 }
541 }
542 DK_DEBUG_INCREF dk->dk_refcnt = 1;
543 dk->dk_size = size;
544 dk->dk_usable = usable;
545 dk->dk_lookup = lookdict_unicode_nodummy;
546 dk->dk_nentries = 0;
547 memset(&dk->dk_indices[0], 0xff, es * size);
548 memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
549 return dk;
550 }
551
552 static void
free_keys_object(PyDictKeysObject * keys)553 free_keys_object(PyDictKeysObject *keys)
554 {
555 PyDictKeyEntry *entries = DK_ENTRIES(keys);
556 Py_ssize_t i, n;
557 for (i = 0, n = keys->dk_nentries; i < n; i++) {
558 Py_XDECREF(entries[i].me_key);
559 Py_XDECREF(entries[i].me_value);
560 }
561 if (keys->dk_size == PyDict_MINSIZE && numfreekeys < PyDict_MAXFREELIST) {
562 keys_free_list[numfreekeys++] = keys;
563 return;
564 }
565 PyObject_FREE(keys);
566 }
567
568 #define new_values(size) PyMem_NEW(PyObject *, size)
569 #define free_values(values) PyMem_FREE(values)
570
571 /* Consumes a reference to the keys object */
572 static PyObject *
new_dict(PyDictKeysObject * keys,PyObject ** values)573 new_dict(PyDictKeysObject *keys, PyObject **values)
574 {
575 PyDictObject *mp;
576 assert(keys != NULL);
577 if (numfree) {
578 mp = free_list[--numfree];
579 assert (mp != NULL);
580 assert (Py_TYPE(mp) == &PyDict_Type);
581 _Py_NewReference((PyObject *)mp);
582 }
583 else {
584 mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
585 if (mp == NULL) {
586 DK_DECREF(keys);
587 free_values(values);
588 return NULL;
589 }
590 }
591 mp->ma_keys = keys;
592 mp->ma_values = values;
593 mp->ma_used = 0;
594 mp->ma_version_tag = DICT_NEXT_VERSION();
595 assert(_PyDict_CheckConsistency(mp));
596 return (PyObject *)mp;
597 }
598
599 /* Consumes a reference to the keys object */
600 static PyObject *
new_dict_with_shared_keys(PyDictKeysObject * keys)601 new_dict_with_shared_keys(PyDictKeysObject *keys)
602 {
603 PyObject **values;
604 Py_ssize_t i, size;
605
606 size = USABLE_FRACTION(DK_SIZE(keys));
607 values = new_values(size);
608 if (values == NULL) {
609 DK_DECREF(keys);
610 return PyErr_NoMemory();
611 }
612 for (i = 0; i < size; i++) {
613 values[i] = NULL;
614 }
615 return new_dict(keys, values);
616 }
617
618
619 static PyObject *
clone_combined_dict(PyDictObject * orig)620 clone_combined_dict(PyDictObject *orig)
621 {
622 assert(PyDict_CheckExact(orig));
623 assert(orig->ma_values == NULL);
624 assert(orig->ma_keys->dk_refcnt == 1);
625
626 Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys);
627 PyDictKeysObject *keys = PyObject_Malloc(keys_size);
628 if (keys == NULL) {
629 PyErr_NoMemory();
630 return NULL;
631 }
632
633 memcpy(keys, orig->ma_keys, keys_size);
634
635 /* After copying key/value pairs, we need to incref all
636 keys and values and they are about to be co-owned by a
637 new dict object. */
638 PyDictKeyEntry *ep0 = DK_ENTRIES(keys);
639 Py_ssize_t n = keys->dk_nentries;
640 for (Py_ssize_t i = 0; i < n; i++) {
641 PyDictKeyEntry *entry = &ep0[i];
642 PyObject *value = entry->me_value;
643 if (value != NULL) {
644 Py_INCREF(value);
645 Py_INCREF(entry->me_key);
646 }
647 }
648
649 PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
650 if (new == NULL) {
651 /* In case of an error, `new_dict()` takes care of
652 cleaning up `keys`. */
653 return NULL;
654 }
655 new->ma_used = orig->ma_used;
656 assert(_PyDict_CheckConsistency(new));
657 if (_PyObject_GC_IS_TRACKED(orig)) {
658 /* Maintain tracking. */
659 _PyObject_GC_TRACK(new);
660 }
661
662 /* Since we copied the keys table we now have an extra reference
663 in the system. Manually call _Py_INC_REFTOTAL to signal that
664 we have it now; calling DK_INCREF would be an error as
665 keys->dk_refcnt is already set to 1 (after memcpy). */
666 _Py_INC_REFTOTAL;
667
668 return (PyObject *)new;
669 }
670
671 PyObject *
PyDict_New(void)672 PyDict_New(void)
673 {
674 PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
675 if (keys == NULL)
676 return NULL;
677 return new_dict(keys, NULL);
678 }
679
680 /* Search index of hash table from offset of entry table */
681 static Py_ssize_t
lookdict_index(PyDictKeysObject * k,Py_hash_t hash,Py_ssize_t index)682 lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
683 {
684 size_t mask = DK_MASK(k);
685 size_t perturb = (size_t)hash;
686 size_t i = (size_t)hash & mask;
687
688 for (;;) {
689 Py_ssize_t ix = dk_get_index(k, i);
690 if (ix == index) {
691 return i;
692 }
693 if (ix == DKIX_EMPTY) {
694 return DKIX_EMPTY;
695 }
696 perturb >>= PERTURB_SHIFT;
697 i = mask & (i*5 + perturb + 1);
698 }
699 Py_UNREACHABLE();
700 }
701
702 /*
703 The basic lookup function used by all operations.
704 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
705 Open addressing is preferred over chaining since the link overhead for
706 chaining would be substantial (100% with typical malloc overhead).
707
708 The initial probe index is computed as hash mod the table size. Subsequent
709 probe indices are computed as explained earlier.
710
711 All arithmetic on hash should ignore overflow.
712
713 The details in this version are due to Tim Peters, building on many past
714 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
715 Christian Tismer.
716
717 lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
718 comparison raises an exception.
719 lookdict_unicode() below is specialized to string keys, comparison of which can
720 never raise an exception; that function can never return DKIX_ERROR when key
721 is string. Otherwise, it falls back to lookdict().
722 lookdict_unicode_nodummy is further specialized for string keys that cannot be
723 the <dummy> value.
724 For both, when the key isn't found a DKIX_EMPTY is returned.
725 */
726 static Py_ssize_t _Py_HOT_FUNCTION
lookdict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)727 lookdict(PyDictObject *mp, PyObject *key,
728 Py_hash_t hash, PyObject **value_addr)
729 {
730 size_t i, mask, perturb;
731 PyDictKeysObject *dk;
732 PyDictKeyEntry *ep0;
733
734 top:
735 dk = mp->ma_keys;
736 ep0 = DK_ENTRIES(dk);
737 mask = DK_MASK(dk);
738 perturb = hash;
739 i = (size_t)hash & mask;
740
741 for (;;) {
742 Py_ssize_t ix = dk_get_index(dk, i);
743 if (ix == DKIX_EMPTY) {
744 *value_addr = NULL;
745 return ix;
746 }
747 if (ix >= 0) {
748 PyDictKeyEntry *ep = &ep0[ix];
749 assert(ep->me_key != NULL);
750 if (ep->me_key == key) {
751 *value_addr = ep->me_value;
752 return ix;
753 }
754 if (ep->me_hash == hash) {
755 PyObject *startkey = ep->me_key;
756 Py_INCREF(startkey);
757 int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
758 Py_DECREF(startkey);
759 if (cmp < 0) {
760 *value_addr = NULL;
761 return DKIX_ERROR;
762 }
763 if (dk == mp->ma_keys && ep->me_key == startkey) {
764 if (cmp > 0) {
765 *value_addr = ep->me_value;
766 return ix;
767 }
768 }
769 else {
770 /* The dict was mutated, restart */
771 goto top;
772 }
773 }
774 }
775 perturb >>= PERTURB_SHIFT;
776 i = (i*5 + perturb + 1) & mask;
777 }
778 Py_UNREACHABLE();
779 }
780
781 /* Specialized version for string-only keys */
782 static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)783 lookdict_unicode(PyDictObject *mp, PyObject *key,
784 Py_hash_t hash, PyObject **value_addr)
785 {
786 assert(mp->ma_values == NULL);
787 /* Make sure this function doesn't have to handle non-unicode keys,
788 including subclasses of str; e.g., one reason to subclass
789 unicodes is to override __eq__, and for speed we don't cater to
790 that here. */
791 if (!PyUnicode_CheckExact(key)) {
792 mp->ma_keys->dk_lookup = lookdict;
793 return lookdict(mp, key, hash, value_addr);
794 }
795
796 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
797 size_t mask = DK_MASK(mp->ma_keys);
798 size_t perturb = (size_t)hash;
799 size_t i = (size_t)hash & mask;
800
801 for (;;) {
802 Py_ssize_t ix = dk_get_index(mp->ma_keys, i);
803 if (ix == DKIX_EMPTY) {
804 *value_addr = NULL;
805 return DKIX_EMPTY;
806 }
807 if (ix >= 0) {
808 PyDictKeyEntry *ep = &ep0[ix];
809 assert(ep->me_key != NULL);
810 assert(PyUnicode_CheckExact(ep->me_key));
811 if (ep->me_key == key ||
812 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
813 *value_addr = ep->me_value;
814 return ix;
815 }
816 }
817 perturb >>= PERTURB_SHIFT;
818 i = mask & (i*5 + perturb + 1);
819 }
820 Py_UNREACHABLE();
821 }
822
823 /* Faster version of lookdict_unicode when it is known that no <dummy> keys
824 * will be present. */
825 static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode_nodummy(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)826 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
827 Py_hash_t hash, PyObject **value_addr)
828 {
829 assert(mp->ma_values == NULL);
830 /* Make sure this function doesn't have to handle non-unicode keys,
831 including subclasses of str; e.g., one reason to subclass
832 unicodes is to override __eq__, and for speed we don't cater to
833 that here. */
834 if (!PyUnicode_CheckExact(key)) {
835 mp->ma_keys->dk_lookup = lookdict;
836 return lookdict(mp, key, hash, value_addr);
837 }
838
839 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
840 size_t mask = DK_MASK(mp->ma_keys);
841 size_t perturb = (size_t)hash;
842 size_t i = (size_t)hash & mask;
843
844 for (;;) {
845 Py_ssize_t ix = dk_get_index(mp->ma_keys, i);
846 assert (ix != DKIX_DUMMY);
847 if (ix == DKIX_EMPTY) {
848 *value_addr = NULL;
849 return DKIX_EMPTY;
850 }
851 PyDictKeyEntry *ep = &ep0[ix];
852 assert(ep->me_key != NULL);
853 assert(PyUnicode_CheckExact(ep->me_key));
854 if (ep->me_key == key ||
855 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
856 *value_addr = ep->me_value;
857 return ix;
858 }
859 perturb >>= PERTURB_SHIFT;
860 i = mask & (i*5 + perturb + 1);
861 }
862 Py_UNREACHABLE();
863 }
864
865 /* Version of lookdict for split tables.
866 * All split tables and only split tables use this lookup function.
867 * Split tables only contain unicode keys and no dummy keys,
868 * so algorithm is the same as lookdict_unicode_nodummy.
869 */
870 static Py_ssize_t _Py_HOT_FUNCTION
lookdict_split(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject ** value_addr)871 lookdict_split(PyDictObject *mp, PyObject *key,
872 Py_hash_t hash, PyObject **value_addr)
873 {
874 /* mp must split table */
875 assert(mp->ma_values != NULL);
876 if (!PyUnicode_CheckExact(key)) {
877 Py_ssize_t ix = lookdict(mp, key, hash, value_addr);
878 if (ix >= 0) {
879 *value_addr = mp->ma_values[ix];
880 }
881 return ix;
882 }
883
884 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
885 size_t mask = DK_MASK(mp->ma_keys);
886 size_t perturb = (size_t)hash;
887 size_t i = (size_t)hash & mask;
888
889 for (;;) {
890 Py_ssize_t ix = dk_get_index(mp->ma_keys, i);
891 assert (ix != DKIX_DUMMY);
892 if (ix == DKIX_EMPTY) {
893 *value_addr = NULL;
894 return DKIX_EMPTY;
895 }
896 PyDictKeyEntry *ep = &ep0[ix];
897 assert(ep->me_key != NULL);
898 assert(PyUnicode_CheckExact(ep->me_key));
899 if (ep->me_key == key ||
900 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
901 *value_addr = mp->ma_values[ix];
902 return ix;
903 }
904 perturb >>= PERTURB_SHIFT;
905 i = mask & (i*5 + perturb + 1);
906 }
907 Py_UNREACHABLE();
908 }
909
910 int
_PyDict_HasOnlyStringKeys(PyObject * dict)911 _PyDict_HasOnlyStringKeys(PyObject *dict)
912 {
913 Py_ssize_t pos = 0;
914 PyObject *key, *value;
915 assert(PyDict_Check(dict));
916 /* Shortcut */
917 if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict)
918 return 1;
919 while (PyDict_Next(dict, &pos, &key, &value))
920 if (!PyUnicode_Check(key))
921 return 0;
922 return 1;
923 }
924
925 #define MAINTAIN_TRACKING(mp, key, value) \
926 do { \
927 if (!_PyObject_GC_IS_TRACKED(mp)) { \
928 if (_PyObject_GC_MAY_BE_TRACKED(key) || \
929 _PyObject_GC_MAY_BE_TRACKED(value)) { \
930 _PyObject_GC_TRACK(mp); \
931 } \
932 } \
933 } while(0)
934
935 void
_PyDict_MaybeUntrack(PyObject * op)936 _PyDict_MaybeUntrack(PyObject *op)
937 {
938 PyDictObject *mp;
939 PyObject *value;
940 Py_ssize_t i, numentries;
941 PyDictKeyEntry *ep0;
942
943 if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
944 return;
945
946 mp = (PyDictObject *) op;
947 ep0 = DK_ENTRIES(mp->ma_keys);
948 numentries = mp->ma_keys->dk_nentries;
949 if (_PyDict_HasSplitTable(mp)) {
950 for (i = 0; i < numentries; i++) {
951 if ((value = mp->ma_values[i]) == NULL)
952 continue;
953 if (_PyObject_GC_MAY_BE_TRACKED(value)) {
954 assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
955 return;
956 }
957 }
958 }
959 else {
960 for (i = 0; i < numentries; i++) {
961 if ((value = ep0[i].me_value) == NULL)
962 continue;
963 if (_PyObject_GC_MAY_BE_TRACKED(value) ||
964 _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
965 return;
966 }
967 }
968 _PyObject_GC_UNTRACK(op);
969 }
970
971 /* Internal function to find slot for an item from its hash
972 when it is known that the key is not present in the dict.
973
974 The dict must be combined. */
975 static Py_ssize_t
find_empty_slot(PyDictKeysObject * keys,Py_hash_t hash)976 find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
977 {
978 assert(keys != NULL);
979
980 const size_t mask = DK_MASK(keys);
981 size_t i = hash & mask;
982 Py_ssize_t ix = dk_get_index(keys, i);
983 for (size_t perturb = hash; ix >= 0;) {
984 perturb >>= PERTURB_SHIFT;
985 i = (i*5 + perturb + 1) & mask;
986 ix = dk_get_index(keys, i);
987 }
988 return i;
989 }
990
991 static int
insertion_resize(PyDictObject * mp)992 insertion_resize(PyDictObject *mp)
993 {
994 return dictresize(mp, GROWTH_RATE(mp));
995 }
996
997 /*
998 Internal routine to insert a new item into the table.
999 Used both by the internal resize routine and by the public insert routine.
1000 Returns -1 if an error occurred, or 0 on success.
1001 */
1002 static int
insertdict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject * value)1003 insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
1004 {
1005 PyObject *old_value;
1006 PyDictKeyEntry *ep;
1007
1008 Py_INCREF(key);
1009 Py_INCREF(value);
1010 if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
1011 if (insertion_resize(mp) < 0)
1012 goto Fail;
1013 }
1014
1015 Py_ssize_t ix = mp->ma_keys->dk_lookup(mp, key, hash, &old_value);
1016 if (ix == DKIX_ERROR)
1017 goto Fail;
1018
1019 assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
1020 MAINTAIN_TRACKING(mp, key, value);
1021
1022 /* When insertion order is different from shared key, we can't share
1023 * the key anymore. Convert this instance to combine table.
1024 */
1025 if (_PyDict_HasSplitTable(mp) &&
1026 ((ix >= 0 && old_value == NULL && mp->ma_used != ix) ||
1027 (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
1028 if (insertion_resize(mp) < 0)
1029 goto Fail;
1030 ix = DKIX_EMPTY;
1031 }
1032
1033 if (ix == DKIX_EMPTY) {
1034 /* Insert into new slot. */
1035 assert(old_value == NULL);
1036 if (mp->ma_keys->dk_usable <= 0) {
1037 /* Need to resize. */
1038 if (insertion_resize(mp) < 0)
1039 goto Fail;
1040 }
1041 Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
1042 ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
1043 dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
1044 ep->me_key = key;
1045 ep->me_hash = hash;
1046 if (mp->ma_values) {
1047 assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
1048 mp->ma_values[mp->ma_keys->dk_nentries] = value;
1049 }
1050 else {
1051 ep->me_value = value;
1052 }
1053 mp->ma_used++;
1054 mp->ma_version_tag = DICT_NEXT_VERSION();
1055 mp->ma_keys->dk_usable--;
1056 mp->ma_keys->dk_nentries++;
1057 assert(mp->ma_keys->dk_usable >= 0);
1058 assert(_PyDict_CheckConsistency(mp));
1059 return 0;
1060 }
1061
1062 if (_PyDict_HasSplitTable(mp)) {
1063 mp->ma_values[ix] = value;
1064 if (old_value == NULL) {
1065 /* pending state */
1066 assert(ix == mp->ma_used);
1067 mp->ma_used++;
1068 }
1069 }
1070 else {
1071 assert(old_value != NULL);
1072 DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
1073 }
1074
1075 mp->ma_version_tag = DICT_NEXT_VERSION();
1076 Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
1077 assert(_PyDict_CheckConsistency(mp));
1078 Py_DECREF(key);
1079 return 0;
1080
1081 Fail:
1082 Py_DECREF(value);
1083 Py_DECREF(key);
1084 return -1;
1085 }
1086
1087 /*
1088 Internal routine used by dictresize() to build a hashtable of entries.
1089 */
1090 static void
build_indices(PyDictKeysObject * keys,PyDictKeyEntry * ep,Py_ssize_t n)1091 build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
1092 {
1093 size_t mask = (size_t)DK_SIZE(keys) - 1;
1094 for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
1095 Py_hash_t hash = ep->me_hash;
1096 size_t i = hash & mask;
1097 for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) {
1098 perturb >>= PERTURB_SHIFT;
1099 i = mask & (i*5 + perturb + 1);
1100 }
1101 dk_set_index(keys, i, ix);
1102 }
1103 }
1104
1105 /*
1106 Restructure the table by allocating a new table and reinserting all
1107 items again. When entries have been deleted, the new table may
1108 actually be smaller than the old one.
1109 If a table is split (its keys and hashes are shared, its values are not),
1110 then the values are temporarily copied into the table, it is resized as
1111 a combined table, then the me_value slots in the old table are NULLed out.
1112 After resizing a table is always combined,
1113 but can be resplit by make_keys_shared().
1114 */
1115 static int
dictresize(PyDictObject * mp,Py_ssize_t minsize)1116 dictresize(PyDictObject *mp, Py_ssize_t minsize)
1117 {
1118 Py_ssize_t newsize, numentries;
1119 PyDictKeysObject *oldkeys;
1120 PyObject **oldvalues;
1121 PyDictKeyEntry *oldentries, *newentries;
1122
1123 /* Find the smallest table size > minused. */
1124 for (newsize = PyDict_MINSIZE;
1125 newsize < minsize && newsize > 0;
1126 newsize <<= 1)
1127 ;
1128 if (newsize <= 0) {
1129 PyErr_NoMemory();
1130 return -1;
1131 }
1132
1133 oldkeys = mp->ma_keys;
1134
1135 /* NOTE: Current odict checks mp->ma_keys to detect resize happen.
1136 * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
1137 * TODO: Try reusing oldkeys when reimplement odict.
1138 */
1139
1140 /* Allocate a new table. */
1141 mp->ma_keys = new_keys_object(newsize);
1142 if (mp->ma_keys == NULL) {
1143 mp->ma_keys = oldkeys;
1144 return -1;
1145 }
1146 // New table must be large enough.
1147 assert(mp->ma_keys->dk_usable >= mp->ma_used);
1148 if (oldkeys->dk_lookup == lookdict)
1149 mp->ma_keys->dk_lookup = lookdict;
1150
1151 numentries = mp->ma_used;
1152 oldentries = DK_ENTRIES(oldkeys);
1153 newentries = DK_ENTRIES(mp->ma_keys);
1154 oldvalues = mp->ma_values;
1155 if (oldvalues != NULL) {
1156 /* Convert split table into new combined table.
1157 * We must incref keys; we can transfer values.
1158 * Note that values of split table is always dense.
1159 */
1160 for (Py_ssize_t i = 0; i < numentries; i++) {
1161 assert(oldvalues[i] != NULL);
1162 PyDictKeyEntry *ep = &oldentries[i];
1163 PyObject *key = ep->me_key;
1164 Py_INCREF(key);
1165 newentries[i].me_key = key;
1166 newentries[i].me_hash = ep->me_hash;
1167 newentries[i].me_value = oldvalues[i];
1168 }
1169
1170 DK_DECREF(oldkeys);
1171 mp->ma_values = NULL;
1172 if (oldvalues != empty_values) {
1173 free_values(oldvalues);
1174 }
1175 }
1176 else { // combined table.
1177 if (oldkeys->dk_nentries == numentries) {
1178 memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
1179 }
1180 else {
1181 PyDictKeyEntry *ep = oldentries;
1182 for (Py_ssize_t i = 0; i < numentries; i++) {
1183 while (ep->me_value == NULL)
1184 ep++;
1185 newentries[i] = *ep++;
1186 }
1187 }
1188
1189 assert(oldkeys->dk_lookup != lookdict_split);
1190 assert(oldkeys->dk_refcnt == 1);
1191 if (oldkeys->dk_size == PyDict_MINSIZE &&
1192 numfreekeys < PyDict_MAXFREELIST) {
1193 DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys;
1194 }
1195 else {
1196 DK_DEBUG_DECREF PyObject_FREE(oldkeys);
1197 }
1198 }
1199
1200 build_indices(mp->ma_keys, newentries, numentries);
1201 mp->ma_keys->dk_usable -= numentries;
1202 mp->ma_keys->dk_nentries = numentries;
1203 return 0;
1204 }
1205
1206 /* Returns NULL if unable to split table.
1207 * A NULL return does not necessarily indicate an error */
1208 static PyDictKeysObject *
make_keys_shared(PyObject * op)1209 make_keys_shared(PyObject *op)
1210 {
1211 Py_ssize_t i;
1212 Py_ssize_t size;
1213 PyDictObject *mp = (PyDictObject *)op;
1214
1215 if (!PyDict_CheckExact(op))
1216 return NULL;
1217 if (!_PyDict_HasSplitTable(mp)) {
1218 PyDictKeyEntry *ep0;
1219 PyObject **values;
1220 assert(mp->ma_keys->dk_refcnt == 1);
1221 if (mp->ma_keys->dk_lookup == lookdict) {
1222 return NULL;
1223 }
1224 else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
1225 /* Remove dummy keys */
1226 if (dictresize(mp, DK_SIZE(mp->ma_keys)))
1227 return NULL;
1228 }
1229 assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
1230 /* Copy values into a new array */
1231 ep0 = DK_ENTRIES(mp->ma_keys);
1232 size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
1233 values = new_values(size);
1234 if (values == NULL) {
1235 PyErr_SetString(PyExc_MemoryError,
1236 "Not enough memory to allocate new values array");
1237 return NULL;
1238 }
1239 for (i = 0; i < size; i++) {
1240 values[i] = ep0[i].me_value;
1241 ep0[i].me_value = NULL;
1242 }
1243 mp->ma_keys->dk_lookup = lookdict_split;
1244 mp->ma_values = values;
1245 }
1246 DK_INCREF(mp->ma_keys);
1247 return mp->ma_keys;
1248 }
1249
1250 PyObject *
_PyDict_NewPresized(Py_ssize_t minused)1251 _PyDict_NewPresized(Py_ssize_t minused)
1252 {
1253 const Py_ssize_t max_presize = 128 * 1024;
1254 Py_ssize_t newsize;
1255 PyDictKeysObject *new_keys;
1256
1257 /* There are no strict guarantee that returned dict can contain minused
1258 * items without resize. So we create medium size dict instead of very
1259 * large dict or MemoryError.
1260 */
1261 if (minused > USABLE_FRACTION(max_presize)) {
1262 newsize = max_presize;
1263 }
1264 else {
1265 Py_ssize_t minsize = ESTIMATE_SIZE(minused);
1266 newsize = PyDict_MINSIZE;
1267 while (newsize < minsize) {
1268 newsize <<= 1;
1269 }
1270 }
1271 assert(IS_POWER_OF_2(newsize));
1272
1273 new_keys = new_keys_object(newsize);
1274 if (new_keys == NULL)
1275 return NULL;
1276 return new_dict(new_keys, NULL);
1277 }
1278
1279 /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
1280 * that may occur (originally dicts supported only string keys, and exceptions
1281 * weren't possible). So, while the original intent was that a NULL return
1282 * meant the key wasn't present, in reality it can mean that, or that an error
1283 * (suppressed) occurred while computing the key's hash, or that some error
1284 * (suppressed) occurred when comparing keys in the dict's internal probe
1285 * sequence. A nasty example of the latter is when a Python-coded comparison
1286 * function hits a stack-depth error, which can cause this to return NULL
1287 * even if the key is present.
1288 */
1289 PyObject *
PyDict_GetItem(PyObject * op,PyObject * key)1290 PyDict_GetItem(PyObject *op, PyObject *key)
1291 {
1292 Py_hash_t hash;
1293 Py_ssize_t ix;
1294 PyDictObject *mp = (PyDictObject *)op;
1295 PyThreadState *tstate;
1296 PyObject *value;
1297
1298 if (!PyDict_Check(op))
1299 return NULL;
1300 if (!PyUnicode_CheckExact(key) ||
1301 (hash = ((PyASCIIObject *) key)->hash) == -1)
1302 {
1303 hash = PyObject_Hash(key);
1304 if (hash == -1) {
1305 PyErr_Clear();
1306 return NULL;
1307 }
1308 }
1309
1310 /* We can arrive here with a NULL tstate during initialization: try
1311 running "python -Wi" for an example related to string interning.
1312 Let's just hope that no exception occurs then... This must be
1313 _PyThreadState_Current and not PyThreadState_GET() because in debug
1314 mode, the latter complains if tstate is NULL. */
1315 tstate = PyThreadState_GET();
1316 if (tstate != NULL && tstate->curexc_type != NULL) {
1317 /* preserve the existing exception */
1318 PyObject *err_type, *err_value, *err_tb;
1319 PyErr_Fetch(&err_type, &err_value, &err_tb);
1320 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
1321 /* ignore errors */
1322 PyErr_Restore(err_type, err_value, err_tb);
1323 if (ix < 0)
1324 return NULL;
1325 }
1326 else {
1327 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
1328 if (ix < 0) {
1329 PyErr_Clear();
1330 return NULL;
1331 }
1332 }
1333 return value;
1334 }
1335
1336 /* Same as PyDict_GetItemWithError() but with hash supplied by caller.
1337 This returns NULL *with* an exception set if an exception occurred.
1338 It returns NULL *without* an exception set if the key wasn't present.
1339 */
1340 PyObject *
_PyDict_GetItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)1341 _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
1342 {
1343 Py_ssize_t ix;
1344 PyDictObject *mp = (PyDictObject *)op;
1345 PyObject *value;
1346
1347 if (!PyDict_Check(op)) {
1348 PyErr_BadInternalCall();
1349 return NULL;
1350 }
1351
1352 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
1353 if (ix < 0) {
1354 return NULL;
1355 }
1356 return value;
1357 }
1358
1359 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
1360 This returns NULL *with* an exception set if an exception occurred.
1361 It returns NULL *without* an exception set if the key wasn't present.
1362 */
1363 PyObject *
PyDict_GetItemWithError(PyObject * op,PyObject * key)1364 PyDict_GetItemWithError(PyObject *op, PyObject *key)
1365 {
1366 Py_ssize_t ix;
1367 Py_hash_t hash;
1368 PyDictObject*mp = (PyDictObject *)op;
1369 PyObject *value;
1370
1371 if (!PyDict_Check(op)) {
1372 PyErr_BadInternalCall();
1373 return NULL;
1374 }
1375 if (!PyUnicode_CheckExact(key) ||
1376 (hash = ((PyASCIIObject *) key)->hash) == -1)
1377 {
1378 hash = PyObject_Hash(key);
1379 if (hash == -1) {
1380 return NULL;
1381 }
1382 }
1383
1384 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
1385 if (ix < 0)
1386 return NULL;
1387 return value;
1388 }
1389
1390 PyObject *
_PyDict_GetItemIdWithError(PyObject * dp,struct _Py_Identifier * key)1391 _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
1392 {
1393 PyObject *kv;
1394 kv = _PyUnicode_FromId(key); /* borrowed */
1395 if (kv == NULL)
1396 return NULL;
1397 return PyDict_GetItemWithError(dp, kv);
1398 }
1399
1400 /* Fast version of global value lookup (LOAD_GLOBAL).
1401 * Lookup in globals, then builtins.
1402 *
1403 * Raise an exception and return NULL if an error occurred (ex: computing the
1404 * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
1405 * exist. Return the value if the key exists.
1406 */
1407 PyObject *
_PyDict_LoadGlobal(PyDictObject * globals,PyDictObject * builtins,PyObject * key)1408 _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
1409 {
1410 Py_ssize_t ix;
1411 Py_hash_t hash;
1412 PyObject *value;
1413
1414 if (!PyUnicode_CheckExact(key) ||
1415 (hash = ((PyASCIIObject *) key)->hash) == -1)
1416 {
1417 hash = PyObject_Hash(key);
1418 if (hash == -1)
1419 return NULL;
1420 }
1421
1422 /* namespace 1: globals */
1423 ix = globals->ma_keys->dk_lookup(globals, key, hash, &value);
1424 if (ix == DKIX_ERROR)
1425 return NULL;
1426 if (ix != DKIX_EMPTY && value != NULL)
1427 return value;
1428
1429 /* namespace 2: builtins */
1430 ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value);
1431 if (ix < 0)
1432 return NULL;
1433 return value;
1434 }
1435
1436 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
1437 * dictionary if it's merely replacing the value for an existing key.
1438 * This means that it's safe to loop over a dictionary with PyDict_Next()
1439 * and occasionally replace a value -- but you can't insert new keys or
1440 * remove them.
1441 */
1442 int
PyDict_SetItem(PyObject * op,PyObject * key,PyObject * value)1443 PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value)
1444 {
1445 PyDictObject *mp;
1446 Py_hash_t hash;
1447 if (!PyDict_Check(op)) {
1448 PyErr_BadInternalCall();
1449 return -1;
1450 }
1451 assert(key);
1452 assert(value);
1453 mp = (PyDictObject *)op;
1454 if (!PyUnicode_CheckExact(key) ||
1455 (hash = ((PyASCIIObject *) key)->hash) == -1)
1456 {
1457 hash = PyObject_Hash(key);
1458 if (hash == -1)
1459 return -1;
1460 }
1461
1462 /* insertdict() handles any resizing that might be necessary */
1463 return insertdict(mp, key, hash, value);
1464 }
1465
1466 int
_PyDict_SetItem_KnownHash(PyObject * op,PyObject * key,PyObject * value,Py_hash_t hash)1467 _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
1468 Py_hash_t hash)
1469 {
1470 PyDictObject *mp;
1471
1472 if (!PyDict_Check(op)) {
1473 PyErr_BadInternalCall();
1474 return -1;
1475 }
1476 assert(key);
1477 assert(value);
1478 assert(hash != -1);
1479 mp = (PyDictObject *)op;
1480
1481 /* insertdict() handles any resizing that might be necessary */
1482 return insertdict(mp, key, hash, value);
1483 }
1484
1485 static int
delitem_common(PyDictObject * mp,Py_hash_t hash,Py_ssize_t ix,PyObject * old_value)1486 delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
1487 PyObject *old_value)
1488 {
1489 PyObject *old_key;
1490 PyDictKeyEntry *ep;
1491
1492 Py_ssize_t hashpos = lookdict_index(mp->ma_keys, hash, ix);
1493 assert(hashpos >= 0);
1494
1495 mp->ma_used--;
1496 mp->ma_version_tag = DICT_NEXT_VERSION();
1497 ep = &DK_ENTRIES(mp->ma_keys)[ix];
1498 dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1499 ENSURE_ALLOWS_DELETIONS(mp);
1500 old_key = ep->me_key;
1501 ep->me_key = NULL;
1502 ep->me_value = NULL;
1503 Py_DECREF(old_key);
1504 Py_DECREF(old_value);
1505
1506 assert(_PyDict_CheckConsistency(mp));
1507 return 0;
1508 }
1509
1510 int
PyDict_DelItem(PyObject * op,PyObject * key)1511 PyDict_DelItem(PyObject *op, PyObject *key)
1512 {
1513 Py_hash_t hash;
1514 assert(key);
1515 if (!PyUnicode_CheckExact(key) ||
1516 (hash = ((PyASCIIObject *) key)->hash) == -1) {
1517 hash = PyObject_Hash(key);
1518 if (hash == -1)
1519 return -1;
1520 }
1521
1522 return _PyDict_DelItem_KnownHash(op, key, hash);
1523 }
1524
1525 int
_PyDict_DelItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)1526 _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
1527 {
1528 Py_ssize_t ix;
1529 PyDictObject *mp;
1530 PyObject *old_value;
1531
1532 if (!PyDict_Check(op)) {
1533 PyErr_BadInternalCall();
1534 return -1;
1535 }
1536 assert(key);
1537 assert(hash != -1);
1538 mp = (PyDictObject *)op;
1539 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1540 if (ix == DKIX_ERROR)
1541 return -1;
1542 if (ix == DKIX_EMPTY || old_value == NULL) {
1543 _PyErr_SetKeyError(key);
1544 return -1;
1545 }
1546
1547 // Split table doesn't allow deletion. Combine it.
1548 if (_PyDict_HasSplitTable(mp)) {
1549 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1550 return -1;
1551 }
1552 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1553 assert(ix >= 0);
1554 }
1555
1556 return delitem_common(mp, hash, ix, old_value);
1557 }
1558
1559 /* This function promises that the predicate -> deletion sequence is atomic
1560 * (i.e. protected by the GIL), assuming the predicate itself doesn't
1561 * release the GIL.
1562 */
1563 int
_PyDict_DelItemIf(PyObject * op,PyObject * key,int (* predicate)(PyObject * value))1564 _PyDict_DelItemIf(PyObject *op, PyObject *key,
1565 int (*predicate)(PyObject *value))
1566 {
1567 Py_ssize_t hashpos, ix;
1568 PyDictObject *mp;
1569 Py_hash_t hash;
1570 PyObject *old_value;
1571 int res;
1572
1573 if (!PyDict_Check(op)) {
1574 PyErr_BadInternalCall();
1575 return -1;
1576 }
1577 assert(key);
1578 hash = PyObject_Hash(key);
1579 if (hash == -1)
1580 return -1;
1581 mp = (PyDictObject *)op;
1582 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1583 if (ix == DKIX_ERROR)
1584 return -1;
1585 if (ix == DKIX_EMPTY || old_value == NULL) {
1586 _PyErr_SetKeyError(key);
1587 return -1;
1588 }
1589
1590 // Split table doesn't allow deletion. Combine it.
1591 if (_PyDict_HasSplitTable(mp)) {
1592 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1593 return -1;
1594 }
1595 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1596 assert(ix >= 0);
1597 }
1598
1599 res = predicate(old_value);
1600 if (res == -1)
1601 return -1;
1602
1603 hashpos = lookdict_index(mp->ma_keys, hash, ix);
1604 assert(hashpos >= 0);
1605
1606 if (res > 0)
1607 return delitem_common(mp, hashpos, ix, old_value);
1608 else
1609 return 0;
1610 }
1611
1612
1613 void
PyDict_Clear(PyObject * op)1614 PyDict_Clear(PyObject *op)
1615 {
1616 PyDictObject *mp;
1617 PyDictKeysObject *oldkeys;
1618 PyObject **oldvalues;
1619 Py_ssize_t i, n;
1620
1621 if (!PyDict_Check(op))
1622 return;
1623 mp = ((PyDictObject *)op);
1624 oldkeys = mp->ma_keys;
1625 oldvalues = mp->ma_values;
1626 if (oldvalues == empty_values)
1627 return;
1628 /* Empty the dict... */
1629 DK_INCREF(Py_EMPTY_KEYS);
1630 mp->ma_keys = Py_EMPTY_KEYS;
1631 mp->ma_values = empty_values;
1632 mp->ma_used = 0;
1633 mp->ma_version_tag = DICT_NEXT_VERSION();
1634 /* ...then clear the keys and values */
1635 if (oldvalues != NULL) {
1636 n = oldkeys->dk_nentries;
1637 for (i = 0; i < n; i++)
1638 Py_CLEAR(oldvalues[i]);
1639 free_values(oldvalues);
1640 DK_DECREF(oldkeys);
1641 }
1642 else {
1643 assert(oldkeys->dk_refcnt == 1);
1644 DK_DECREF(oldkeys);
1645 }
1646 assert(_PyDict_CheckConsistency(mp));
1647 }
1648
1649 /* Internal version of PyDict_Next that returns a hash value in addition
1650 * to the key and value.
1651 * Return 1 on success, return 0 when the reached the end of the dictionary
1652 * (or if op is not a dictionary)
1653 */
1654 int
_PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue,Py_hash_t * phash)1655 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
1656 PyObject **pvalue, Py_hash_t *phash)
1657 {
1658 Py_ssize_t i;
1659 PyDictObject *mp;
1660 PyDictKeyEntry *entry_ptr;
1661 PyObject *value;
1662
1663 if (!PyDict_Check(op))
1664 return 0;
1665 mp = (PyDictObject *)op;
1666 i = *ppos;
1667 if (mp->ma_values) {
1668 if (i < 0 || i >= mp->ma_used)
1669 return 0;
1670 /* values of split table is always dense */
1671 entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
1672 value = mp->ma_values[i];
1673 assert(value != NULL);
1674 }
1675 else {
1676 Py_ssize_t n = mp->ma_keys->dk_nentries;
1677 if (i < 0 || i >= n)
1678 return 0;
1679 entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
1680 while (i < n && entry_ptr->me_value == NULL) {
1681 entry_ptr++;
1682 i++;
1683 }
1684 if (i >= n)
1685 return 0;
1686 value = entry_ptr->me_value;
1687 }
1688 *ppos = i+1;
1689 if (pkey)
1690 *pkey = entry_ptr->me_key;
1691 if (phash)
1692 *phash = entry_ptr->me_hash;
1693 if (pvalue)
1694 *pvalue = value;
1695 return 1;
1696 }
1697
1698 /*
1699 * Iterate over a dict. Use like so:
1700 *
1701 * Py_ssize_t i;
1702 * PyObject *key, *value;
1703 * i = 0; # important! i should not otherwise be changed by you
1704 * while (PyDict_Next(yourdict, &i, &key, &value)) {
1705 * Refer to borrowed references in key and value.
1706 * }
1707 *
1708 * Return 1 on success, return 0 when the reached the end of the dictionary
1709 * (or if op is not a dictionary)
1710 *
1711 * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
1712 * mutates the dict. One exception: it is safe if the loop merely changes
1713 * the values associated with the keys (but doesn't insert new keys or
1714 * delete keys), via PyDict_SetItem().
1715 */
1716 int
PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue)1717 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
1718 {
1719 return _PyDict_Next(op, ppos, pkey, pvalue, NULL);
1720 }
1721
1722 /* Internal version of dict.pop(). */
1723 PyObject *
_PyDict_Pop_KnownHash(PyObject * dict,PyObject * key,Py_hash_t hash,PyObject * deflt)1724 _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
1725 {
1726 Py_ssize_t ix, hashpos;
1727 PyObject *old_value, *old_key;
1728 PyDictKeyEntry *ep;
1729 PyDictObject *mp;
1730
1731 assert(PyDict_Check(dict));
1732 mp = (PyDictObject *)dict;
1733
1734 if (mp->ma_used == 0) {
1735 if (deflt) {
1736 Py_INCREF(deflt);
1737 return deflt;
1738 }
1739 _PyErr_SetKeyError(key);
1740 return NULL;
1741 }
1742 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1743 if (ix == DKIX_ERROR)
1744 return NULL;
1745 if (ix == DKIX_EMPTY || old_value == NULL) {
1746 if (deflt) {
1747 Py_INCREF(deflt);
1748 return deflt;
1749 }
1750 _PyErr_SetKeyError(key);
1751 return NULL;
1752 }
1753
1754 // Split table doesn't allow deletion. Combine it.
1755 if (_PyDict_HasSplitTable(mp)) {
1756 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1757 return NULL;
1758 }
1759 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value);
1760 assert(ix >= 0);
1761 }
1762
1763 hashpos = lookdict_index(mp->ma_keys, hash, ix);
1764 assert(hashpos >= 0);
1765 assert(old_value != NULL);
1766 mp->ma_used--;
1767 mp->ma_version_tag = DICT_NEXT_VERSION();
1768 dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1769 ep = &DK_ENTRIES(mp->ma_keys)[ix];
1770 ENSURE_ALLOWS_DELETIONS(mp);
1771 old_key = ep->me_key;
1772 ep->me_key = NULL;
1773 ep->me_value = NULL;
1774 Py_DECREF(old_key);
1775
1776 assert(_PyDict_CheckConsistency(mp));
1777 return old_value;
1778 }
1779
1780 PyObject *
_PyDict_Pop(PyObject * dict,PyObject * key,PyObject * deflt)1781 _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
1782 {
1783 Py_hash_t hash;
1784
1785 if (((PyDictObject *)dict)->ma_used == 0) {
1786 if (deflt) {
1787 Py_INCREF(deflt);
1788 return deflt;
1789 }
1790 _PyErr_SetKeyError(key);
1791 return NULL;
1792 }
1793 if (!PyUnicode_CheckExact(key) ||
1794 (hash = ((PyASCIIObject *) key)->hash) == -1) {
1795 hash = PyObject_Hash(key);
1796 if (hash == -1)
1797 return NULL;
1798 }
1799 return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
1800 }
1801
1802 /* Internal version of dict.from_keys(). It is subclass-friendly. */
1803 PyObject *
_PyDict_FromKeys(PyObject * cls,PyObject * iterable,PyObject * value)1804 _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
1805 {
1806 PyObject *it; /* iter(iterable) */
1807 PyObject *key;
1808 PyObject *d;
1809 int status;
1810
1811 d = _PyObject_CallNoArg(cls);
1812 if (d == NULL)
1813 return NULL;
1814
1815 if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
1816 if (PyDict_CheckExact(iterable)) {
1817 PyDictObject *mp = (PyDictObject *)d;
1818 PyObject *oldvalue;
1819 Py_ssize_t pos = 0;
1820 PyObject *key;
1821 Py_hash_t hash;
1822
1823 if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) {
1824 Py_DECREF(d);
1825 return NULL;
1826 }
1827
1828 while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) {
1829 if (insertdict(mp, key, hash, value)) {
1830 Py_DECREF(d);
1831 return NULL;
1832 }
1833 }
1834 return d;
1835 }
1836 if (PyAnySet_CheckExact(iterable)) {
1837 PyDictObject *mp = (PyDictObject *)d;
1838 Py_ssize_t pos = 0;
1839 PyObject *key;
1840 Py_hash_t hash;
1841
1842 if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
1843 Py_DECREF(d);
1844 return NULL;
1845 }
1846
1847 while (_PySet_NextEntry(iterable, &pos, &key, &hash)) {
1848 if (insertdict(mp, key, hash, value)) {
1849 Py_DECREF(d);
1850 return NULL;
1851 }
1852 }
1853 return d;
1854 }
1855 }
1856
1857 it = PyObject_GetIter(iterable);
1858 if (it == NULL){
1859 Py_DECREF(d);
1860 return NULL;
1861 }
1862
1863 if (PyDict_CheckExact(d)) {
1864 while ((key = PyIter_Next(it)) != NULL) {
1865 status = PyDict_SetItem(d, key, value);
1866 Py_DECREF(key);
1867 if (status < 0)
1868 goto Fail;
1869 }
1870 } else {
1871 while ((key = PyIter_Next(it)) != NULL) {
1872 status = PyObject_SetItem(d, key, value);
1873 Py_DECREF(key);
1874 if (status < 0)
1875 goto Fail;
1876 }
1877 }
1878
1879 if (PyErr_Occurred())
1880 goto Fail;
1881 Py_DECREF(it);
1882 return d;
1883
1884 Fail:
1885 Py_DECREF(it);
1886 Py_DECREF(d);
1887 return NULL;
1888 }
1889
1890 /* Methods */
1891
1892 static void
dict_dealloc(PyDictObject * mp)1893 dict_dealloc(PyDictObject *mp)
1894 {
1895 PyObject **values = mp->ma_values;
1896 PyDictKeysObject *keys = mp->ma_keys;
1897 Py_ssize_t i, n;
1898
1899 /* bpo-31095: UnTrack is needed before calling any callbacks */
1900 PyObject_GC_UnTrack(mp);
1901 Py_TRASHCAN_SAFE_BEGIN(mp)
1902 if (values != NULL) {
1903 if (values != empty_values) {
1904 for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
1905 Py_XDECREF(values[i]);
1906 }
1907 free_values(values);
1908 }
1909 DK_DECREF(keys);
1910 }
1911 else if (keys != NULL) {
1912 assert(keys->dk_refcnt == 1);
1913 DK_DECREF(keys);
1914 }
1915 if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
1916 free_list[numfree++] = mp;
1917 else
1918 Py_TYPE(mp)->tp_free((PyObject *)mp);
1919 Py_TRASHCAN_SAFE_END(mp)
1920 }
1921
1922
1923 static PyObject *
dict_repr(PyDictObject * mp)1924 dict_repr(PyDictObject *mp)
1925 {
1926 Py_ssize_t i;
1927 PyObject *key = NULL, *value = NULL;
1928 _PyUnicodeWriter writer;
1929 int first;
1930
1931 i = Py_ReprEnter((PyObject *)mp);
1932 if (i != 0) {
1933 return i > 0 ? PyUnicode_FromString("{...}") : NULL;
1934 }
1935
1936 if (mp->ma_used == 0) {
1937 Py_ReprLeave((PyObject *)mp);
1938 return PyUnicode_FromString("{}");
1939 }
1940
1941 _PyUnicodeWriter_Init(&writer);
1942 writer.overallocate = 1;
1943 /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */
1944 writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1;
1945
1946 if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0)
1947 goto error;
1948
1949 /* Do repr() on each key+value pair, and insert ": " between them.
1950 Note that repr may mutate the dict. */
1951 i = 0;
1952 first = 1;
1953 while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
1954 PyObject *s;
1955 int res;
1956
1957 /* Prevent repr from deleting key or value during key format. */
1958 Py_INCREF(key);
1959 Py_INCREF(value);
1960
1961 if (!first) {
1962 if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
1963 goto error;
1964 }
1965 first = 0;
1966
1967 s = PyObject_Repr(key);
1968 if (s == NULL)
1969 goto error;
1970 res = _PyUnicodeWriter_WriteStr(&writer, s);
1971 Py_DECREF(s);
1972 if (res < 0)
1973 goto error;
1974
1975 if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0)
1976 goto error;
1977
1978 s = PyObject_Repr(value);
1979 if (s == NULL)
1980 goto error;
1981 res = _PyUnicodeWriter_WriteStr(&writer, s);
1982 Py_DECREF(s);
1983 if (res < 0)
1984 goto error;
1985
1986 Py_CLEAR(key);
1987 Py_CLEAR(value);
1988 }
1989
1990 writer.overallocate = 0;
1991 if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0)
1992 goto error;
1993
1994 Py_ReprLeave((PyObject *)mp);
1995
1996 return _PyUnicodeWriter_Finish(&writer);
1997
1998 error:
1999 Py_ReprLeave((PyObject *)mp);
2000 _PyUnicodeWriter_Dealloc(&writer);
2001 Py_XDECREF(key);
2002 Py_XDECREF(value);
2003 return NULL;
2004 }
2005
2006 static Py_ssize_t
dict_length(PyDictObject * mp)2007 dict_length(PyDictObject *mp)
2008 {
2009 return mp->ma_used;
2010 }
2011
2012 static PyObject *
dict_subscript(PyDictObject * mp,PyObject * key)2013 dict_subscript(PyDictObject *mp, PyObject *key)
2014 {
2015 Py_ssize_t ix;
2016 Py_hash_t hash;
2017 PyObject *value;
2018
2019 if (!PyUnicode_CheckExact(key) ||
2020 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2021 hash = PyObject_Hash(key);
2022 if (hash == -1)
2023 return NULL;
2024 }
2025 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
2026 if (ix == DKIX_ERROR)
2027 return NULL;
2028 if (ix == DKIX_EMPTY || value == NULL) {
2029 if (!PyDict_CheckExact(mp)) {
2030 /* Look up __missing__ method if we're a subclass. */
2031 PyObject *missing, *res;
2032 _Py_IDENTIFIER(__missing__);
2033 missing = _PyObject_LookupSpecial((PyObject *)mp, &PyId___missing__);
2034 if (missing != NULL) {
2035 res = PyObject_CallFunctionObjArgs(missing,
2036 key, NULL);
2037 Py_DECREF(missing);
2038 return res;
2039 }
2040 else if (PyErr_Occurred())
2041 return NULL;
2042 }
2043 _PyErr_SetKeyError(key);
2044 return NULL;
2045 }
2046 Py_INCREF(value);
2047 return value;
2048 }
2049
2050 static int
dict_ass_sub(PyDictObject * mp,PyObject * v,PyObject * w)2051 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
2052 {
2053 if (w == NULL)
2054 return PyDict_DelItem((PyObject *)mp, v);
2055 else
2056 return PyDict_SetItem((PyObject *)mp, v, w);
2057 }
2058
2059 static PyMappingMethods dict_as_mapping = {
2060 (lenfunc)dict_length, /*mp_length*/
2061 (binaryfunc)dict_subscript, /*mp_subscript*/
2062 (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
2063 };
2064
2065 static PyObject *
dict_keys(PyDictObject * mp)2066 dict_keys(PyDictObject *mp)
2067 {
2068 PyObject *v;
2069 Py_ssize_t i, j;
2070 PyDictKeyEntry *ep;
2071 Py_ssize_t size, n, offset;
2072 PyObject **value_ptr;
2073
2074 again:
2075 n = mp->ma_used;
2076 v = PyList_New(n);
2077 if (v == NULL)
2078 return NULL;
2079 if (n != mp->ma_used) {
2080 /* Durnit. The allocations caused the dict to resize.
2081 * Just start over, this shouldn't normally happen.
2082 */
2083 Py_DECREF(v);
2084 goto again;
2085 }
2086 ep = DK_ENTRIES(mp->ma_keys);
2087 size = mp->ma_keys->dk_nentries;
2088 if (mp->ma_values) {
2089 value_ptr = mp->ma_values;
2090 offset = sizeof(PyObject *);
2091 }
2092 else {
2093 value_ptr = &ep[0].me_value;
2094 offset = sizeof(PyDictKeyEntry);
2095 }
2096 for (i = 0, j = 0; i < size; i++) {
2097 if (*value_ptr != NULL) {
2098 PyObject *key = ep[i].me_key;
2099 Py_INCREF(key);
2100 PyList_SET_ITEM(v, j, key);
2101 j++;
2102 }
2103 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2104 }
2105 assert(j == n);
2106 return v;
2107 }
2108
2109 static PyObject *
dict_values(PyDictObject * mp)2110 dict_values(PyDictObject *mp)
2111 {
2112 PyObject *v;
2113 Py_ssize_t i, j;
2114 PyDictKeyEntry *ep;
2115 Py_ssize_t size, n, offset;
2116 PyObject **value_ptr;
2117
2118 again:
2119 n = mp->ma_used;
2120 v = PyList_New(n);
2121 if (v == NULL)
2122 return NULL;
2123 if (n != mp->ma_used) {
2124 /* Durnit. The allocations caused the dict to resize.
2125 * Just start over, this shouldn't normally happen.
2126 */
2127 Py_DECREF(v);
2128 goto again;
2129 }
2130 ep = DK_ENTRIES(mp->ma_keys);
2131 size = mp->ma_keys->dk_nentries;
2132 if (mp->ma_values) {
2133 value_ptr = mp->ma_values;
2134 offset = sizeof(PyObject *);
2135 }
2136 else {
2137 value_ptr = &ep[0].me_value;
2138 offset = sizeof(PyDictKeyEntry);
2139 }
2140 for (i = 0, j = 0; i < size; i++) {
2141 PyObject *value = *value_ptr;
2142 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2143 if (value != NULL) {
2144 Py_INCREF(value);
2145 PyList_SET_ITEM(v, j, value);
2146 j++;
2147 }
2148 }
2149 assert(j == n);
2150 return v;
2151 }
2152
2153 static PyObject *
dict_items(PyDictObject * mp)2154 dict_items(PyDictObject *mp)
2155 {
2156 PyObject *v;
2157 Py_ssize_t i, j, n;
2158 Py_ssize_t size, offset;
2159 PyObject *item, *key;
2160 PyDictKeyEntry *ep;
2161 PyObject **value_ptr;
2162
2163 /* Preallocate the list of tuples, to avoid allocations during
2164 * the loop over the items, which could trigger GC, which
2165 * could resize the dict. :-(
2166 */
2167 again:
2168 n = mp->ma_used;
2169 v = PyList_New(n);
2170 if (v == NULL)
2171 return NULL;
2172 for (i = 0; i < n; i++) {
2173 item = PyTuple_New(2);
2174 if (item == NULL) {
2175 Py_DECREF(v);
2176 return NULL;
2177 }
2178 PyList_SET_ITEM(v, i, item);
2179 }
2180 if (n != mp->ma_used) {
2181 /* Durnit. The allocations caused the dict to resize.
2182 * Just start over, this shouldn't normally happen.
2183 */
2184 Py_DECREF(v);
2185 goto again;
2186 }
2187 /* Nothing we do below makes any function calls. */
2188 ep = DK_ENTRIES(mp->ma_keys);
2189 size = mp->ma_keys->dk_nentries;
2190 if (mp->ma_values) {
2191 value_ptr = mp->ma_values;
2192 offset = sizeof(PyObject *);
2193 }
2194 else {
2195 value_ptr = &ep[0].me_value;
2196 offset = sizeof(PyDictKeyEntry);
2197 }
2198 for (i = 0, j = 0; i < size; i++) {
2199 PyObject *value = *value_ptr;
2200 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2201 if (value != NULL) {
2202 key = ep[i].me_key;
2203 item = PyList_GET_ITEM(v, j);
2204 Py_INCREF(key);
2205 PyTuple_SET_ITEM(item, 0, key);
2206 Py_INCREF(value);
2207 PyTuple_SET_ITEM(item, 1, value);
2208 j++;
2209 }
2210 }
2211 assert(j == n);
2212 return v;
2213 }
2214
2215 /*[clinic input]
2216 @classmethod
2217 dict.fromkeys
2218 iterable: object
2219 value: object=None
2220 /
2221
2222 Create a new dictionary with keys from iterable and values set to value.
2223 [clinic start generated code]*/
2224
2225 static PyObject *
dict_fromkeys_impl(PyTypeObject * type,PyObject * iterable,PyObject * value)2226 dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
2227 /*[clinic end generated code: output=8fb98e4b10384999 input=382ba4855d0f74c3]*/
2228 {
2229 return _PyDict_FromKeys((PyObject *)type, iterable, value);
2230 }
2231
2232 static int
dict_update_common(PyObject * self,PyObject * args,PyObject * kwds,const char * methname)2233 dict_update_common(PyObject *self, PyObject *args, PyObject *kwds,
2234 const char *methname)
2235 {
2236 PyObject *arg = NULL;
2237 int result = 0;
2238
2239 if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg)) {
2240 result = -1;
2241 }
2242 else if (arg != NULL) {
2243 _Py_IDENTIFIER(keys);
2244 PyObject *func;
2245 if (_PyObject_LookupAttrId(arg, &PyId_keys, &func) < 0) {
2246 result = -1;
2247 }
2248 else if (func != NULL) {
2249 Py_DECREF(func);
2250 result = PyDict_Merge(self, arg, 1);
2251 }
2252 else {
2253 result = PyDict_MergeFromSeq2(self, arg, 1);
2254 }
2255 }
2256
2257 if (result == 0 && kwds != NULL) {
2258 if (PyArg_ValidateKeywordArguments(kwds))
2259 result = PyDict_Merge(self, kwds, 1);
2260 else
2261 result = -1;
2262 }
2263 return result;
2264 }
2265
2266 /* Note: dict.update() uses the METH_VARARGS|METH_KEYWORDS calling convention.
2267 Using METH_FASTCALL|METH_KEYWORDS would make dict.update(**dict2) calls
2268 slower, see the issue #29312. */
2269 static PyObject *
dict_update(PyObject * self,PyObject * args,PyObject * kwds)2270 dict_update(PyObject *self, PyObject *args, PyObject *kwds)
2271 {
2272 if (dict_update_common(self, args, kwds, "update") != -1)
2273 Py_RETURN_NONE;
2274 return NULL;
2275 }
2276
2277 /* Update unconditionally replaces existing items.
2278 Merge has a 3rd argument 'override'; if set, it acts like Update,
2279 otherwise it leaves existing items unchanged.
2280
2281 PyDict_{Update,Merge} update/merge from a mapping object.
2282
2283 PyDict_MergeFromSeq2 updates/merges from any iterable object
2284 producing iterable objects of length 2.
2285 */
2286
2287 int
PyDict_MergeFromSeq2(PyObject * d,PyObject * seq2,int override)2288 PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
2289 {
2290 PyObject *it; /* iter(seq2) */
2291 Py_ssize_t i; /* index into seq2 of current element */
2292 PyObject *item; /* seq2[i] */
2293 PyObject *fast; /* item as a 2-tuple or 2-list */
2294
2295 assert(d != NULL);
2296 assert(PyDict_Check(d));
2297 assert(seq2 != NULL);
2298
2299 it = PyObject_GetIter(seq2);
2300 if (it == NULL)
2301 return -1;
2302
2303 for (i = 0; ; ++i) {
2304 PyObject *key, *value;
2305 Py_ssize_t n;
2306
2307 fast = NULL;
2308 item = PyIter_Next(it);
2309 if (item == NULL) {
2310 if (PyErr_Occurred())
2311 goto Fail;
2312 break;
2313 }
2314
2315 /* Convert item to sequence, and verify length 2. */
2316 fast = PySequence_Fast(item, "");
2317 if (fast == NULL) {
2318 if (PyErr_ExceptionMatches(PyExc_TypeError))
2319 PyErr_Format(PyExc_TypeError,
2320 "cannot convert dictionary update "
2321 "sequence element #%zd to a sequence",
2322 i);
2323 goto Fail;
2324 }
2325 n = PySequence_Fast_GET_SIZE(fast);
2326 if (n != 2) {
2327 PyErr_Format(PyExc_ValueError,
2328 "dictionary update sequence element #%zd "
2329 "has length %zd; 2 is required",
2330 i, n);
2331 goto Fail;
2332 }
2333
2334 /* Update/merge with this (key, value) pair. */
2335 key = PySequence_Fast_GET_ITEM(fast, 0);
2336 value = PySequence_Fast_GET_ITEM(fast, 1);
2337 Py_INCREF(key);
2338 Py_INCREF(value);
2339 if (override || PyDict_GetItem(d, key) == NULL) {
2340 int status = PyDict_SetItem(d, key, value);
2341 if (status < 0) {
2342 Py_DECREF(key);
2343 Py_DECREF(value);
2344 goto Fail;
2345 }
2346 }
2347 Py_DECREF(key);
2348 Py_DECREF(value);
2349 Py_DECREF(fast);
2350 Py_DECREF(item);
2351 }
2352
2353 i = 0;
2354 assert(_PyDict_CheckConsistency((PyDictObject *)d));
2355 goto Return;
2356 Fail:
2357 Py_XDECREF(item);
2358 Py_XDECREF(fast);
2359 i = -1;
2360 Return:
2361 Py_DECREF(it);
2362 return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
2363 }
2364
2365 static int
dict_merge(PyObject * a,PyObject * b,int override)2366 dict_merge(PyObject *a, PyObject *b, int override)
2367 {
2368 PyDictObject *mp, *other;
2369 Py_ssize_t i, n;
2370 PyDictKeyEntry *entry, *ep0;
2371
2372 assert(0 <= override && override <= 2);
2373
2374 /* We accept for the argument either a concrete dictionary object,
2375 * or an abstract "mapping" object. For the former, we can do
2376 * things quite efficiently. For the latter, we only require that
2377 * PyMapping_Keys() and PyObject_GetItem() be supported.
2378 */
2379 if (a == NULL || !PyDict_Check(a) || b == NULL) {
2380 PyErr_BadInternalCall();
2381 return -1;
2382 }
2383 mp = (PyDictObject*)a;
2384 if (PyDict_Check(b) && (Py_TYPE(b)->tp_iter == (getiterfunc)dict_iter)) {
2385 other = (PyDictObject*)b;
2386 if (other == mp || other->ma_used == 0)
2387 /* a.update(a) or a.update({}); nothing to do */
2388 return 0;
2389 if (mp->ma_used == 0)
2390 /* Since the target dict is empty, PyDict_GetItem()
2391 * always returns NULL. Setting override to 1
2392 * skips the unnecessary test.
2393 */
2394 override = 1;
2395 /* Do one big resize at the start, rather than
2396 * incrementally resizing as we insert new items. Expect
2397 * that there will be no (or few) overlapping keys.
2398 */
2399 if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
2400 if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
2401 return -1;
2402 }
2403 }
2404 ep0 = DK_ENTRIES(other->ma_keys);
2405 for (i = 0, n = other->ma_keys->dk_nentries; i < n; i++) {
2406 PyObject *key, *value;
2407 Py_hash_t hash;
2408 entry = &ep0[i];
2409 key = entry->me_key;
2410 hash = entry->me_hash;
2411 if (other->ma_values)
2412 value = other->ma_values[i];
2413 else
2414 value = entry->me_value;
2415
2416 if (value != NULL) {
2417 int err = 0;
2418 Py_INCREF(key);
2419 Py_INCREF(value);
2420 if (override == 1)
2421 err = insertdict(mp, key, hash, value);
2422 else if (_PyDict_GetItem_KnownHash(a, key, hash) == NULL) {
2423 if (PyErr_Occurred()) {
2424 Py_DECREF(value);
2425 Py_DECREF(key);
2426 return -1;
2427 }
2428 err = insertdict(mp, key, hash, value);
2429 }
2430 else if (override != 0) {
2431 _PyErr_SetKeyError(key);
2432 Py_DECREF(value);
2433 Py_DECREF(key);
2434 return -1;
2435 }
2436 Py_DECREF(value);
2437 Py_DECREF(key);
2438 if (err != 0)
2439 return -1;
2440
2441 if (n != other->ma_keys->dk_nentries) {
2442 PyErr_SetString(PyExc_RuntimeError,
2443 "dict mutated during update");
2444 return -1;
2445 }
2446 }
2447 }
2448 }
2449 else {
2450 /* Do it the generic, slower way */
2451 PyObject *keys = PyMapping_Keys(b);
2452 PyObject *iter;
2453 PyObject *key, *value;
2454 int status;
2455
2456 if (keys == NULL)
2457 /* Docstring says this is equivalent to E.keys() so
2458 * if E doesn't have a .keys() method we want
2459 * AttributeError to percolate up. Might as well
2460 * do the same for any other error.
2461 */
2462 return -1;
2463
2464 iter = PyObject_GetIter(keys);
2465 Py_DECREF(keys);
2466 if (iter == NULL)
2467 return -1;
2468
2469 for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
2470 if (override != 1 && PyDict_GetItem(a, key) != NULL) {
2471 if (override != 0) {
2472 _PyErr_SetKeyError(key);
2473 Py_DECREF(key);
2474 Py_DECREF(iter);
2475 return -1;
2476 }
2477 Py_DECREF(key);
2478 continue;
2479 }
2480 value = PyObject_GetItem(b, key);
2481 if (value == NULL) {
2482 Py_DECREF(iter);
2483 Py_DECREF(key);
2484 return -1;
2485 }
2486 status = PyDict_SetItem(a, key, value);
2487 Py_DECREF(key);
2488 Py_DECREF(value);
2489 if (status < 0) {
2490 Py_DECREF(iter);
2491 return -1;
2492 }
2493 }
2494 Py_DECREF(iter);
2495 if (PyErr_Occurred())
2496 /* Iterator completed, via error */
2497 return -1;
2498 }
2499 assert(_PyDict_CheckConsistency((PyDictObject *)a));
2500 return 0;
2501 }
2502
2503 int
PyDict_Update(PyObject * a,PyObject * b)2504 PyDict_Update(PyObject *a, PyObject *b)
2505 {
2506 return dict_merge(a, b, 1);
2507 }
2508
2509 int
PyDict_Merge(PyObject * a,PyObject * b,int override)2510 PyDict_Merge(PyObject *a, PyObject *b, int override)
2511 {
2512 /* XXX Deprecate override not in (0, 1). */
2513 return dict_merge(a, b, override != 0);
2514 }
2515
2516 int
_PyDict_MergeEx(PyObject * a,PyObject * b,int override)2517 _PyDict_MergeEx(PyObject *a, PyObject *b, int override)
2518 {
2519 return dict_merge(a, b, override);
2520 }
2521
2522 static PyObject *
dict_copy(PyDictObject * mp)2523 dict_copy(PyDictObject *mp)
2524 {
2525 return PyDict_Copy((PyObject*)mp);
2526 }
2527
2528 PyObject *
PyDict_Copy(PyObject * o)2529 PyDict_Copy(PyObject *o)
2530 {
2531 PyObject *copy;
2532 PyDictObject *mp;
2533 Py_ssize_t i, n;
2534
2535 if (o == NULL || !PyDict_Check(o)) {
2536 PyErr_BadInternalCall();
2537 return NULL;
2538 }
2539
2540 mp = (PyDictObject *)o;
2541 if (mp->ma_used == 0) {
2542 /* The dict is empty; just return a new dict. */
2543 return PyDict_New();
2544 }
2545
2546 if (_PyDict_HasSplitTable(mp)) {
2547 PyDictObject *split_copy;
2548 Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
2549 PyObject **newvalues;
2550 newvalues = new_values(size);
2551 if (newvalues == NULL)
2552 return PyErr_NoMemory();
2553 split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
2554 if (split_copy == NULL) {
2555 free_values(newvalues);
2556 return NULL;
2557 }
2558 split_copy->ma_values = newvalues;
2559 split_copy->ma_keys = mp->ma_keys;
2560 split_copy->ma_used = mp->ma_used;
2561 split_copy->ma_version_tag = DICT_NEXT_VERSION();
2562 DK_INCREF(mp->ma_keys);
2563 for (i = 0, n = size; i < n; i++) {
2564 PyObject *value = mp->ma_values[i];
2565 Py_XINCREF(value);
2566 split_copy->ma_values[i] = value;
2567 }
2568 if (_PyObject_GC_IS_TRACKED(mp))
2569 _PyObject_GC_TRACK(split_copy);
2570 return (PyObject *)split_copy;
2571 }
2572
2573 if (PyDict_CheckExact(mp) && mp->ma_values == NULL &&
2574 (mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
2575 {
2576 /* Use fast-copy if:
2577
2578 (1) 'mp' is an instance of a subclassed dict; and
2579
2580 (2) 'mp' is not a split-dict; and
2581
2582 (3) if 'mp' is non-compact ('del' operation does not resize dicts),
2583 do fast-copy only if it has at most 1/3 non-used keys.
2584
2585 The last condition (3) is important to guard against a pathological
2586 case when a large dict is almost emptied with multiple del/pop
2587 operations and copied after that. In cases like this, we defer to
2588 PyDict_Merge, which produces a compacted copy.
2589 */
2590 return clone_combined_dict(mp);
2591 }
2592
2593 copy = PyDict_New();
2594 if (copy == NULL)
2595 return NULL;
2596 if (PyDict_Merge(copy, o, 1) == 0)
2597 return copy;
2598 Py_DECREF(copy);
2599 return NULL;
2600 }
2601
2602 Py_ssize_t
PyDict_Size(PyObject * mp)2603 PyDict_Size(PyObject *mp)
2604 {
2605 if (mp == NULL || !PyDict_Check(mp)) {
2606 PyErr_BadInternalCall();
2607 return -1;
2608 }
2609 return ((PyDictObject *)mp)->ma_used;
2610 }
2611
2612 PyObject *
PyDict_Keys(PyObject * mp)2613 PyDict_Keys(PyObject *mp)
2614 {
2615 if (mp == NULL || !PyDict_Check(mp)) {
2616 PyErr_BadInternalCall();
2617 return NULL;
2618 }
2619 return dict_keys((PyDictObject *)mp);
2620 }
2621
2622 PyObject *
PyDict_Values(PyObject * mp)2623 PyDict_Values(PyObject *mp)
2624 {
2625 if (mp == NULL || !PyDict_Check(mp)) {
2626 PyErr_BadInternalCall();
2627 return NULL;
2628 }
2629 return dict_values((PyDictObject *)mp);
2630 }
2631
2632 PyObject *
PyDict_Items(PyObject * mp)2633 PyDict_Items(PyObject *mp)
2634 {
2635 if (mp == NULL || !PyDict_Check(mp)) {
2636 PyErr_BadInternalCall();
2637 return NULL;
2638 }
2639 return dict_items((PyDictObject *)mp);
2640 }
2641
2642 /* Return 1 if dicts equal, 0 if not, -1 if error.
2643 * Gets out as soon as any difference is detected.
2644 * Uses only Py_EQ comparison.
2645 */
2646 static int
dict_equal(PyDictObject * a,PyDictObject * b)2647 dict_equal(PyDictObject *a, PyDictObject *b)
2648 {
2649 Py_ssize_t i;
2650
2651 if (a->ma_used != b->ma_used)
2652 /* can't be equal if # of entries differ */
2653 return 0;
2654 /* Same # of entries -- check all of 'em. Exit early on any diff. */
2655 for (i = 0; i < a->ma_keys->dk_nentries; i++) {
2656 PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
2657 PyObject *aval;
2658 if (a->ma_values)
2659 aval = a->ma_values[i];
2660 else
2661 aval = ep->me_value;
2662 if (aval != NULL) {
2663 int cmp;
2664 PyObject *bval;
2665 PyObject *key = ep->me_key;
2666 /* temporarily bump aval's refcount to ensure it stays
2667 alive until we're done with it */
2668 Py_INCREF(aval);
2669 /* ditto for key */
2670 Py_INCREF(key);
2671 /* reuse the known hash value */
2672 b->ma_keys->dk_lookup(b, key, ep->me_hash, &bval);
2673 if (bval == NULL) {
2674 Py_DECREF(key);
2675 Py_DECREF(aval);
2676 if (PyErr_Occurred())
2677 return -1;
2678 return 0;
2679 }
2680 cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
2681 Py_DECREF(key);
2682 Py_DECREF(aval);
2683 if (cmp <= 0) /* error or not equal */
2684 return cmp;
2685 }
2686 }
2687 return 1;
2688 }
2689
2690 static PyObject *
dict_richcompare(PyObject * v,PyObject * w,int op)2691 dict_richcompare(PyObject *v, PyObject *w, int op)
2692 {
2693 int cmp;
2694 PyObject *res;
2695
2696 if (!PyDict_Check(v) || !PyDict_Check(w)) {
2697 res = Py_NotImplemented;
2698 }
2699 else if (op == Py_EQ || op == Py_NE) {
2700 cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
2701 if (cmp < 0)
2702 return NULL;
2703 res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
2704 }
2705 else
2706 res = Py_NotImplemented;
2707 Py_INCREF(res);
2708 return res;
2709 }
2710
2711 /*[clinic input]
2712
2713 @coexist
2714 dict.__contains__
2715
2716 key: object
2717 /
2718
2719 True if the dictionary has the specified key, else False.
2720 [clinic start generated code]*/
2721
2722 static PyObject *
dict___contains__(PyDictObject * self,PyObject * key)2723 dict___contains__(PyDictObject *self, PyObject *key)
2724 /*[clinic end generated code: output=a3d03db709ed6e6b input=fe1cb42ad831e820]*/
2725 {
2726 register PyDictObject *mp = self;
2727 Py_hash_t hash;
2728 Py_ssize_t ix;
2729 PyObject *value;
2730
2731 if (!PyUnicode_CheckExact(key) ||
2732 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2733 hash = PyObject_Hash(key);
2734 if (hash == -1)
2735 return NULL;
2736 }
2737 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
2738 if (ix == DKIX_ERROR)
2739 return NULL;
2740 if (ix == DKIX_EMPTY || value == NULL)
2741 Py_RETURN_FALSE;
2742 Py_RETURN_TRUE;
2743 }
2744
2745 /*[clinic input]
2746 dict.get
2747
2748 key: object
2749 default: object = None
2750 /
2751
2752 Return the value for key if key is in the dictionary, else default.
2753 [clinic start generated code]*/
2754
2755 static PyObject *
dict_get_impl(PyDictObject * self,PyObject * key,PyObject * default_value)2756 dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
2757 /*[clinic end generated code: output=bba707729dee05bf input=279ddb5790b6b107]*/
2758 {
2759 PyObject *val = NULL;
2760 Py_hash_t hash;
2761 Py_ssize_t ix;
2762
2763 if (!PyUnicode_CheckExact(key) ||
2764 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2765 hash = PyObject_Hash(key);
2766 if (hash == -1)
2767 return NULL;
2768 }
2769 ix = (self->ma_keys->dk_lookup) (self, key, hash, &val);
2770 if (ix == DKIX_ERROR)
2771 return NULL;
2772 if (ix == DKIX_EMPTY || val == NULL) {
2773 val = default_value;
2774 }
2775 Py_INCREF(val);
2776 return val;
2777 }
2778
2779 PyObject *
PyDict_SetDefault(PyObject * d,PyObject * key,PyObject * defaultobj)2780 PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
2781 {
2782 PyDictObject *mp = (PyDictObject *)d;
2783 PyObject *value;
2784 Py_hash_t hash;
2785
2786 if (!PyDict_Check(d)) {
2787 PyErr_BadInternalCall();
2788 return NULL;
2789 }
2790
2791 if (!PyUnicode_CheckExact(key) ||
2792 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2793 hash = PyObject_Hash(key);
2794 if (hash == -1)
2795 return NULL;
2796 }
2797
2798 if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
2799 if (insertion_resize(mp) < 0)
2800 return NULL;
2801 }
2802
2803 Py_ssize_t ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
2804 if (ix == DKIX_ERROR)
2805 return NULL;
2806
2807 if (_PyDict_HasSplitTable(mp) &&
2808 ((ix >= 0 && value == NULL && mp->ma_used != ix) ||
2809 (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
2810 if (insertion_resize(mp) < 0) {
2811 return NULL;
2812 }
2813 ix = DKIX_EMPTY;
2814 }
2815
2816 if (ix == DKIX_EMPTY) {
2817 PyDictKeyEntry *ep, *ep0;
2818 value = defaultobj;
2819 if (mp->ma_keys->dk_usable <= 0) {
2820 if (insertion_resize(mp) < 0) {
2821 return NULL;
2822 }
2823 }
2824 Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
2825 ep0 = DK_ENTRIES(mp->ma_keys);
2826 ep = &ep0[mp->ma_keys->dk_nentries];
2827 dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
2828 Py_INCREF(key);
2829 Py_INCREF(value);
2830 MAINTAIN_TRACKING(mp, key, value);
2831 ep->me_key = key;
2832 ep->me_hash = hash;
2833 if (_PyDict_HasSplitTable(mp)) {
2834 assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
2835 mp->ma_values[mp->ma_keys->dk_nentries] = value;
2836 }
2837 else {
2838 ep->me_value = value;
2839 }
2840 mp->ma_used++;
2841 mp->ma_version_tag = DICT_NEXT_VERSION();
2842 mp->ma_keys->dk_usable--;
2843 mp->ma_keys->dk_nentries++;
2844 assert(mp->ma_keys->dk_usable >= 0);
2845 }
2846 else if (value == NULL) {
2847 value = defaultobj;
2848 assert(_PyDict_HasSplitTable(mp));
2849 assert(ix == mp->ma_used);
2850 Py_INCREF(value);
2851 MAINTAIN_TRACKING(mp, key, value);
2852 mp->ma_values[ix] = value;
2853 mp->ma_used++;
2854 mp->ma_version_tag = DICT_NEXT_VERSION();
2855 }
2856
2857 assert(_PyDict_CheckConsistency(mp));
2858 return value;
2859 }
2860
2861 /*[clinic input]
2862 dict.setdefault
2863
2864 key: object
2865 default: object = None
2866 /
2867
2868 Insert key with a value of default if key is not in the dictionary.
2869
2870 Return the value for key if key is in the dictionary, else default.
2871 [clinic start generated code]*/
2872
2873 static PyObject *
dict_setdefault_impl(PyDictObject * self,PyObject * key,PyObject * default_value)2874 dict_setdefault_impl(PyDictObject *self, PyObject *key,
2875 PyObject *default_value)
2876 /*[clinic end generated code: output=f8c1101ebf69e220 input=0f063756e815fd9d]*/
2877 {
2878 PyObject *val;
2879
2880 val = PyDict_SetDefault((PyObject *)self, key, default_value);
2881 Py_XINCREF(val);
2882 return val;
2883 }
2884
2885 static PyObject *
dict_clear(PyDictObject * mp)2886 dict_clear(PyDictObject *mp)
2887 {
2888 PyDict_Clear((PyObject *)mp);
2889 Py_RETURN_NONE;
2890 }
2891
2892 static PyObject *
dict_pop(PyDictObject * mp,PyObject * args)2893 dict_pop(PyDictObject *mp, PyObject *args)
2894 {
2895 PyObject *key, *deflt = NULL;
2896
2897 if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
2898 return NULL;
2899
2900 return _PyDict_Pop((PyObject*)mp, key, deflt);
2901 }
2902
2903 static PyObject *
dict_popitem(PyDictObject * mp)2904 dict_popitem(PyDictObject *mp)
2905 {
2906 Py_ssize_t i, j;
2907 PyDictKeyEntry *ep0, *ep;
2908 PyObject *res;
2909
2910 /* Allocate the result tuple before checking the size. Believe it
2911 * or not, this allocation could trigger a garbage collection which
2912 * could empty the dict, so if we checked the size first and that
2913 * happened, the result would be an infinite loop (searching for an
2914 * entry that no longer exists). Note that the usual popitem()
2915 * idiom is "while d: k, v = d.popitem()". so needing to throw the
2916 * tuple away if the dict *is* empty isn't a significant
2917 * inefficiency -- possible, but unlikely in practice.
2918 */
2919 res = PyTuple_New(2);
2920 if (res == NULL)
2921 return NULL;
2922 if (mp->ma_used == 0) {
2923 Py_DECREF(res);
2924 PyErr_SetString(PyExc_KeyError,
2925 "popitem(): dictionary is empty");
2926 return NULL;
2927 }
2928 /* Convert split table to combined table */
2929 if (mp->ma_keys->dk_lookup == lookdict_split) {
2930 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
2931 Py_DECREF(res);
2932 return NULL;
2933 }
2934 }
2935 ENSURE_ALLOWS_DELETIONS(mp);
2936
2937 /* Pop last item */
2938 ep0 = DK_ENTRIES(mp->ma_keys);
2939 i = mp->ma_keys->dk_nentries - 1;
2940 while (i >= 0 && ep0[i].me_value == NULL) {
2941 i--;
2942 }
2943 assert(i >= 0);
2944
2945 ep = &ep0[i];
2946 j = lookdict_index(mp->ma_keys, ep->me_hash, i);
2947 assert(j >= 0);
2948 assert(dk_get_index(mp->ma_keys, j) == i);
2949 dk_set_index(mp->ma_keys, j, DKIX_DUMMY);
2950
2951 PyTuple_SET_ITEM(res, 0, ep->me_key);
2952 PyTuple_SET_ITEM(res, 1, ep->me_value);
2953 ep->me_key = NULL;
2954 ep->me_value = NULL;
2955 /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
2956 mp->ma_keys->dk_nentries = i;
2957 mp->ma_used--;
2958 mp->ma_version_tag = DICT_NEXT_VERSION();
2959 assert(_PyDict_CheckConsistency(mp));
2960 return res;
2961 }
2962
2963 static int
dict_traverse(PyObject * op,visitproc visit,void * arg)2964 dict_traverse(PyObject *op, visitproc visit, void *arg)
2965 {
2966 PyDictObject *mp = (PyDictObject *)op;
2967 PyDictKeysObject *keys = mp->ma_keys;
2968 PyDictKeyEntry *entries = DK_ENTRIES(keys);
2969 Py_ssize_t i, n = keys->dk_nentries;
2970
2971 if (keys->dk_lookup == lookdict) {
2972 for (i = 0; i < n; i++) {
2973 if (entries[i].me_value != NULL) {
2974 Py_VISIT(entries[i].me_value);
2975 Py_VISIT(entries[i].me_key);
2976 }
2977 }
2978 }
2979 else {
2980 if (mp->ma_values != NULL) {
2981 for (i = 0; i < n; i++) {
2982 Py_VISIT(mp->ma_values[i]);
2983 }
2984 }
2985 else {
2986 for (i = 0; i < n; i++) {
2987 Py_VISIT(entries[i].me_value);
2988 }
2989 }
2990 }
2991 return 0;
2992 }
2993
2994 static int
dict_tp_clear(PyObject * op)2995 dict_tp_clear(PyObject *op)
2996 {
2997 PyDict_Clear(op);
2998 return 0;
2999 }
3000
3001 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
3002
3003 Py_ssize_t
_PyDict_SizeOf(PyDictObject * mp)3004 _PyDict_SizeOf(PyDictObject *mp)
3005 {
3006 Py_ssize_t size, usable, res;
3007
3008 size = DK_SIZE(mp->ma_keys);
3009 usable = USABLE_FRACTION(size);
3010
3011 res = _PyObject_SIZE(Py_TYPE(mp));
3012 if (mp->ma_values)
3013 res += usable * sizeof(PyObject*);
3014 /* If the dictionary is split, the keys portion is accounted-for
3015 in the type object. */
3016 if (mp->ma_keys->dk_refcnt == 1)
3017 res += (sizeof(PyDictKeysObject)
3018 + DK_IXSIZE(mp->ma_keys) * size
3019 + sizeof(PyDictKeyEntry) * usable);
3020 return res;
3021 }
3022
3023 Py_ssize_t
_PyDict_KeysSize(PyDictKeysObject * keys)3024 _PyDict_KeysSize(PyDictKeysObject *keys)
3025 {
3026 return (sizeof(PyDictKeysObject)
3027 + DK_IXSIZE(keys) * DK_SIZE(keys)
3028 + USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry));
3029 }
3030
3031 static PyObject *
dict_sizeof(PyDictObject * mp)3032 dict_sizeof(PyDictObject *mp)
3033 {
3034 return PyLong_FromSsize_t(_PyDict_SizeOf(mp));
3035 }
3036
3037 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
3038
3039 PyDoc_STRVAR(sizeof__doc__,
3040 "D.__sizeof__() -> size of D in memory, in bytes");
3041
3042 PyDoc_STRVAR(pop__doc__,
3043 "D.pop(k[,d]) -> v, remove specified key and return the corresponding value.\n\
3044 If key is not found, d is returned if given, otherwise KeyError is raised");
3045
3046 PyDoc_STRVAR(popitem__doc__,
3047 "D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
3048 2-tuple; but raise KeyError if D is empty.");
3049
3050 PyDoc_STRVAR(update__doc__,
3051 "D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n\
3052 If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]\n\
3053 If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v\n\
3054 In either case, this is followed by: for k in F: D[k] = F[k]");
3055
3056 PyDoc_STRVAR(clear__doc__,
3057 "D.clear() -> None. Remove all items from D.");
3058
3059 PyDoc_STRVAR(copy__doc__,
3060 "D.copy() -> a shallow copy of D");
3061
3062 /* Forward */
3063 static PyObject *dictkeys_new(PyObject *);
3064 static PyObject *dictitems_new(PyObject *);
3065 static PyObject *dictvalues_new(PyObject *);
3066
3067 PyDoc_STRVAR(keys__doc__,
3068 "D.keys() -> a set-like object providing a view on D's keys");
3069 PyDoc_STRVAR(items__doc__,
3070 "D.items() -> a set-like object providing a view on D's items");
3071 PyDoc_STRVAR(values__doc__,
3072 "D.values() -> an object providing a view on D's values");
3073
3074 static PyMethodDef mapp_methods[] = {
3075 DICT___CONTAINS___METHODDEF
3076 {"__getitem__", (PyCFunction)dict_subscript, METH_O | METH_COEXIST,
3077 getitem__doc__},
3078 {"__sizeof__", (PyCFunction)dict_sizeof, METH_NOARGS,
3079 sizeof__doc__},
3080 DICT_GET_METHODDEF
3081 DICT_SETDEFAULT_METHODDEF
3082 {"pop", (PyCFunction)dict_pop, METH_VARARGS,
3083 pop__doc__},
3084 {"popitem", (PyCFunction)dict_popitem, METH_NOARGS,
3085 popitem__doc__},
3086 {"keys", (PyCFunction)dictkeys_new, METH_NOARGS,
3087 keys__doc__},
3088 {"items", (PyCFunction)dictitems_new, METH_NOARGS,
3089 items__doc__},
3090 {"values", (PyCFunction)dictvalues_new, METH_NOARGS,
3091 values__doc__},
3092 {"update", (PyCFunction)dict_update, METH_VARARGS | METH_KEYWORDS,
3093 update__doc__},
3094 DICT_FROMKEYS_METHODDEF
3095 {"clear", (PyCFunction)dict_clear, METH_NOARGS,
3096 clear__doc__},
3097 {"copy", (PyCFunction)dict_copy, METH_NOARGS,
3098 copy__doc__},
3099 {NULL, NULL} /* sentinel */
3100 };
3101
3102 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
3103 int
PyDict_Contains(PyObject * op,PyObject * key)3104 PyDict_Contains(PyObject *op, PyObject *key)
3105 {
3106 Py_hash_t hash;
3107 Py_ssize_t ix;
3108 PyDictObject *mp = (PyDictObject *)op;
3109 PyObject *value;
3110
3111 if (!PyUnicode_CheckExact(key) ||
3112 (hash = ((PyASCIIObject *) key)->hash) == -1) {
3113 hash = PyObject_Hash(key);
3114 if (hash == -1)
3115 return -1;
3116 }
3117 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
3118 if (ix == DKIX_ERROR)
3119 return -1;
3120 return (ix != DKIX_EMPTY && value != NULL);
3121 }
3122
3123 /* Internal version of PyDict_Contains used when the hash value is already known */
3124 int
_PyDict_Contains(PyObject * op,PyObject * key,Py_hash_t hash)3125 _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash)
3126 {
3127 PyDictObject *mp = (PyDictObject *)op;
3128 PyObject *value;
3129 Py_ssize_t ix;
3130
3131 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value);
3132 if (ix == DKIX_ERROR)
3133 return -1;
3134 return (ix != DKIX_EMPTY && value != NULL);
3135 }
3136
3137 /* Hack to implement "key in dict" */
3138 static PySequenceMethods dict_as_sequence = {
3139 0, /* sq_length */
3140 0, /* sq_concat */
3141 0, /* sq_repeat */
3142 0, /* sq_item */
3143 0, /* sq_slice */
3144 0, /* sq_ass_item */
3145 0, /* sq_ass_slice */
3146 PyDict_Contains, /* sq_contains */
3147 0, /* sq_inplace_concat */
3148 0, /* sq_inplace_repeat */
3149 };
3150
3151 static PyObject *
dict_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3152 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3153 {
3154 PyObject *self;
3155 PyDictObject *d;
3156
3157 assert(type != NULL && type->tp_alloc != NULL);
3158 self = type->tp_alloc(type, 0);
3159 if (self == NULL)
3160 return NULL;
3161 d = (PyDictObject *)self;
3162
3163 /* The object has been implicitly tracked by tp_alloc */
3164 if (type == &PyDict_Type)
3165 _PyObject_GC_UNTRACK(d);
3166
3167 d->ma_used = 0;
3168 d->ma_version_tag = DICT_NEXT_VERSION();
3169 d->ma_keys = new_keys_object(PyDict_MINSIZE);
3170 if (d->ma_keys == NULL) {
3171 Py_DECREF(self);
3172 return NULL;
3173 }
3174 assert(_PyDict_CheckConsistency(d));
3175 return self;
3176 }
3177
3178 static int
dict_init(PyObject * self,PyObject * args,PyObject * kwds)3179 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
3180 {
3181 return dict_update_common(self, args, kwds, "dict");
3182 }
3183
3184 static PyObject *
dict_iter(PyDictObject * dict)3185 dict_iter(PyDictObject *dict)
3186 {
3187 return dictiter_new(dict, &PyDictIterKey_Type);
3188 }
3189
3190 PyDoc_STRVAR(dictionary_doc,
3191 "dict() -> new empty dictionary\n"
3192 "dict(mapping) -> new dictionary initialized from a mapping object's\n"
3193 " (key, value) pairs\n"
3194 "dict(iterable) -> new dictionary initialized as if via:\n"
3195 " d = {}\n"
3196 " for k, v in iterable:\n"
3197 " d[k] = v\n"
3198 "dict(**kwargs) -> new dictionary initialized with the name=value pairs\n"
3199 " in the keyword argument list. For example: dict(one=1, two=2)");
3200
3201 PyTypeObject PyDict_Type = {
3202 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3203 "dict",
3204 sizeof(PyDictObject),
3205 0,
3206 (destructor)dict_dealloc, /* tp_dealloc */
3207 0, /* tp_print */
3208 0, /* tp_getattr */
3209 0, /* tp_setattr */
3210 0, /* tp_reserved */
3211 (reprfunc)dict_repr, /* tp_repr */
3212 0, /* tp_as_number */
3213 &dict_as_sequence, /* tp_as_sequence */
3214 &dict_as_mapping, /* tp_as_mapping */
3215 PyObject_HashNotImplemented, /* tp_hash */
3216 0, /* tp_call */
3217 0, /* tp_str */
3218 PyObject_GenericGetAttr, /* tp_getattro */
3219 0, /* tp_setattro */
3220 0, /* tp_as_buffer */
3221 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3222 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DICT_SUBCLASS, /* tp_flags */
3223 dictionary_doc, /* tp_doc */
3224 dict_traverse, /* tp_traverse */
3225 dict_tp_clear, /* tp_clear */
3226 dict_richcompare, /* tp_richcompare */
3227 0, /* tp_weaklistoffset */
3228 (getiterfunc)dict_iter, /* tp_iter */
3229 0, /* tp_iternext */
3230 mapp_methods, /* tp_methods */
3231 0, /* tp_members */
3232 0, /* tp_getset */
3233 0, /* tp_base */
3234 0, /* tp_dict */
3235 0, /* tp_descr_get */
3236 0, /* tp_descr_set */
3237 0, /* tp_dictoffset */
3238 dict_init, /* tp_init */
3239 PyType_GenericAlloc, /* tp_alloc */
3240 dict_new, /* tp_new */
3241 PyObject_GC_Del, /* tp_free */
3242 };
3243
3244 PyObject *
_PyDict_GetItemId(PyObject * dp,struct _Py_Identifier * key)3245 _PyDict_GetItemId(PyObject *dp, struct _Py_Identifier *key)
3246 {
3247 PyObject *kv;
3248 kv = _PyUnicode_FromId(key); /* borrowed */
3249 if (kv == NULL) {
3250 PyErr_Clear();
3251 return NULL;
3252 }
3253 return PyDict_GetItem(dp, kv);
3254 }
3255
3256 /* For backward compatibility with old dictionary interface */
3257
3258 PyObject *
PyDict_GetItemString(PyObject * v,const char * key)3259 PyDict_GetItemString(PyObject *v, const char *key)
3260 {
3261 PyObject *kv, *rv;
3262 kv = PyUnicode_FromString(key);
3263 if (kv == NULL) {
3264 PyErr_Clear();
3265 return NULL;
3266 }
3267 rv = PyDict_GetItem(v, kv);
3268 Py_DECREF(kv);
3269 return rv;
3270 }
3271
3272 int
_PyDict_SetItemId(PyObject * v,struct _Py_Identifier * key,PyObject * item)3273 _PyDict_SetItemId(PyObject *v, struct _Py_Identifier *key, PyObject *item)
3274 {
3275 PyObject *kv;
3276 kv = _PyUnicode_FromId(key); /* borrowed */
3277 if (kv == NULL)
3278 return -1;
3279 return PyDict_SetItem(v, kv, item);
3280 }
3281
3282 int
PyDict_SetItemString(PyObject * v,const char * key,PyObject * item)3283 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
3284 {
3285 PyObject *kv;
3286 int err;
3287 kv = PyUnicode_FromString(key);
3288 if (kv == NULL)
3289 return -1;
3290 PyUnicode_InternInPlace(&kv); /* XXX Should we really? */
3291 err = PyDict_SetItem(v, kv, item);
3292 Py_DECREF(kv);
3293 return err;
3294 }
3295
3296 int
_PyDict_DelItemId(PyObject * v,_Py_Identifier * key)3297 _PyDict_DelItemId(PyObject *v, _Py_Identifier *key)
3298 {
3299 PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
3300 if (kv == NULL)
3301 return -1;
3302 return PyDict_DelItem(v, kv);
3303 }
3304
3305 int
PyDict_DelItemString(PyObject * v,const char * key)3306 PyDict_DelItemString(PyObject *v, const char *key)
3307 {
3308 PyObject *kv;
3309 int err;
3310 kv = PyUnicode_FromString(key);
3311 if (kv == NULL)
3312 return -1;
3313 err = PyDict_DelItem(v, kv);
3314 Py_DECREF(kv);
3315 return err;
3316 }
3317
3318 /* Dictionary iterator types */
3319
3320 typedef struct {
3321 PyObject_HEAD
3322 PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
3323 Py_ssize_t di_used;
3324 Py_ssize_t di_pos;
3325 PyObject* di_result; /* reusable result tuple for iteritems */
3326 Py_ssize_t len;
3327 } dictiterobject;
3328
3329 static PyObject *
dictiter_new(PyDictObject * dict,PyTypeObject * itertype)3330 dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
3331 {
3332 dictiterobject *di;
3333 di = PyObject_GC_New(dictiterobject, itertype);
3334 if (di == NULL)
3335 return NULL;
3336 Py_INCREF(dict);
3337 di->di_dict = dict;
3338 di->di_used = dict->ma_used;
3339 di->di_pos = 0;
3340 di->len = dict->ma_used;
3341 if (itertype == &PyDictIterItem_Type) {
3342 di->di_result = PyTuple_Pack(2, Py_None, Py_None);
3343 if (di->di_result == NULL) {
3344 Py_DECREF(di);
3345 return NULL;
3346 }
3347 }
3348 else
3349 di->di_result = NULL;
3350 _PyObject_GC_TRACK(di);
3351 return (PyObject *)di;
3352 }
3353
3354 static void
dictiter_dealloc(dictiterobject * di)3355 dictiter_dealloc(dictiterobject *di)
3356 {
3357 /* bpo-31095: UnTrack is needed before calling any callbacks */
3358 _PyObject_GC_UNTRACK(di);
3359 Py_XDECREF(di->di_dict);
3360 Py_XDECREF(di->di_result);
3361 PyObject_GC_Del(di);
3362 }
3363
3364 static int
dictiter_traverse(dictiterobject * di,visitproc visit,void * arg)3365 dictiter_traverse(dictiterobject *di, visitproc visit, void *arg)
3366 {
3367 Py_VISIT(di->di_dict);
3368 Py_VISIT(di->di_result);
3369 return 0;
3370 }
3371
3372 static PyObject *
dictiter_len(dictiterobject * di)3373 dictiter_len(dictiterobject *di)
3374 {
3375 Py_ssize_t len = 0;
3376 if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
3377 len = di->len;
3378 return PyLong_FromSize_t(len);
3379 }
3380
3381 PyDoc_STRVAR(length_hint_doc,
3382 "Private method returning an estimate of len(list(it)).");
3383
3384 static PyObject *
3385 dictiter_reduce(dictiterobject *di);
3386
3387 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3388
3389 static PyMethodDef dictiter_methods[] = {
3390 {"__length_hint__", (PyCFunction)dictiter_len, METH_NOARGS,
3391 length_hint_doc},
3392 {"__reduce__", (PyCFunction)dictiter_reduce, METH_NOARGS,
3393 reduce_doc},
3394 {NULL, NULL} /* sentinel */
3395 };
3396
3397 static PyObject*
dictiter_iternextkey(dictiterobject * di)3398 dictiter_iternextkey(dictiterobject *di)
3399 {
3400 PyObject *key;
3401 Py_ssize_t i;
3402 PyDictKeysObject *k;
3403 PyDictObject *d = di->di_dict;
3404
3405 if (d == NULL)
3406 return NULL;
3407 assert (PyDict_Check(d));
3408
3409 if (di->di_used != d->ma_used) {
3410 PyErr_SetString(PyExc_RuntimeError,
3411 "dictionary changed size during iteration");
3412 di->di_used = -1; /* Make this state sticky */
3413 return NULL;
3414 }
3415
3416 i = di->di_pos;
3417 k = d->ma_keys;
3418 assert(i >= 0);
3419 if (d->ma_values) {
3420 if (i >= d->ma_used)
3421 goto fail;
3422 key = DK_ENTRIES(k)[i].me_key;
3423 assert(d->ma_values[i] != NULL);
3424 }
3425 else {
3426 Py_ssize_t n = k->dk_nentries;
3427 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
3428 while (i < n && entry_ptr->me_value == NULL) {
3429 entry_ptr++;
3430 i++;
3431 }
3432 if (i >= n)
3433 goto fail;
3434 key = entry_ptr->me_key;
3435 }
3436 di->di_pos = i+1;
3437 di->len--;
3438 Py_INCREF(key);
3439 return key;
3440
3441 fail:
3442 di->di_dict = NULL;
3443 Py_DECREF(d);
3444 return NULL;
3445 }
3446
3447 PyTypeObject PyDictIterKey_Type = {
3448 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3449 "dict_keyiterator", /* tp_name */
3450 sizeof(dictiterobject), /* tp_basicsize */
3451 0, /* tp_itemsize */
3452 /* methods */
3453 (destructor)dictiter_dealloc, /* tp_dealloc */
3454 0, /* tp_print */
3455 0, /* tp_getattr */
3456 0, /* tp_setattr */
3457 0, /* tp_reserved */
3458 0, /* tp_repr */
3459 0, /* tp_as_number */
3460 0, /* tp_as_sequence */
3461 0, /* tp_as_mapping */
3462 0, /* tp_hash */
3463 0, /* tp_call */
3464 0, /* tp_str */
3465 PyObject_GenericGetAttr, /* tp_getattro */
3466 0, /* tp_setattro */
3467 0, /* tp_as_buffer */
3468 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3469 0, /* tp_doc */
3470 (traverseproc)dictiter_traverse, /* tp_traverse */
3471 0, /* tp_clear */
3472 0, /* tp_richcompare */
3473 0, /* tp_weaklistoffset */
3474 PyObject_SelfIter, /* tp_iter */
3475 (iternextfunc)dictiter_iternextkey, /* tp_iternext */
3476 dictiter_methods, /* tp_methods */
3477 0,
3478 };
3479
3480 static PyObject *
dictiter_iternextvalue(dictiterobject * di)3481 dictiter_iternextvalue(dictiterobject *di)
3482 {
3483 PyObject *value;
3484 Py_ssize_t i;
3485 PyDictObject *d = di->di_dict;
3486
3487 if (d == NULL)
3488 return NULL;
3489 assert (PyDict_Check(d));
3490
3491 if (di->di_used != d->ma_used) {
3492 PyErr_SetString(PyExc_RuntimeError,
3493 "dictionary changed size during iteration");
3494 di->di_used = -1; /* Make this state sticky */
3495 return NULL;
3496 }
3497
3498 i = di->di_pos;
3499 assert(i >= 0);
3500 if (d->ma_values) {
3501 if (i >= d->ma_used)
3502 goto fail;
3503 value = d->ma_values[i];
3504 assert(value != NULL);
3505 }
3506 else {
3507 Py_ssize_t n = d->ma_keys->dk_nentries;
3508 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
3509 while (i < n && entry_ptr->me_value == NULL) {
3510 entry_ptr++;
3511 i++;
3512 }
3513 if (i >= n)
3514 goto fail;
3515 value = entry_ptr->me_value;
3516 }
3517 di->di_pos = i+1;
3518 di->len--;
3519 Py_INCREF(value);
3520 return value;
3521
3522 fail:
3523 di->di_dict = NULL;
3524 Py_DECREF(d);
3525 return NULL;
3526 }
3527
3528 PyTypeObject PyDictIterValue_Type = {
3529 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3530 "dict_valueiterator", /* tp_name */
3531 sizeof(dictiterobject), /* tp_basicsize */
3532 0, /* tp_itemsize */
3533 /* methods */
3534 (destructor)dictiter_dealloc, /* tp_dealloc */
3535 0, /* tp_print */
3536 0, /* tp_getattr */
3537 0, /* tp_setattr */
3538 0, /* tp_reserved */
3539 0, /* tp_repr */
3540 0, /* tp_as_number */
3541 0, /* tp_as_sequence */
3542 0, /* tp_as_mapping */
3543 0, /* tp_hash */
3544 0, /* tp_call */
3545 0, /* tp_str */
3546 PyObject_GenericGetAttr, /* tp_getattro */
3547 0, /* tp_setattro */
3548 0, /* tp_as_buffer */
3549 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3550 0, /* tp_doc */
3551 (traverseproc)dictiter_traverse, /* tp_traverse */
3552 0, /* tp_clear */
3553 0, /* tp_richcompare */
3554 0, /* tp_weaklistoffset */
3555 PyObject_SelfIter, /* tp_iter */
3556 (iternextfunc)dictiter_iternextvalue, /* tp_iternext */
3557 dictiter_methods, /* tp_methods */
3558 0,
3559 };
3560
3561 static PyObject *
dictiter_iternextitem(dictiterobject * di)3562 dictiter_iternextitem(dictiterobject *di)
3563 {
3564 PyObject *key, *value, *result;
3565 Py_ssize_t i;
3566 PyDictObject *d = di->di_dict;
3567
3568 if (d == NULL)
3569 return NULL;
3570 assert (PyDict_Check(d));
3571
3572 if (di->di_used != d->ma_used) {
3573 PyErr_SetString(PyExc_RuntimeError,
3574 "dictionary changed size during iteration");
3575 di->di_used = -1; /* Make this state sticky */
3576 return NULL;
3577 }
3578
3579 i = di->di_pos;
3580 assert(i >= 0);
3581 if (d->ma_values) {
3582 if (i >= d->ma_used)
3583 goto fail;
3584 key = DK_ENTRIES(d->ma_keys)[i].me_key;
3585 value = d->ma_values[i];
3586 assert(value != NULL);
3587 }
3588 else {
3589 Py_ssize_t n = d->ma_keys->dk_nentries;
3590 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
3591 while (i < n && entry_ptr->me_value == NULL) {
3592 entry_ptr++;
3593 i++;
3594 }
3595 if (i >= n)
3596 goto fail;
3597 key = entry_ptr->me_key;
3598 value = entry_ptr->me_value;
3599 }
3600 di->di_pos = i+1;
3601 di->len--;
3602 Py_INCREF(key);
3603 Py_INCREF(value);
3604 result = di->di_result;
3605 if (Py_REFCNT(result) == 1) {
3606 PyObject *oldkey = PyTuple_GET_ITEM(result, 0);
3607 PyObject *oldvalue = PyTuple_GET_ITEM(result, 1);
3608 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
3609 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
3610 Py_INCREF(result);
3611 Py_DECREF(oldkey);
3612 Py_DECREF(oldvalue);
3613 }
3614 else {
3615 result = PyTuple_New(2);
3616 if (result == NULL)
3617 return NULL;
3618 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
3619 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
3620 }
3621 return result;
3622
3623 fail:
3624 di->di_dict = NULL;
3625 Py_DECREF(d);
3626 return NULL;
3627 }
3628
3629 PyTypeObject PyDictIterItem_Type = {
3630 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3631 "dict_itemiterator", /* tp_name */
3632 sizeof(dictiterobject), /* tp_basicsize */
3633 0, /* tp_itemsize */
3634 /* methods */
3635 (destructor)dictiter_dealloc, /* tp_dealloc */
3636 0, /* tp_print */
3637 0, /* tp_getattr */
3638 0, /* tp_setattr */
3639 0, /* tp_reserved */
3640 0, /* tp_repr */
3641 0, /* tp_as_number */
3642 0, /* tp_as_sequence */
3643 0, /* tp_as_mapping */
3644 0, /* tp_hash */
3645 0, /* tp_call */
3646 0, /* tp_str */
3647 PyObject_GenericGetAttr, /* tp_getattro */
3648 0, /* tp_setattro */
3649 0, /* tp_as_buffer */
3650 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3651 0, /* tp_doc */
3652 (traverseproc)dictiter_traverse, /* tp_traverse */
3653 0, /* tp_clear */
3654 0, /* tp_richcompare */
3655 0, /* tp_weaklistoffset */
3656 PyObject_SelfIter, /* tp_iter */
3657 (iternextfunc)dictiter_iternextitem, /* tp_iternext */
3658 dictiter_methods, /* tp_methods */
3659 0,
3660 };
3661
3662
3663 static PyObject *
dictiter_reduce(dictiterobject * di)3664 dictiter_reduce(dictiterobject *di)
3665 {
3666 PyObject *list;
3667 dictiterobject tmp;
3668
3669 list = PyList_New(0);
3670 if (!list)
3671 return NULL;
3672
3673 /* copy the itertor state */
3674 tmp = *di;
3675 Py_XINCREF(tmp.di_dict);
3676
3677 /* iterate the temporary into a list */
3678 for(;;) {
3679 PyObject *element = 0;
3680 if (Py_TYPE(di) == &PyDictIterItem_Type)
3681 element = dictiter_iternextitem(&tmp);
3682 else if (Py_TYPE(di) == &PyDictIterKey_Type)
3683 element = dictiter_iternextkey(&tmp);
3684 else if (Py_TYPE(di) == &PyDictIterValue_Type)
3685 element = dictiter_iternextvalue(&tmp);
3686 else
3687 Py_UNREACHABLE();
3688 if (element) {
3689 if (PyList_Append(list, element)) {
3690 Py_DECREF(element);
3691 Py_DECREF(list);
3692 Py_XDECREF(tmp.di_dict);
3693 return NULL;
3694 }
3695 Py_DECREF(element);
3696 } else
3697 break;
3698 }
3699 Py_XDECREF(tmp.di_dict);
3700 /* check for error */
3701 if (tmp.di_dict != NULL) {
3702 /* we have an error */
3703 Py_DECREF(list);
3704 return NULL;
3705 }
3706 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), list);
3707 }
3708
3709 /***********************************************/
3710 /* View objects for keys(), items(), values(). */
3711 /***********************************************/
3712
3713 /* The instance lay-out is the same for all three; but the type differs. */
3714
3715 static void
dictview_dealloc(_PyDictViewObject * dv)3716 dictview_dealloc(_PyDictViewObject *dv)
3717 {
3718 /* bpo-31095: UnTrack is needed before calling any callbacks */
3719 _PyObject_GC_UNTRACK(dv);
3720 Py_XDECREF(dv->dv_dict);
3721 PyObject_GC_Del(dv);
3722 }
3723
3724 static int
dictview_traverse(_PyDictViewObject * dv,visitproc visit,void * arg)3725 dictview_traverse(_PyDictViewObject *dv, visitproc visit, void *arg)
3726 {
3727 Py_VISIT(dv->dv_dict);
3728 return 0;
3729 }
3730
3731 static Py_ssize_t
dictview_len(_PyDictViewObject * dv)3732 dictview_len(_PyDictViewObject *dv)
3733 {
3734 Py_ssize_t len = 0;
3735 if (dv->dv_dict != NULL)
3736 len = dv->dv_dict->ma_used;
3737 return len;
3738 }
3739
3740 PyObject *
_PyDictView_New(PyObject * dict,PyTypeObject * type)3741 _PyDictView_New(PyObject *dict, PyTypeObject *type)
3742 {
3743 _PyDictViewObject *dv;
3744 if (dict == NULL) {
3745 PyErr_BadInternalCall();
3746 return NULL;
3747 }
3748 if (!PyDict_Check(dict)) {
3749 /* XXX Get rid of this restriction later */
3750 PyErr_Format(PyExc_TypeError,
3751 "%s() requires a dict argument, not '%s'",
3752 type->tp_name, dict->ob_type->tp_name);
3753 return NULL;
3754 }
3755 dv = PyObject_GC_New(_PyDictViewObject, type);
3756 if (dv == NULL)
3757 return NULL;
3758 Py_INCREF(dict);
3759 dv->dv_dict = (PyDictObject *)dict;
3760 _PyObject_GC_TRACK(dv);
3761 return (PyObject *)dv;
3762 }
3763
3764 /* TODO(guido): The views objects are not complete:
3765
3766 * support more set operations
3767 * support arbitrary mappings?
3768 - either these should be static or exported in dictobject.h
3769 - if public then they should probably be in builtins
3770 */
3771
3772 /* Return 1 if self is a subset of other, iterating over self;
3773 0 if not; -1 if an error occurred. */
3774 static int
all_contained_in(PyObject * self,PyObject * other)3775 all_contained_in(PyObject *self, PyObject *other)
3776 {
3777 PyObject *iter = PyObject_GetIter(self);
3778 int ok = 1;
3779
3780 if (iter == NULL)
3781 return -1;
3782 for (;;) {
3783 PyObject *next = PyIter_Next(iter);
3784 if (next == NULL) {
3785 if (PyErr_Occurred())
3786 ok = -1;
3787 break;
3788 }
3789 ok = PySequence_Contains(other, next);
3790 Py_DECREF(next);
3791 if (ok <= 0)
3792 break;
3793 }
3794 Py_DECREF(iter);
3795 return ok;
3796 }
3797
3798 static PyObject *
dictview_richcompare(PyObject * self,PyObject * other,int op)3799 dictview_richcompare(PyObject *self, PyObject *other, int op)
3800 {
3801 Py_ssize_t len_self, len_other;
3802 int ok;
3803 PyObject *result;
3804
3805 assert(self != NULL);
3806 assert(PyDictViewSet_Check(self));
3807 assert(other != NULL);
3808
3809 if (!PyAnySet_Check(other) && !PyDictViewSet_Check(other))
3810 Py_RETURN_NOTIMPLEMENTED;
3811
3812 len_self = PyObject_Size(self);
3813 if (len_self < 0)
3814 return NULL;
3815 len_other = PyObject_Size(other);
3816 if (len_other < 0)
3817 return NULL;
3818
3819 ok = 0;
3820 switch(op) {
3821
3822 case Py_NE:
3823 case Py_EQ:
3824 if (len_self == len_other)
3825 ok = all_contained_in(self, other);
3826 if (op == Py_NE && ok >= 0)
3827 ok = !ok;
3828 break;
3829
3830 case Py_LT:
3831 if (len_self < len_other)
3832 ok = all_contained_in(self, other);
3833 break;
3834
3835 case Py_LE:
3836 if (len_self <= len_other)
3837 ok = all_contained_in(self, other);
3838 break;
3839
3840 case Py_GT:
3841 if (len_self > len_other)
3842 ok = all_contained_in(other, self);
3843 break;
3844
3845 case Py_GE:
3846 if (len_self >= len_other)
3847 ok = all_contained_in(other, self);
3848 break;
3849
3850 }
3851 if (ok < 0)
3852 return NULL;
3853 result = ok ? Py_True : Py_False;
3854 Py_INCREF(result);
3855 return result;
3856 }
3857
3858 static PyObject *
dictview_repr(_PyDictViewObject * dv)3859 dictview_repr(_PyDictViewObject *dv)
3860 {
3861 PyObject *seq;
3862 PyObject *result = NULL;
3863 Py_ssize_t rc;
3864
3865 rc = Py_ReprEnter((PyObject *)dv);
3866 if (rc != 0) {
3867 return rc > 0 ? PyUnicode_FromString("...") : NULL;
3868 }
3869 seq = PySequence_List((PyObject *)dv);
3870 if (seq == NULL) {
3871 goto Done;
3872 }
3873 result = PyUnicode_FromFormat("%s(%R)", Py_TYPE(dv)->tp_name, seq);
3874 Py_DECREF(seq);
3875
3876 Done:
3877 Py_ReprLeave((PyObject *)dv);
3878 return result;
3879 }
3880
3881 /*** dict_keys ***/
3882
3883 static PyObject *
dictkeys_iter(_PyDictViewObject * dv)3884 dictkeys_iter(_PyDictViewObject *dv)
3885 {
3886 if (dv->dv_dict == NULL) {
3887 Py_RETURN_NONE;
3888 }
3889 return dictiter_new(dv->dv_dict, &PyDictIterKey_Type);
3890 }
3891
3892 static int
dictkeys_contains(_PyDictViewObject * dv,PyObject * obj)3893 dictkeys_contains(_PyDictViewObject *dv, PyObject *obj)
3894 {
3895 if (dv->dv_dict == NULL)
3896 return 0;
3897 return PyDict_Contains((PyObject *)dv->dv_dict, obj);
3898 }
3899
3900 static PySequenceMethods dictkeys_as_sequence = {
3901 (lenfunc)dictview_len, /* sq_length */
3902 0, /* sq_concat */
3903 0, /* sq_repeat */
3904 0, /* sq_item */
3905 0, /* sq_slice */
3906 0, /* sq_ass_item */
3907 0, /* sq_ass_slice */
3908 (objobjproc)dictkeys_contains, /* sq_contains */
3909 };
3910
3911 static PyObject*
dictviews_sub(PyObject * self,PyObject * other)3912 dictviews_sub(PyObject* self, PyObject *other)
3913 {
3914 PyObject *result = PySet_New(self);
3915 PyObject *tmp;
3916 _Py_IDENTIFIER(difference_update);
3917
3918 if (result == NULL)
3919 return NULL;
3920
3921 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_difference_update, other, NULL);
3922 if (tmp == NULL) {
3923 Py_DECREF(result);
3924 return NULL;
3925 }
3926
3927 Py_DECREF(tmp);
3928 return result;
3929 }
3930
3931 PyObject*
_PyDictView_Intersect(PyObject * self,PyObject * other)3932 _PyDictView_Intersect(PyObject* self, PyObject *other)
3933 {
3934 PyObject *result = PySet_New(self);
3935 PyObject *tmp;
3936 _Py_IDENTIFIER(intersection_update);
3937
3938 if (result == NULL)
3939 return NULL;
3940
3941 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_intersection_update, other, NULL);
3942 if (tmp == NULL) {
3943 Py_DECREF(result);
3944 return NULL;
3945 }
3946
3947 Py_DECREF(tmp);
3948 return result;
3949 }
3950
3951 static PyObject*
dictviews_or(PyObject * self,PyObject * other)3952 dictviews_or(PyObject* self, PyObject *other)
3953 {
3954 PyObject *result = PySet_New(self);
3955 PyObject *tmp;
3956 _Py_IDENTIFIER(update);
3957
3958 if (result == NULL)
3959 return NULL;
3960
3961 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_update, other, NULL);
3962 if (tmp == NULL) {
3963 Py_DECREF(result);
3964 return NULL;
3965 }
3966
3967 Py_DECREF(tmp);
3968 return result;
3969 }
3970
3971 static PyObject*
dictviews_xor(PyObject * self,PyObject * other)3972 dictviews_xor(PyObject* self, PyObject *other)
3973 {
3974 PyObject *result = PySet_New(self);
3975 PyObject *tmp;
3976 _Py_IDENTIFIER(symmetric_difference_update);
3977
3978 if (result == NULL)
3979 return NULL;
3980
3981 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_symmetric_difference_update, other, NULL);
3982 if (tmp == NULL) {
3983 Py_DECREF(result);
3984 return NULL;
3985 }
3986
3987 Py_DECREF(tmp);
3988 return result;
3989 }
3990
3991 static PyNumberMethods dictviews_as_number = {
3992 0, /*nb_add*/
3993 (binaryfunc)dictviews_sub, /*nb_subtract*/
3994 0, /*nb_multiply*/
3995 0, /*nb_remainder*/
3996 0, /*nb_divmod*/
3997 0, /*nb_power*/
3998 0, /*nb_negative*/
3999 0, /*nb_positive*/
4000 0, /*nb_absolute*/
4001 0, /*nb_bool*/
4002 0, /*nb_invert*/
4003 0, /*nb_lshift*/
4004 0, /*nb_rshift*/
4005 (binaryfunc)_PyDictView_Intersect, /*nb_and*/
4006 (binaryfunc)dictviews_xor, /*nb_xor*/
4007 (binaryfunc)dictviews_or, /*nb_or*/
4008 };
4009
4010 static PyObject*
dictviews_isdisjoint(PyObject * self,PyObject * other)4011 dictviews_isdisjoint(PyObject *self, PyObject *other)
4012 {
4013 PyObject *it;
4014 PyObject *item = NULL;
4015
4016 if (self == other) {
4017 if (dictview_len((_PyDictViewObject *)self) == 0)
4018 Py_RETURN_TRUE;
4019 else
4020 Py_RETURN_FALSE;
4021 }
4022
4023 /* Iterate over the shorter object (only if other is a set,
4024 * because PySequence_Contains may be expensive otherwise): */
4025 if (PyAnySet_Check(other) || PyDictViewSet_Check(other)) {
4026 Py_ssize_t len_self = dictview_len((_PyDictViewObject *)self);
4027 Py_ssize_t len_other = PyObject_Size(other);
4028 if (len_other == -1)
4029 return NULL;
4030
4031 if ((len_other > len_self)) {
4032 PyObject *tmp = other;
4033 other = self;
4034 self = tmp;
4035 }
4036 }
4037
4038 it = PyObject_GetIter(other);
4039 if (it == NULL)
4040 return NULL;
4041
4042 while ((item = PyIter_Next(it)) != NULL) {
4043 int contains = PySequence_Contains(self, item);
4044 Py_DECREF(item);
4045 if (contains == -1) {
4046 Py_DECREF(it);
4047 return NULL;
4048 }
4049
4050 if (contains) {
4051 Py_DECREF(it);
4052 Py_RETURN_FALSE;
4053 }
4054 }
4055 Py_DECREF(it);
4056 if (PyErr_Occurred())
4057 return NULL; /* PyIter_Next raised an exception. */
4058 Py_RETURN_TRUE;
4059 }
4060
4061 PyDoc_STRVAR(isdisjoint_doc,
4062 "Return True if the view and the given iterable have a null intersection.");
4063
4064 static PyMethodDef dictkeys_methods[] = {
4065 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
4066 isdisjoint_doc},
4067 {NULL, NULL} /* sentinel */
4068 };
4069
4070 PyTypeObject PyDictKeys_Type = {
4071 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4072 "dict_keys", /* tp_name */
4073 sizeof(_PyDictViewObject), /* tp_basicsize */
4074 0, /* tp_itemsize */
4075 /* methods */
4076 (destructor)dictview_dealloc, /* tp_dealloc */
4077 0, /* tp_print */
4078 0, /* tp_getattr */
4079 0, /* tp_setattr */
4080 0, /* tp_reserved */
4081 (reprfunc)dictview_repr, /* tp_repr */
4082 &dictviews_as_number, /* tp_as_number */
4083 &dictkeys_as_sequence, /* tp_as_sequence */
4084 0, /* tp_as_mapping */
4085 0, /* tp_hash */
4086 0, /* tp_call */
4087 0, /* tp_str */
4088 PyObject_GenericGetAttr, /* tp_getattro */
4089 0, /* tp_setattro */
4090 0, /* tp_as_buffer */
4091 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4092 0, /* tp_doc */
4093 (traverseproc)dictview_traverse, /* tp_traverse */
4094 0, /* tp_clear */
4095 dictview_richcompare, /* tp_richcompare */
4096 0, /* tp_weaklistoffset */
4097 (getiterfunc)dictkeys_iter, /* tp_iter */
4098 0, /* tp_iternext */
4099 dictkeys_methods, /* tp_methods */
4100 0,
4101 };
4102
4103 static PyObject *
dictkeys_new(PyObject * dict)4104 dictkeys_new(PyObject *dict)
4105 {
4106 return _PyDictView_New(dict, &PyDictKeys_Type);
4107 }
4108
4109 /*** dict_items ***/
4110
4111 static PyObject *
dictitems_iter(_PyDictViewObject * dv)4112 dictitems_iter(_PyDictViewObject *dv)
4113 {
4114 if (dv->dv_dict == NULL) {
4115 Py_RETURN_NONE;
4116 }
4117 return dictiter_new(dv->dv_dict, &PyDictIterItem_Type);
4118 }
4119
4120 static int
dictitems_contains(_PyDictViewObject * dv,PyObject * obj)4121 dictitems_contains(_PyDictViewObject *dv, PyObject *obj)
4122 {
4123 int result;
4124 PyObject *key, *value, *found;
4125 if (dv->dv_dict == NULL)
4126 return 0;
4127 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 2)
4128 return 0;
4129 key = PyTuple_GET_ITEM(obj, 0);
4130 value = PyTuple_GET_ITEM(obj, 1);
4131 found = PyDict_GetItemWithError((PyObject *)dv->dv_dict, key);
4132 if (found == NULL) {
4133 if (PyErr_Occurred())
4134 return -1;
4135 return 0;
4136 }
4137 Py_INCREF(found);
4138 result = PyObject_RichCompareBool(value, found, Py_EQ);
4139 Py_DECREF(found);
4140 return result;
4141 }
4142
4143 static PySequenceMethods dictitems_as_sequence = {
4144 (lenfunc)dictview_len, /* sq_length */
4145 0, /* sq_concat */
4146 0, /* sq_repeat */
4147 0, /* sq_item */
4148 0, /* sq_slice */
4149 0, /* sq_ass_item */
4150 0, /* sq_ass_slice */
4151 (objobjproc)dictitems_contains, /* sq_contains */
4152 };
4153
4154 static PyMethodDef dictitems_methods[] = {
4155 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
4156 isdisjoint_doc},
4157 {NULL, NULL} /* sentinel */
4158 };
4159
4160 PyTypeObject PyDictItems_Type = {
4161 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4162 "dict_items", /* tp_name */
4163 sizeof(_PyDictViewObject), /* tp_basicsize */
4164 0, /* tp_itemsize */
4165 /* methods */
4166 (destructor)dictview_dealloc, /* tp_dealloc */
4167 0, /* tp_print */
4168 0, /* tp_getattr */
4169 0, /* tp_setattr */
4170 0, /* tp_reserved */
4171 (reprfunc)dictview_repr, /* tp_repr */
4172 &dictviews_as_number, /* tp_as_number */
4173 &dictitems_as_sequence, /* tp_as_sequence */
4174 0, /* tp_as_mapping */
4175 0, /* tp_hash */
4176 0, /* tp_call */
4177 0, /* tp_str */
4178 PyObject_GenericGetAttr, /* tp_getattro */
4179 0, /* tp_setattro */
4180 0, /* tp_as_buffer */
4181 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4182 0, /* tp_doc */
4183 (traverseproc)dictview_traverse, /* tp_traverse */
4184 0, /* tp_clear */
4185 dictview_richcompare, /* tp_richcompare */
4186 0, /* tp_weaklistoffset */
4187 (getiterfunc)dictitems_iter, /* tp_iter */
4188 0, /* tp_iternext */
4189 dictitems_methods, /* tp_methods */
4190 0,
4191 };
4192
4193 static PyObject *
dictitems_new(PyObject * dict)4194 dictitems_new(PyObject *dict)
4195 {
4196 return _PyDictView_New(dict, &PyDictItems_Type);
4197 }
4198
4199 /*** dict_values ***/
4200
4201 static PyObject *
dictvalues_iter(_PyDictViewObject * dv)4202 dictvalues_iter(_PyDictViewObject *dv)
4203 {
4204 if (dv->dv_dict == NULL) {
4205 Py_RETURN_NONE;
4206 }
4207 return dictiter_new(dv->dv_dict, &PyDictIterValue_Type);
4208 }
4209
4210 static PySequenceMethods dictvalues_as_sequence = {
4211 (lenfunc)dictview_len, /* sq_length */
4212 0, /* sq_concat */
4213 0, /* sq_repeat */
4214 0, /* sq_item */
4215 0, /* sq_slice */
4216 0, /* sq_ass_item */
4217 0, /* sq_ass_slice */
4218 (objobjproc)0, /* sq_contains */
4219 };
4220
4221 static PyMethodDef dictvalues_methods[] = {
4222 {NULL, NULL} /* sentinel */
4223 };
4224
4225 PyTypeObject PyDictValues_Type = {
4226 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4227 "dict_values", /* tp_name */
4228 sizeof(_PyDictViewObject), /* tp_basicsize */
4229 0, /* tp_itemsize */
4230 /* methods */
4231 (destructor)dictview_dealloc, /* tp_dealloc */
4232 0, /* tp_print */
4233 0, /* tp_getattr */
4234 0, /* tp_setattr */
4235 0, /* tp_reserved */
4236 (reprfunc)dictview_repr, /* tp_repr */
4237 0, /* tp_as_number */
4238 &dictvalues_as_sequence, /* tp_as_sequence */
4239 0, /* tp_as_mapping */
4240 0, /* tp_hash */
4241 0, /* tp_call */
4242 0, /* tp_str */
4243 PyObject_GenericGetAttr, /* tp_getattro */
4244 0, /* tp_setattro */
4245 0, /* tp_as_buffer */
4246 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4247 0, /* tp_doc */
4248 (traverseproc)dictview_traverse, /* tp_traverse */
4249 0, /* tp_clear */
4250 0, /* tp_richcompare */
4251 0, /* tp_weaklistoffset */
4252 (getiterfunc)dictvalues_iter, /* tp_iter */
4253 0, /* tp_iternext */
4254 dictvalues_methods, /* tp_methods */
4255 0,
4256 };
4257
4258 static PyObject *
dictvalues_new(PyObject * dict)4259 dictvalues_new(PyObject *dict)
4260 {
4261 return _PyDictView_New(dict, &PyDictValues_Type);
4262 }
4263
4264 /* Returns NULL if cannot allocate a new PyDictKeysObject,
4265 but does not set an error */
4266 PyDictKeysObject *
_PyDict_NewKeysForClass(void)4267 _PyDict_NewKeysForClass(void)
4268 {
4269 PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
4270 if (keys == NULL)
4271 PyErr_Clear();
4272 else
4273 keys->dk_lookup = lookdict_split;
4274 return keys;
4275 }
4276
4277 #define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys)
4278
4279 PyObject *
PyObject_GenericGetDict(PyObject * obj,void * context)4280 PyObject_GenericGetDict(PyObject *obj, void *context)
4281 {
4282 PyObject *dict, **dictptr = _PyObject_GetDictPtr(obj);
4283 if (dictptr == NULL) {
4284 PyErr_SetString(PyExc_AttributeError,
4285 "This object has no __dict__");
4286 return NULL;
4287 }
4288 dict = *dictptr;
4289 if (dict == NULL) {
4290 PyTypeObject *tp = Py_TYPE(obj);
4291 if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) {
4292 DK_INCREF(CACHED_KEYS(tp));
4293 *dictptr = dict = new_dict_with_shared_keys(CACHED_KEYS(tp));
4294 }
4295 else {
4296 *dictptr = dict = PyDict_New();
4297 }
4298 }
4299 Py_XINCREF(dict);
4300 return dict;
4301 }
4302
4303 int
_PyObjectDict_SetItem(PyTypeObject * tp,PyObject ** dictptr,PyObject * key,PyObject * value)4304 _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
4305 PyObject *key, PyObject *value)
4306 {
4307 PyObject *dict;
4308 int res;
4309 PyDictKeysObject *cached;
4310
4311 assert(dictptr != NULL);
4312 if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) {
4313 assert(dictptr != NULL);
4314 dict = *dictptr;
4315 if (dict == NULL) {
4316 DK_INCREF(cached);
4317 dict = new_dict_with_shared_keys(cached);
4318 if (dict == NULL)
4319 return -1;
4320 *dictptr = dict;
4321 }
4322 if (value == NULL) {
4323 res = PyDict_DelItem(dict, key);
4324 // Since key sharing dict doesn't allow deletion, PyDict_DelItem()
4325 // always converts dict to combined form.
4326 if ((cached = CACHED_KEYS(tp)) != NULL) {
4327 CACHED_KEYS(tp) = NULL;
4328 DK_DECREF(cached);
4329 }
4330 }
4331 else {
4332 int was_shared = (cached == ((PyDictObject *)dict)->ma_keys);
4333 res = PyDict_SetItem(dict, key, value);
4334 if (was_shared &&
4335 (cached = CACHED_KEYS(tp)) != NULL &&
4336 cached != ((PyDictObject *)dict)->ma_keys) {
4337 /* PyDict_SetItem() may call dictresize and convert split table
4338 * into combined table. In such case, convert it to split
4339 * table again and update type's shared key only when this is
4340 * the only dict sharing key with the type.
4341 *
4342 * This is to allow using shared key in class like this:
4343 *
4344 * class C:
4345 * def __init__(self):
4346 * # one dict resize happens
4347 * self.a, self.b, self.c = 1, 2, 3
4348 * self.d, self.e, self.f = 4, 5, 6
4349 * a = C()
4350 */
4351 if (cached->dk_refcnt == 1) {
4352 CACHED_KEYS(tp) = make_keys_shared(dict);
4353 }
4354 else {
4355 CACHED_KEYS(tp) = NULL;
4356 }
4357 DK_DECREF(cached);
4358 if (CACHED_KEYS(tp) == NULL && PyErr_Occurred())
4359 return -1;
4360 }
4361 }
4362 } else {
4363 dict = *dictptr;
4364 if (dict == NULL) {
4365 dict = PyDict_New();
4366 if (dict == NULL)
4367 return -1;
4368 *dictptr = dict;
4369 }
4370 if (value == NULL) {
4371 res = PyDict_DelItem(dict, key);
4372 } else {
4373 res = PyDict_SetItem(dict, key, value);
4374 }
4375 }
4376 return res;
4377 }
4378
4379 void
_PyDictKeys_DecRef(PyDictKeysObject * keys)4380 _PyDictKeys_DecRef(PyDictKeysObject *keys)
4381 {
4382 DK_DECREF(keys);
4383 }
4384