1 /* Dictionary object implementation using a hash table */
2
3 /* The distribution includes a separate file, Objects/dictnotes.txt,
4 describing explorations into dictionary design and optimization.
5 It covers typical dictionary use patterns, the parameters for
6 tuning dictionaries, and several ideas for possible optimizations.
7 */
8
9 /* PyDictKeysObject
10
11 This implements the dictionary's hashtable.
12
13 As of Python 3.6, this is compact and ordered. Basic idea is described here.
14 https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html
15
16 layout:
17
18 +---------------+
19 | dk_refcnt |
20 | dk_size |
21 | dk_lookup |
22 | dk_usable |
23 | dk_nentries |
24 +---------------+
25 | dk_indices |
26 | |
27 +---------------+
28 | dk_entries |
29 | |
30 +---------------+
31
32 dk_indices is actual hashtable. It holds index in entries, or DKIX_EMPTY(-1)
33 or DKIX_DUMMY(-2).
34 Size of indices is dk_size. Type of each index in indices is vary on dk_size:
35
36 * int8 for dk_size <= 128
37 * int16 for 256 <= dk_size <= 2**15
38 * int32 for 2**16 <= dk_size <= 2**31
39 * int64 for 2**32 <= dk_size
40
41 dk_entries is array of PyDictKeyEntry. It's size is USABLE_FRACTION(dk_size).
42 DK_ENTRIES(dk) can be used to get pointer to entries.
43
44 NOTE: Since negative value is used for DKIX_EMPTY and DKIX_DUMMY, type of
45 dk_indices entry is signed integer and int16 is used for table which
46 dk_size == 256.
47 */
48
49
50 /*
51 The DictObject can be in one of two forms.
52
53 Either:
54 A combined table:
55 ma_values == NULL, dk_refcnt == 1.
56 Values are stored in the me_value field of the PyDictKeysObject.
57 Or:
58 A split table:
59 ma_values != NULL, dk_refcnt >= 1
60 Values are stored in the ma_values array.
61 Only string (unicode) keys are allowed.
62 All dicts sharing same key must have same insertion order.
63
64 There are four kinds of slots in the table (slot is index, and
65 DK_ENTRIES(keys)[index] if index >= 0):
66
67 1. Unused. index == DKIX_EMPTY
68 Does not hold an active (key, value) pair now and never did. Unused can
69 transition to Active upon key insertion. This is each slot's initial state.
70
71 2. Active. index >= 0, me_key != NULL and me_value != NULL
72 Holds an active (key, value) pair. Active can transition to Dummy or
73 Pending upon key deletion (for combined and split tables respectively).
74 This is the only case in which me_value != NULL.
75
76 3. Dummy. index == DKIX_DUMMY (combined only)
77 Previously held an active (key, value) pair, but that was deleted and an
78 active pair has not yet overwritten the slot. Dummy can transition to
79 Active upon key insertion. Dummy slots cannot be made Unused again
80 else the probe sequence in case of collision would have no way to know
81 they were once active.
82
83 4. Pending. index >= 0, key != NULL, and value == NULL (split only)
84 Not yet inserted in split-table.
85 */
86
87 /*
88 Preserving insertion order
89
90 It's simple for combined table. Since dk_entries is mostly append only, we can
91 get insertion order by just iterating dk_entries.
92
93 One exception is .popitem(). It removes last item in dk_entries and decrement
94 dk_nentries to achieve amortized O(1). Since there are DKIX_DUMMY remains in
95 dk_indices, we can't increment dk_usable even though dk_nentries is
96 decremented.
97
98 In split table, inserting into pending entry is allowed only for dk_entries[ix]
99 where ix == mp->ma_used. Inserting into other index and deleting item cause
100 converting the dict to the combined table.
101 */
102
103 /* PyDict_MINSIZE is the starting size for any new dict.
104 * 8 allows dicts with no more than 5 active entries; experiments suggested
105 * this suffices for the majority of dicts (consisting mostly of usually-small
106 * dicts created to pass keyword arguments).
107 * Making this 8, rather than 4 reduces the number of resizes for most
108 * dictionaries, without any significant extra memory use.
109 */
110 #define PyDict_MINSIZE 8
111
112 #include "Python.h"
113 #include "dict-common.h"
114 #include "stringlib/eq.h" /* to get unicode_eq() */
115
116 /*[clinic input]
117 class dict "PyDictObject *" "&PyDict_Type"
118 [clinic start generated code]*/
119 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=f157a5a0ce9589d6]*/
120
121
122 /*
123 To ensure the lookup algorithm terminates, there must be at least one Unused
124 slot (NULL key) in the table.
125 To avoid slowing down lookups on a near-full table, we resize the table when
126 it's USABLE_FRACTION (currently two-thirds) full.
127 */
128
129 #define PERTURB_SHIFT 5
130
131 /*
132 Major subtleties ahead: Most hash schemes depend on having a "good" hash
133 function, in the sense of simulating randomness. Python doesn't: its most
134 important hash functions (for ints) are very regular in common
135 cases:
136
137 >>>[hash(i) for i in range(4)]
138 [0, 1, 2, 3]
139
140 This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
141 the low-order i bits as the initial table index is extremely fast, and there
142 are no collisions at all for dicts indexed by a contiguous range of ints. So
143 this gives better-than-random behavior in common cases, and that's very
144 desirable.
145
146 OTOH, when collisions occur, the tendency to fill contiguous slices of the
147 hash table makes a good collision resolution strategy crucial. Taking only
148 the last i bits of the hash code is also vulnerable: for example, consider
149 the list [i << 16 for i in range(20000)] as a set of keys. Since ints are
150 their own hash codes, and this fits in a dict of size 2**15, the last 15 bits
151 of every hash code are all 0: they *all* map to the same table index.
152
153 But catering to unusual cases should not slow the usual ones, so we just take
154 the last i bits anyway. It's up to collision resolution to do the rest. If
155 we *usually* find the key we're looking for on the first try (and, it turns
156 out, we usually do -- the table load factor is kept under 2/3, so the odds
157 are solidly in our favor), then it makes best sense to keep the initial index
158 computation dirt cheap.
159
160 The first half of collision resolution is to visit table indices via this
161 recurrence:
162
163 j = ((5*j) + 1) mod 2**i
164
165 For any initial j in range(2**i), repeating that 2**i times generates each
166 int in range(2**i) exactly once (see any text on random-number generation for
167 proof). By itself, this doesn't help much: like linear probing (setting
168 j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
169 order. This would be bad, except that's not the only thing we do, and it's
170 actually *good* in the common cases where hash keys are consecutive. In an
171 example that's really too small to make this entirely clear, for a table of
172 size 2**3 the order of indices is:
173
174 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
175
176 If two things come in at index 5, the first place we look after is index 2,
177 not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
178 Linear probing is deadly in this case because there the fixed probe order
179 is the *same* as the order consecutive keys are likely to arrive. But it's
180 extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
181 and certain that consecutive hash codes do not.
182
183 The other half of the strategy is to get the other bits of the hash code
184 into play. This is done by initializing a (unsigned) vrbl "perturb" to the
185 full hash code, and changing the recurrence to:
186
187 perturb >>= PERTURB_SHIFT;
188 j = (5*j) + 1 + perturb;
189 use j % 2**i as the next table index;
190
191 Now the probe sequence depends (eventually) on every bit in the hash code,
192 and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
193 because it quickly magnifies small differences in the bits that didn't affect
194 the initial index. Note that because perturb is unsigned, if the recurrence
195 is executed often enough perturb eventually becomes and remains 0. At that
196 point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
197 that's certain to find an empty slot eventually (since it generates every int
198 in range(2**i), and we make sure there's always at least one empty slot).
199
200 Selecting a good value for PERTURB_SHIFT is a balancing act. You want it
201 small so that the high bits of the hash code continue to affect the probe
202 sequence across iterations; but you want it large so that in really bad cases
203 the high-order hash bits have an effect on early iterations. 5 was "the
204 best" in minimizing total collisions across experiments Tim Peters ran (on
205 both normal and pathological cases), but 4 and 6 weren't significantly worse.
206
207 Historical: Reimer Behrends contributed the idea of using a polynomial-based
208 approach, using repeated multiplication by x in GF(2**n) where an irreducible
209 polynomial for each table size was chosen such that x was a primitive root.
210 Christian Tismer later extended that to use division by x instead, as an
211 efficient way to get the high bits of the hash code into play. This scheme
212 also gave excellent collision statistics, but was more expensive: two
213 if-tests were required inside the loop; computing "the next" index took about
214 the same number of operations but without as much potential parallelism
215 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
216 above, and then shifting perturb can be done while the table index is being
217 masked); and the PyDictObject struct required a member to hold the table's
218 polynomial. In Tim's experiments the current scheme ran faster, produced
219 equally good collision statistics, needed less code & used less memory.
220
221 */
222
223 /* forward declarations */
224 static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
225 Py_hash_t hash, PyObject ***value_addr,
226 Py_ssize_t *hashpos);
227 static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
228 Py_hash_t hash, PyObject ***value_addr,
229 Py_ssize_t *hashpos);
230 static Py_ssize_t
231 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
232 Py_hash_t hash, PyObject ***value_addr,
233 Py_ssize_t *hashpos);
234 static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
235 Py_hash_t hash, PyObject ***value_addr,
236 Py_ssize_t *hashpos);
237
238 static int dictresize(PyDictObject *mp, Py_ssize_t minused);
239
240 /*Global counter used to set ma_version_tag field of dictionary.
241 * It is incremented each time that a dictionary is created and each
242 * time that a dictionary is modified. */
243 static uint64_t pydict_global_version = 0;
244
245 #define DICT_NEXT_VERSION() (++pydict_global_version)
246
247 /* Dictionary reuse scheme to save calls to malloc and free */
248 #ifndef PyDict_MAXFREELIST
249 #define PyDict_MAXFREELIST 80
250 #endif
251 static PyDictObject *free_list[PyDict_MAXFREELIST];
252 static int numfree = 0;
253 static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST];
254 static int numfreekeys = 0;
255
256 #include "clinic/dictobject.c.h"
257
258 int
PyDict_ClearFreeList(void)259 PyDict_ClearFreeList(void)
260 {
261 PyDictObject *op;
262 int ret = numfree + numfreekeys;
263 while (numfree) {
264 op = free_list[--numfree];
265 assert(PyDict_CheckExact(op));
266 PyObject_GC_Del(op);
267 }
268 while (numfreekeys) {
269 PyObject_FREE(keys_free_list[--numfreekeys]);
270 }
271 return ret;
272 }
273
274 /* Print summary info about the state of the optimized allocator */
275 void
_PyDict_DebugMallocStats(FILE * out)276 _PyDict_DebugMallocStats(FILE *out)
277 {
278 _PyDebugAllocatorStats(out,
279 "free PyDictObject", numfree, sizeof(PyDictObject));
280 }
281
282
283 void
PyDict_Fini(void)284 PyDict_Fini(void)
285 {
286 PyDict_ClearFreeList();
287 }
288
289 #define DK_SIZE(dk) ((dk)->dk_size)
290 #if SIZEOF_VOID_P > 4
291 #define DK_IXSIZE(dk) \
292 (DK_SIZE(dk) <= 0xff ? \
293 1 : DK_SIZE(dk) <= 0xffff ? \
294 2 : DK_SIZE(dk) <= 0xffffffff ? \
295 4 : sizeof(int64_t))
296 #else
297 #define DK_IXSIZE(dk) \
298 (DK_SIZE(dk) <= 0xff ? \
299 1 : DK_SIZE(dk) <= 0xffff ? \
300 2 : sizeof(int32_t))
301 #endif
302 #define DK_ENTRIES(dk) \
303 ((PyDictKeyEntry*)(&(dk)->dk_indices.as_1[DK_SIZE(dk) * DK_IXSIZE(dk)]))
304
305 #define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA
306 #define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA
307
308 #define DK_INCREF(dk) (DK_DEBUG_INCREF ++(dk)->dk_refcnt)
309 #define DK_DECREF(dk) if (DK_DEBUG_DECREF (--(dk)->dk_refcnt) == 0) free_keys_object(dk)
310 #define DK_MASK(dk) (((dk)->dk_size)-1)
311 #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
312
313 /* lookup indices. returns DKIX_EMPTY, DKIX_DUMMY, or ix >=0 */
314 static inline Py_ssize_t
dk_get_index(PyDictKeysObject * keys,Py_ssize_t i)315 dk_get_index(PyDictKeysObject *keys, Py_ssize_t i)
316 {
317 Py_ssize_t s = DK_SIZE(keys);
318 Py_ssize_t ix;
319
320 if (s <= 0xff) {
321 int8_t *indices = keys->dk_indices.as_1;
322 ix = indices[i];
323 }
324 else if (s <= 0xffff) {
325 int16_t *indices = keys->dk_indices.as_2;
326 ix = indices[i];
327 }
328 #if SIZEOF_VOID_P > 4
329 else if (s > 0xffffffff) {
330 int64_t *indices = keys->dk_indices.as_8;
331 ix = indices[i];
332 }
333 #endif
334 else {
335 int32_t *indices = keys->dk_indices.as_4;
336 ix = indices[i];
337 }
338 assert(ix >= DKIX_DUMMY);
339 return ix;
340 }
341
342 /* write to indices. */
343 static inline void
dk_set_index(PyDictKeysObject * keys,Py_ssize_t i,Py_ssize_t ix)344 dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
345 {
346 Py_ssize_t s = DK_SIZE(keys);
347
348 assert(ix >= DKIX_DUMMY);
349
350 if (s <= 0xff) {
351 int8_t *indices = keys->dk_indices.as_1;
352 assert(ix <= 0x7f);
353 indices[i] = (char)ix;
354 }
355 else if (s <= 0xffff) {
356 int16_t *indices = keys->dk_indices.as_2;
357 assert(ix <= 0x7fff);
358 indices[i] = (int16_t)ix;
359 }
360 #if SIZEOF_VOID_P > 4
361 else if (s > 0xffffffff) {
362 int64_t *indices = keys->dk_indices.as_8;
363 indices[i] = ix;
364 }
365 #endif
366 else {
367 int32_t *indices = keys->dk_indices.as_4;
368 assert(ix <= 0x7fffffff);
369 indices[i] = (int32_t)ix;
370 }
371 }
372
373
374 /* USABLE_FRACTION is the maximum dictionary load.
375 * Increasing this ratio makes dictionaries more dense resulting in more
376 * collisions. Decreasing it improves sparseness at the expense of spreading
377 * indices over more cache lines and at the cost of total memory consumed.
378 *
379 * USABLE_FRACTION must obey the following:
380 * (0 < USABLE_FRACTION(n) < n) for all n >= 2
381 *
382 * USABLE_FRACTION should be quick to calculate.
383 * Fractions around 1/2 to 2/3 seem to work well in practice.
384 */
385 #define USABLE_FRACTION(n) (((n) << 1)/3)
386
387 /* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
388 * This can be used to reserve enough size to insert n entries without
389 * resizing.
390 */
391 #define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
392
393 /* Alternative fraction that is otherwise close enough to 2n/3 to make
394 * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
395 * 32 * 2/3 = 21, 32 * 5/8 = 20.
396 * Its advantage is that it is faster to compute on machines with slow division.
397 * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
398 */
399
400 /* GROWTH_RATE. Growth rate upon hitting maximum load.
401 * Currently set to used*2 + capacity/2.
402 * This means that dicts double in size when growing without deletions,
403 * but have more head room when the number of deletions is on a par with the
404 * number of insertions.
405 * Raising this to used*4 doubles memory consumption depending on the size of
406 * the dictionary, but results in half the number of resizes, less effort to
407 * resize.
408 * GROWTH_RATE was set to used*4 up to version 3.2.
409 * GROWTH_RATE was set to used*2 in version 3.3.0
410 */
411 #define GROWTH_RATE(d) (((d)->ma_used*2)+((d)->ma_keys->dk_size>>1))
412
413 #define ENSURE_ALLOWS_DELETIONS(d) \
414 if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \
415 (d)->ma_keys->dk_lookup = lookdict_unicode; \
416 }
417
418 /* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
419 * (which cannot fail and thus can do no allocation).
420 */
421 static PyDictKeysObject empty_keys_struct = {
422 1, /* dk_refcnt */
423 1, /* dk_size */
424 lookdict_split, /* dk_lookup */
425 0, /* dk_usable (immutable) */
426 0, /* dk_nentries */
427 .dk_indices = { .as_1 = {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
428 DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}},
429 };
430
431 static PyObject *empty_values[1] = { NULL };
432
433 #define Py_EMPTY_KEYS &empty_keys_struct
434
435 /* Uncomment to check the dict content in _PyDict_CheckConsistency() */
436 /* #define DEBUG_PYDICT */
437
438
439 #ifdef Py_DEBUG
440 static int
_PyDict_CheckConsistency(PyDictObject * mp)441 _PyDict_CheckConsistency(PyDictObject *mp)
442 {
443 PyDictKeysObject *keys = mp->ma_keys;
444 int splitted = _PyDict_HasSplitTable(mp);
445 Py_ssize_t usable = USABLE_FRACTION(keys->dk_size);
446 #ifdef DEBUG_PYDICT
447 PyDictKeyEntry *entries = DK_ENTRIES(keys);
448 Py_ssize_t i;
449 #endif
450
451 assert(0 <= mp->ma_used && mp->ma_used <= usable);
452 assert(IS_POWER_OF_2(keys->dk_size));
453 assert(0 <= keys->dk_usable
454 && keys->dk_usable <= usable);
455 assert(0 <= keys->dk_nentries
456 && keys->dk_nentries <= usable);
457 assert(keys->dk_usable + keys->dk_nentries <= usable);
458
459 if (!splitted) {
460 /* combined table */
461 assert(keys->dk_refcnt == 1);
462 }
463
464 #ifdef DEBUG_PYDICT
465 for (i=0; i < keys->dk_size; i++) {
466 Py_ssize_t ix = dk_get_index(keys, i);
467 assert(DKIX_DUMMY <= ix && ix <= usable);
468 }
469
470 for (i=0; i < usable; i++) {
471 PyDictKeyEntry *entry = &entries[i];
472 PyObject *key = entry->me_key;
473
474 if (key != NULL) {
475 if (PyUnicode_CheckExact(key)) {
476 Py_hash_t hash = ((PyASCIIObject *)key)->hash;
477 assert(hash != -1);
478 assert(entry->me_hash == hash);
479 }
480 else {
481 /* test_dict fails if PyObject_Hash() is called again */
482 assert(entry->me_hash != -1);
483 }
484 if (!splitted) {
485 assert(entry->me_value != NULL);
486 }
487 }
488
489 if (splitted) {
490 assert(entry->me_value == NULL);
491 }
492 }
493
494 if (splitted) {
495 /* splitted table */
496 for (i=0; i < mp->ma_used; i++) {
497 assert(mp->ma_values[i] != NULL);
498 }
499 }
500 #endif
501
502 return 1;
503 }
504 #endif
505
506
new_keys_object(Py_ssize_t size)507 static PyDictKeysObject *new_keys_object(Py_ssize_t size)
508 {
509 PyDictKeysObject *dk;
510 Py_ssize_t es, usable;
511
512 assert(size >= PyDict_MINSIZE);
513 assert(IS_POWER_OF_2(size));
514
515 usable = USABLE_FRACTION(size);
516 if (size <= 0xff) {
517 es = 1;
518 }
519 else if (size <= 0xffff) {
520 es = 2;
521 }
522 #if SIZEOF_VOID_P > 4
523 else if (size <= 0xffffffff) {
524 es = 4;
525 }
526 #endif
527 else {
528 es = sizeof(Py_ssize_t);
529 }
530
531 if (size == PyDict_MINSIZE && numfreekeys > 0) {
532 dk = keys_free_list[--numfreekeys];
533 }
534 else {
535 dk = PyObject_MALLOC(sizeof(PyDictKeysObject)
536 - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
537 + es * size
538 + sizeof(PyDictKeyEntry) * usable);
539 if (dk == NULL) {
540 PyErr_NoMemory();
541 return NULL;
542 }
543 }
544 DK_DEBUG_INCREF dk->dk_refcnt = 1;
545 dk->dk_size = size;
546 dk->dk_usable = usable;
547 dk->dk_lookup = lookdict_unicode_nodummy;
548 dk->dk_nentries = 0;
549 memset(&dk->dk_indices.as_1[0], 0xff, es * size);
550 memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
551 return dk;
552 }
553
554 static void
free_keys_object(PyDictKeysObject * keys)555 free_keys_object(PyDictKeysObject *keys)
556 {
557 PyDictKeyEntry *entries = DK_ENTRIES(keys);
558 Py_ssize_t i, n;
559 for (i = 0, n = keys->dk_nentries; i < n; i++) {
560 Py_XDECREF(entries[i].me_key);
561 Py_XDECREF(entries[i].me_value);
562 }
563 if (keys->dk_size == PyDict_MINSIZE && numfreekeys < PyDict_MAXFREELIST) {
564 keys_free_list[numfreekeys++] = keys;
565 return;
566 }
567 PyObject_FREE(keys);
568 }
569
570 #define new_values(size) PyMem_NEW(PyObject *, size)
571 #define free_values(values) PyMem_FREE(values)
572
573 /* Consumes a reference to the keys object */
574 static PyObject *
new_dict(PyDictKeysObject * keys,PyObject ** values)575 new_dict(PyDictKeysObject *keys, PyObject **values)
576 {
577 PyDictObject *mp;
578 assert(keys != NULL);
579 if (numfree) {
580 mp = free_list[--numfree];
581 assert (mp != NULL);
582 assert (Py_TYPE(mp) == &PyDict_Type);
583 _Py_NewReference((PyObject *)mp);
584 }
585 else {
586 mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
587 if (mp == NULL) {
588 DK_DECREF(keys);
589 free_values(values);
590 return NULL;
591 }
592 }
593 mp->ma_keys = keys;
594 mp->ma_values = values;
595 mp->ma_used = 0;
596 mp->ma_version_tag = DICT_NEXT_VERSION();
597 assert(_PyDict_CheckConsistency(mp));
598 return (PyObject *)mp;
599 }
600
601 /* Consumes a reference to the keys object */
602 static PyObject *
new_dict_with_shared_keys(PyDictKeysObject * keys)603 new_dict_with_shared_keys(PyDictKeysObject *keys)
604 {
605 PyObject **values;
606 Py_ssize_t i, size;
607
608 size = USABLE_FRACTION(DK_SIZE(keys));
609 values = new_values(size);
610 if (values == NULL) {
611 DK_DECREF(keys);
612 return PyErr_NoMemory();
613 }
614 for (i = 0; i < size; i++) {
615 values[i] = NULL;
616 }
617 return new_dict(keys, values);
618 }
619
620 PyObject *
PyDict_New(void)621 PyDict_New(void)
622 {
623 PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
624 if (keys == NULL)
625 return NULL;
626 return new_dict(keys, NULL);
627 }
628
629 /* Search index of hash table from offset of entry table */
630 static Py_ssize_t
lookdict_index(PyDictKeysObject * k,Py_hash_t hash,Py_ssize_t index)631 lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
632 {
633 size_t i;
634 size_t mask = DK_MASK(k);
635 Py_ssize_t ix;
636
637 i = (size_t)hash & mask;
638 ix = dk_get_index(k, i);
639 if (ix == index) {
640 return i;
641 }
642 if (ix == DKIX_EMPTY) {
643 return DKIX_EMPTY;
644 }
645
646 for (size_t perturb = hash;;) {
647 perturb >>= PERTURB_SHIFT;
648 i = mask & ((i << 2) + i + perturb + 1);
649 ix = dk_get_index(k, i);
650 if (ix == index) {
651 return i;
652 }
653 if (ix == DKIX_EMPTY) {
654 return DKIX_EMPTY;
655 }
656 }
657 assert(0); /* NOT REACHED */
658 return DKIX_ERROR;
659 }
660
661 /*
662 The basic lookup function used by all operations.
663 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
664 Open addressing is preferred over chaining since the link overhead for
665 chaining would be substantial (100% with typical malloc overhead).
666
667 The initial probe index is computed as hash mod the table size. Subsequent
668 probe indices are computed as explained earlier.
669
670 All arithmetic on hash should ignore overflow.
671
672 The details in this version are due to Tim Peters, building on many past
673 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
674 Christian Tismer.
675
676 lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
677 comparison raises an exception.
678 lookdict_unicode() below is specialized to string keys, comparison of which can
679 never raise an exception; that function can never return DKIX_ERROR when key
680 is string. Otherwise, it falls back to lookdict().
681 lookdict_unicode_nodummy is further specialized for string keys that cannot be
682 the <dummy> value.
683 For both, when the key isn't found a DKIX_EMPTY is returned. hashpos returns
684 where the key index should be inserted.
685 */
686 static Py_ssize_t
lookdict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject *** value_addr,Py_ssize_t * hashpos)687 lookdict(PyDictObject *mp, PyObject *key,
688 Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
689 {
690 size_t i, mask;
691 Py_ssize_t ix, freeslot;
692 int cmp;
693 PyDictKeysObject *dk;
694 PyDictKeyEntry *ep0, *ep;
695 PyObject *startkey;
696
697 top:
698 dk = mp->ma_keys;
699 mask = DK_MASK(dk);
700 ep0 = DK_ENTRIES(dk);
701 i = (size_t)hash & mask;
702
703 ix = dk_get_index(dk, i);
704 if (ix == DKIX_EMPTY) {
705 if (hashpos != NULL)
706 *hashpos = i;
707 *value_addr = NULL;
708 return DKIX_EMPTY;
709 }
710 if (ix == DKIX_DUMMY) {
711 freeslot = i;
712 }
713 else {
714 ep = &ep0[ix];
715 assert(ep->me_key != NULL);
716 if (ep->me_key == key) {
717 *value_addr = &ep->me_value;
718 if (hashpos != NULL)
719 *hashpos = i;
720 return ix;
721 }
722 if (ep->me_hash == hash) {
723 startkey = ep->me_key;
724 Py_INCREF(startkey);
725 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
726 Py_DECREF(startkey);
727 if (cmp < 0) {
728 *value_addr = NULL;
729 return DKIX_ERROR;
730 }
731 if (dk == mp->ma_keys && ep->me_key == startkey) {
732 if (cmp > 0) {
733 *value_addr = &ep->me_value;
734 if (hashpos != NULL)
735 *hashpos = i;
736 return ix;
737 }
738 }
739 else {
740 /* The dict was mutated, restart */
741 goto top;
742 }
743 }
744 freeslot = -1;
745 }
746
747 for (size_t perturb = hash;;) {
748 perturb >>= PERTURB_SHIFT;
749 i = ((i << 2) + i + perturb + 1) & mask;
750 ix = dk_get_index(dk, i);
751 if (ix == DKIX_EMPTY) {
752 if (hashpos != NULL) {
753 *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
754 }
755 *value_addr = NULL;
756 return ix;
757 }
758 if (ix == DKIX_DUMMY) {
759 if (freeslot == -1)
760 freeslot = i;
761 continue;
762 }
763 ep = &ep0[ix];
764 assert(ep->me_key != NULL);
765 if (ep->me_key == key) {
766 if (hashpos != NULL) {
767 *hashpos = i;
768 }
769 *value_addr = &ep->me_value;
770 return ix;
771 }
772 if (ep->me_hash == hash) {
773 startkey = ep->me_key;
774 Py_INCREF(startkey);
775 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
776 Py_DECREF(startkey);
777 if (cmp < 0) {
778 *value_addr = NULL;
779 return DKIX_ERROR;
780 }
781 if (dk == mp->ma_keys && ep->me_key == startkey) {
782 if (cmp > 0) {
783 if (hashpos != NULL) {
784 *hashpos = i;
785 }
786 *value_addr = &ep->me_value;
787 return ix;
788 }
789 }
790 else {
791 /* The dict was mutated, restart */
792 goto top;
793 }
794 }
795 }
796 assert(0); /* NOT REACHED */
797 return 0;
798 }
799
800 /* Specialized version for string-only keys */
801 static Py_ssize_t
lookdict_unicode(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject *** value_addr,Py_ssize_t * hashpos)802 lookdict_unicode(PyDictObject *mp, PyObject *key,
803 Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
804 {
805 size_t i;
806 size_t mask = DK_MASK(mp->ma_keys);
807 Py_ssize_t ix, freeslot;
808 PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
809
810 assert(mp->ma_values == NULL);
811 /* Make sure this function doesn't have to handle non-unicode keys,
812 including subclasses of str; e.g., one reason to subclass
813 unicodes is to override __eq__, and for speed we don't cater to
814 that here. */
815 if (!PyUnicode_CheckExact(key)) {
816 mp->ma_keys->dk_lookup = lookdict;
817 return lookdict(mp, key, hash, value_addr, hashpos);
818 }
819 i = (size_t)hash & mask;
820 ix = dk_get_index(mp->ma_keys, i);
821 if (ix == DKIX_EMPTY) {
822 if (hashpos != NULL)
823 *hashpos = i;
824 *value_addr = NULL;
825 return DKIX_EMPTY;
826 }
827 if (ix == DKIX_DUMMY) {
828 freeslot = i;
829 }
830 else {
831 ep = &ep0[ix];
832 assert(ep->me_key != NULL);
833 if (ep->me_key == key
834 || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
835 if (hashpos != NULL)
836 *hashpos = i;
837 *value_addr = &ep->me_value;
838 return ix;
839 }
840 freeslot = -1;
841 }
842
843 for (size_t perturb = hash;;) {
844 perturb >>= PERTURB_SHIFT;
845 i = mask & ((i << 2) + i + perturb + 1);
846 ix = dk_get_index(mp->ma_keys, i);
847 if (ix == DKIX_EMPTY) {
848 if (hashpos != NULL) {
849 *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
850 }
851 *value_addr = NULL;
852 return DKIX_EMPTY;
853 }
854 if (ix == DKIX_DUMMY) {
855 if (freeslot == -1)
856 freeslot = i;
857 continue;
858 }
859 ep = &ep0[ix];
860 assert(ep->me_key != NULL);
861 if (ep->me_key == key
862 || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
863 *value_addr = &ep->me_value;
864 if (hashpos != NULL) {
865 *hashpos = i;
866 }
867 return ix;
868 }
869 }
870 assert(0); /* NOT REACHED */
871 return 0;
872 }
873
874 /* Faster version of lookdict_unicode when it is known that no <dummy> keys
875 * will be present. */
876 static Py_ssize_t
lookdict_unicode_nodummy(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject *** value_addr,Py_ssize_t * hashpos)877 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
878 Py_hash_t hash, PyObject ***value_addr,
879 Py_ssize_t *hashpos)
880 {
881 size_t i;
882 size_t mask = DK_MASK(mp->ma_keys);
883 Py_ssize_t ix;
884 PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
885
886 assert(mp->ma_values == NULL);
887 /* Make sure this function doesn't have to handle non-unicode keys,
888 including subclasses of str; e.g., one reason to subclass
889 unicodes is to override __eq__, and for speed we don't cater to
890 that here. */
891 if (!PyUnicode_CheckExact(key)) {
892 mp->ma_keys->dk_lookup = lookdict;
893 return lookdict(mp, key, hash, value_addr, hashpos);
894 }
895 i = (size_t)hash & mask;
896 ix = dk_get_index(mp->ma_keys, i);
897 assert (ix != DKIX_DUMMY);
898 if (ix == DKIX_EMPTY) {
899 if (hashpos != NULL)
900 *hashpos = i;
901 *value_addr = NULL;
902 return DKIX_EMPTY;
903 }
904 ep = &ep0[ix];
905 assert(ep->me_key != NULL);
906 assert(PyUnicode_CheckExact(ep->me_key));
907 if (ep->me_key == key ||
908 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
909 if (hashpos != NULL)
910 *hashpos = i;
911 *value_addr = &ep->me_value;
912 return ix;
913 }
914 for (size_t perturb = hash;;) {
915 perturb >>= PERTURB_SHIFT;
916 i = mask & ((i << 2) + i + perturb + 1);
917 ix = dk_get_index(mp->ma_keys, i);
918 assert (ix != DKIX_DUMMY);
919 if (ix == DKIX_EMPTY) {
920 if (hashpos != NULL)
921 *hashpos = i;
922 *value_addr = NULL;
923 return DKIX_EMPTY;
924 }
925 ep = &ep0[ix];
926 assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
927 if (ep->me_key == key ||
928 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
929 if (hashpos != NULL)
930 *hashpos = i;
931 *value_addr = &ep->me_value;
932 return ix;
933 }
934 }
935 assert(0); /* NOT REACHED */
936 return 0;
937 }
938
939 /* Version of lookdict for split tables.
940 * All split tables and only split tables use this lookup function.
941 * Split tables only contain unicode keys and no dummy keys,
942 * so algorithm is the same as lookdict_unicode_nodummy.
943 */
944 static Py_ssize_t
lookdict_split(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject *** value_addr,Py_ssize_t * hashpos)945 lookdict_split(PyDictObject *mp, PyObject *key,
946 Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
947 {
948 size_t i;
949 size_t mask = DK_MASK(mp->ma_keys);
950 Py_ssize_t ix;
951 PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
952
953 /* mp must split table */
954 assert(mp->ma_values != NULL);
955 if (!PyUnicode_CheckExact(key)) {
956 ix = lookdict(mp, key, hash, value_addr, hashpos);
957 if (ix >= 0) {
958 *value_addr = &mp->ma_values[ix];
959 }
960 return ix;
961 }
962
963 i = (size_t)hash & mask;
964 ix = dk_get_index(mp->ma_keys, i);
965 if (ix == DKIX_EMPTY) {
966 if (hashpos != NULL)
967 *hashpos = i;
968 *value_addr = NULL;
969 return DKIX_EMPTY;
970 }
971 assert(ix >= 0);
972 ep = &ep0[ix];
973 assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
974 if (ep->me_key == key ||
975 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
976 if (hashpos != NULL)
977 *hashpos = i;
978 *value_addr = &mp->ma_values[ix];
979 return ix;
980 }
981 for (size_t perturb = hash;;) {
982 perturb >>= PERTURB_SHIFT;
983 i = mask & ((i << 2) + i + perturb + 1);
984 ix = dk_get_index(mp->ma_keys, i);
985 if (ix == DKIX_EMPTY) {
986 if (hashpos != NULL)
987 *hashpos = i;
988 *value_addr = NULL;
989 return DKIX_EMPTY;
990 }
991 assert(ix >= 0);
992 ep = &ep0[ix];
993 assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
994 if (ep->me_key == key ||
995 (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
996 if (hashpos != NULL)
997 *hashpos = i;
998 *value_addr = &mp->ma_values[ix];
999 return ix;
1000 }
1001 }
1002 assert(0); /* NOT REACHED */
1003 return 0;
1004 }
1005
1006 int
_PyDict_HasOnlyStringKeys(PyObject * dict)1007 _PyDict_HasOnlyStringKeys(PyObject *dict)
1008 {
1009 Py_ssize_t pos = 0;
1010 PyObject *key, *value;
1011 assert(PyDict_Check(dict));
1012 /* Shortcut */
1013 if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict)
1014 return 1;
1015 while (PyDict_Next(dict, &pos, &key, &value))
1016 if (!PyUnicode_Check(key))
1017 return 0;
1018 return 1;
1019 }
1020
1021 #define MAINTAIN_TRACKING(mp, key, value) \
1022 do { \
1023 if (!_PyObject_GC_IS_TRACKED(mp)) { \
1024 if (_PyObject_GC_MAY_BE_TRACKED(key) || \
1025 _PyObject_GC_MAY_BE_TRACKED(value)) { \
1026 _PyObject_GC_TRACK(mp); \
1027 } \
1028 } \
1029 } while(0)
1030
1031 void
_PyDict_MaybeUntrack(PyObject * op)1032 _PyDict_MaybeUntrack(PyObject *op)
1033 {
1034 PyDictObject *mp;
1035 PyObject *value;
1036 Py_ssize_t i, numentries;
1037 PyDictKeyEntry *ep0;
1038
1039 if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
1040 return;
1041
1042 mp = (PyDictObject *) op;
1043 ep0 = DK_ENTRIES(mp->ma_keys);
1044 numentries = mp->ma_keys->dk_nentries;
1045 if (_PyDict_HasSplitTable(mp)) {
1046 for (i = 0; i < numentries; i++) {
1047 if ((value = mp->ma_values[i]) == NULL)
1048 continue;
1049 if (_PyObject_GC_MAY_BE_TRACKED(value)) {
1050 assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
1051 return;
1052 }
1053 }
1054 }
1055 else {
1056 for (i = 0; i < numentries; i++) {
1057 if ((value = ep0[i].me_value) == NULL)
1058 continue;
1059 if (_PyObject_GC_MAY_BE_TRACKED(value) ||
1060 _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
1061 return;
1062 }
1063 }
1064 _PyObject_GC_UNTRACK(op);
1065 }
1066
1067 /* Internal function to find slot for an item from its hash
1068 when it is known that the key is not present in the dict.
1069
1070 The dict must be combined. */
1071 static void
find_empty_slot(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject *** value_addr,Py_ssize_t * hashpos)1072 find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash,
1073 PyObject ***value_addr, Py_ssize_t *hashpos)
1074 {
1075 size_t i;
1076 size_t mask = DK_MASK(mp->ma_keys);
1077 Py_ssize_t ix;
1078 PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
1079
1080 assert(!_PyDict_HasSplitTable(mp));
1081 assert(hashpos != NULL);
1082 assert(key != NULL);
1083
1084 if (!PyUnicode_CheckExact(key))
1085 mp->ma_keys->dk_lookup = lookdict;
1086 i = hash & mask;
1087 ix = dk_get_index(mp->ma_keys, i);
1088 for (size_t perturb = hash; ix != DKIX_EMPTY;) {
1089 perturb >>= PERTURB_SHIFT;
1090 i = (i << 2) + i + perturb + 1;
1091 ix = dk_get_index(mp->ma_keys, i & mask);
1092 }
1093 ep = &ep0[mp->ma_keys->dk_nentries];
1094 *hashpos = i & mask;
1095 assert(ep->me_value == NULL);
1096 *value_addr = &ep->me_value;
1097 }
1098
1099 static int
insertion_resize(PyDictObject * mp)1100 insertion_resize(PyDictObject *mp)
1101 {
1102 return dictresize(mp, GROWTH_RATE(mp));
1103 }
1104
1105 /*
1106 Internal routine to insert a new item into the table.
1107 Used both by the internal resize routine and by the public insert routine.
1108 Returns -1 if an error occurred, or 0 on success.
1109 */
1110 static int
insertdict(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject * value)1111 insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
1112 {
1113 PyObject *old_value;
1114 PyObject **value_addr;
1115 PyDictKeyEntry *ep, *ep0;
1116 Py_ssize_t hashpos, ix;
1117
1118 if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
1119 if (insertion_resize(mp) < 0)
1120 return -1;
1121 }
1122
1123 ix = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr, &hashpos);
1124 if (ix == DKIX_ERROR) {
1125 return -1;
1126 }
1127
1128 assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
1129 Py_INCREF(value);
1130 MAINTAIN_TRACKING(mp, key, value);
1131
1132 /* When insertion order is different from shared key, we can't share
1133 * the key anymore. Convert this instance to combine table.
1134 */
1135 if (_PyDict_HasSplitTable(mp) &&
1136 ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
1137 (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
1138 if (insertion_resize(mp) < 0) {
1139 Py_DECREF(value);
1140 return -1;
1141 }
1142 find_empty_slot(mp, key, hash, &value_addr, &hashpos);
1143 ix = DKIX_EMPTY;
1144 }
1145
1146 if (ix == DKIX_EMPTY) {
1147 /* Insert into new slot. */
1148 if (mp->ma_keys->dk_usable <= 0) {
1149 /* Need to resize. */
1150 if (insertion_resize(mp) < 0) {
1151 Py_DECREF(value);
1152 return -1;
1153 }
1154 find_empty_slot(mp, key, hash, &value_addr, &hashpos);
1155 }
1156 ep0 = DK_ENTRIES(mp->ma_keys);
1157 ep = &ep0[mp->ma_keys->dk_nentries];
1158 dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
1159 Py_INCREF(key);
1160 ep->me_key = key;
1161 ep->me_hash = hash;
1162 if (mp->ma_values) {
1163 assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
1164 mp->ma_values[mp->ma_keys->dk_nentries] = value;
1165 }
1166 else {
1167 ep->me_value = value;
1168 }
1169 mp->ma_used++;
1170 mp->ma_version_tag = DICT_NEXT_VERSION();
1171 mp->ma_keys->dk_usable--;
1172 mp->ma_keys->dk_nentries++;
1173 assert(mp->ma_keys->dk_usable >= 0);
1174 assert(_PyDict_CheckConsistency(mp));
1175 return 0;
1176 }
1177
1178 assert(value_addr != NULL);
1179
1180 old_value = *value_addr;
1181 if (old_value != NULL) {
1182 *value_addr = value;
1183 mp->ma_version_tag = DICT_NEXT_VERSION();
1184 assert(_PyDict_CheckConsistency(mp));
1185
1186 Py_DECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
1187 return 0;
1188 }
1189
1190 /* pending state */
1191 assert(_PyDict_HasSplitTable(mp));
1192 assert(ix == mp->ma_used);
1193 *value_addr = value;
1194 mp->ma_used++;
1195 mp->ma_version_tag = DICT_NEXT_VERSION();
1196 assert(_PyDict_CheckConsistency(mp));
1197 return 0;
1198 }
1199
1200 /*
1201 Internal routine used by dictresize() to insert an item which is
1202 known to be absent from the dict. This routine also assumes that
1203 the dict contains no deleted entries. Besides the performance benefit,
1204 using insertdict() in dictresize() is dangerous (SF bug #1456209).
1205 Note that no refcounts are changed by this routine; if needed, the caller
1206 is responsible for incref'ing `key` and `value`.
1207 Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
1208 must set them correctly
1209 */
1210 static void
insertdict_clean(PyDictObject * mp,PyObject * key,Py_hash_t hash,PyObject * value)1211 insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
1212 PyObject *value)
1213 {
1214 size_t i;
1215 PyDictKeysObject *k = mp->ma_keys;
1216 size_t mask = (size_t)DK_SIZE(k)-1;
1217 PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
1218 PyDictKeyEntry *ep;
1219
1220 assert(k->dk_lookup != NULL);
1221 assert(value != NULL);
1222 assert(key != NULL);
1223 assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
1224 i = hash & mask;
1225 for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
1226 perturb >>= PERTURB_SHIFT;
1227 i = mask & ((i << 2) + i + perturb + 1);
1228 }
1229 ep = &ep0[k->dk_nentries];
1230 assert(ep->me_value == NULL);
1231 dk_set_index(k, i, k->dk_nentries);
1232 k->dk_nentries++;
1233 ep->me_key = key;
1234 ep->me_hash = hash;
1235 ep->me_value = value;
1236 }
1237
1238 /*
1239 Restructure the table by allocating a new table and reinserting all
1240 items again. When entries have been deleted, the new table may
1241 actually be smaller than the old one.
1242 If a table is split (its keys and hashes are shared, its values are not),
1243 then the values are temporarily copied into the table, it is resized as
1244 a combined table, then the me_value slots in the old table are NULLed out.
1245 After resizing a table is always combined,
1246 but can be resplit by make_keys_shared().
1247 */
1248 static int
dictresize(PyDictObject * mp,Py_ssize_t minsize)1249 dictresize(PyDictObject *mp, Py_ssize_t minsize)
1250 {
1251 Py_ssize_t i, newsize;
1252 PyDictKeysObject *oldkeys;
1253 PyObject **oldvalues;
1254 PyDictKeyEntry *ep0;
1255
1256 /* Find the smallest table size > minused. */
1257 for (newsize = PyDict_MINSIZE;
1258 newsize < minsize && newsize > 0;
1259 newsize <<= 1)
1260 ;
1261 if (newsize <= 0) {
1262 PyErr_NoMemory();
1263 return -1;
1264 }
1265 oldkeys = mp->ma_keys;
1266 oldvalues = mp->ma_values;
1267 /* Allocate a new table. */
1268 mp->ma_keys = new_keys_object(newsize);
1269 if (mp->ma_keys == NULL) {
1270 mp->ma_keys = oldkeys;
1271 return -1;
1272 }
1273 // New table must be large enough.
1274 assert(mp->ma_keys->dk_usable >= mp->ma_used);
1275 if (oldkeys->dk_lookup == lookdict)
1276 mp->ma_keys->dk_lookup = lookdict;
1277 mp->ma_values = NULL;
1278 ep0 = DK_ENTRIES(oldkeys);
1279 /* Main loop below assumes we can transfer refcount to new keys
1280 * and that value is stored in me_value.
1281 * Increment ref-counts and copy values here to compensate
1282 * This (resizing a split table) should be relatively rare */
1283 if (oldvalues != NULL) {
1284 for (i = 0; i < oldkeys->dk_nentries; i++) {
1285 if (oldvalues[i] != NULL) {
1286 Py_INCREF(ep0[i].me_key);
1287 ep0[i].me_value = oldvalues[i];
1288 }
1289 }
1290 }
1291 /* Main loop */
1292 for (i = 0; i < oldkeys->dk_nentries; i++) {
1293 PyDictKeyEntry *ep = &ep0[i];
1294 if (ep->me_value != NULL) {
1295 insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
1296 }
1297 }
1298 mp->ma_keys->dk_usable -= mp->ma_used;
1299 if (oldvalues != NULL) {
1300 /* NULL out me_value slot in oldkeys, in case it was shared */
1301 for (i = 0; i < oldkeys->dk_nentries; i++)
1302 ep0[i].me_value = NULL;
1303 DK_DECREF(oldkeys);
1304 if (oldvalues != empty_values) {
1305 free_values(oldvalues);
1306 }
1307 }
1308 else {
1309 assert(oldkeys->dk_lookup != lookdict_split);
1310 assert(oldkeys->dk_refcnt == 1);
1311 DK_DEBUG_DECREF PyObject_FREE(oldkeys);
1312 }
1313 return 0;
1314 }
1315
1316 /* Returns NULL if unable to split table.
1317 * A NULL return does not necessarily indicate an error */
1318 static PyDictKeysObject *
make_keys_shared(PyObject * op)1319 make_keys_shared(PyObject *op)
1320 {
1321 Py_ssize_t i;
1322 Py_ssize_t size;
1323 PyDictObject *mp = (PyDictObject *)op;
1324
1325 if (!PyDict_CheckExact(op))
1326 return NULL;
1327 if (!_PyDict_HasSplitTable(mp)) {
1328 PyDictKeyEntry *ep0;
1329 PyObject **values;
1330 assert(mp->ma_keys->dk_refcnt == 1);
1331 if (mp->ma_keys->dk_lookup == lookdict) {
1332 return NULL;
1333 }
1334 else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
1335 /* Remove dummy keys */
1336 if (dictresize(mp, DK_SIZE(mp->ma_keys)))
1337 return NULL;
1338 }
1339 assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
1340 /* Copy values into a new array */
1341 ep0 = DK_ENTRIES(mp->ma_keys);
1342 size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
1343 values = new_values(size);
1344 if (values == NULL) {
1345 PyErr_SetString(PyExc_MemoryError,
1346 "Not enough memory to allocate new values array");
1347 return NULL;
1348 }
1349 for (i = 0; i < size; i++) {
1350 values[i] = ep0[i].me_value;
1351 ep0[i].me_value = NULL;
1352 }
1353 mp->ma_keys->dk_lookup = lookdict_split;
1354 mp->ma_values = values;
1355 }
1356 DK_INCREF(mp->ma_keys);
1357 return mp->ma_keys;
1358 }
1359
1360 PyObject *
_PyDict_NewPresized(Py_ssize_t minused)1361 _PyDict_NewPresized(Py_ssize_t minused)
1362 {
1363 const Py_ssize_t max_presize = 128 * 1024;
1364 Py_ssize_t newsize;
1365 PyDictKeysObject *new_keys;
1366
1367 /* There are no strict guarantee that returned dict can contain minused
1368 * items without resize. So we create medium size dict instead of very
1369 * large dict or MemoryError.
1370 */
1371 if (minused > USABLE_FRACTION(max_presize)) {
1372 newsize = max_presize;
1373 }
1374 else {
1375 Py_ssize_t minsize = ESTIMATE_SIZE(minused);
1376 newsize = PyDict_MINSIZE;
1377 while (newsize < minsize) {
1378 newsize <<= 1;
1379 }
1380 }
1381 assert(IS_POWER_OF_2(newsize));
1382
1383 new_keys = new_keys_object(newsize);
1384 if (new_keys == NULL)
1385 return NULL;
1386 return new_dict(new_keys, NULL);
1387 }
1388
1389 /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
1390 * that may occur (originally dicts supported only string keys, and exceptions
1391 * weren't possible). So, while the original intent was that a NULL return
1392 * meant the key wasn't present, in reality it can mean that, or that an error
1393 * (suppressed) occurred while computing the key's hash, or that some error
1394 * (suppressed) occurred when comparing keys in the dict's internal probe
1395 * sequence. A nasty example of the latter is when a Python-coded comparison
1396 * function hits a stack-depth error, which can cause this to return NULL
1397 * even if the key is present.
1398 */
1399 PyObject *
PyDict_GetItem(PyObject * op,PyObject * key)1400 PyDict_GetItem(PyObject *op, PyObject *key)
1401 {
1402 Py_hash_t hash;
1403 Py_ssize_t ix;
1404 PyDictObject *mp = (PyDictObject *)op;
1405 PyThreadState *tstate;
1406 PyObject **value_addr;
1407
1408 if (!PyDict_Check(op))
1409 return NULL;
1410 if (!PyUnicode_CheckExact(key) ||
1411 (hash = ((PyASCIIObject *) key)->hash) == -1)
1412 {
1413 hash = PyObject_Hash(key);
1414 if (hash == -1) {
1415 PyErr_Clear();
1416 return NULL;
1417 }
1418 }
1419
1420 /* We can arrive here with a NULL tstate during initialization: try
1421 running "python -Wi" for an example related to string interning.
1422 Let's just hope that no exception occurs then... This must be
1423 _PyThreadState_Current and not PyThreadState_GET() because in debug
1424 mode, the latter complains if tstate is NULL. */
1425 tstate = _PyThreadState_UncheckedGet();
1426 if (tstate != NULL && tstate->curexc_type != NULL) {
1427 /* preserve the existing exception */
1428 PyObject *err_type, *err_value, *err_tb;
1429 PyErr_Fetch(&err_type, &err_value, &err_tb);
1430 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
1431 /* ignore errors */
1432 PyErr_Restore(err_type, err_value, err_tb);
1433 if (ix < 0)
1434 return NULL;
1435 }
1436 else {
1437 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
1438 if (ix < 0) {
1439 PyErr_Clear();
1440 return NULL;
1441 }
1442 }
1443 return *value_addr;
1444 }
1445
1446 /* Same as PyDict_GetItemWithError() but with hash supplied by caller.
1447 This returns NULL *with* an exception set if an exception occurred.
1448 It returns NULL *without* an exception set if the key wasn't present.
1449 */
1450 PyObject *
_PyDict_GetItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)1451 _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
1452 {
1453 Py_ssize_t ix;
1454 PyDictObject *mp = (PyDictObject *)op;
1455 PyObject **value_addr;
1456
1457 if (!PyDict_Check(op)) {
1458 PyErr_BadInternalCall();
1459 return NULL;
1460 }
1461
1462 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
1463 if (ix < 0) {
1464 return NULL;
1465 }
1466 return *value_addr;
1467 }
1468
1469 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
1470 This returns NULL *with* an exception set if an exception occurred.
1471 It returns NULL *without* an exception set if the key wasn't present.
1472 */
1473 PyObject *
PyDict_GetItemWithError(PyObject * op,PyObject * key)1474 PyDict_GetItemWithError(PyObject *op, PyObject *key)
1475 {
1476 Py_ssize_t ix;
1477 Py_hash_t hash;
1478 PyDictObject*mp = (PyDictObject *)op;
1479 PyObject **value_addr;
1480
1481 if (!PyDict_Check(op)) {
1482 PyErr_BadInternalCall();
1483 return NULL;
1484 }
1485 if (!PyUnicode_CheckExact(key) ||
1486 (hash = ((PyASCIIObject *) key)->hash) == -1)
1487 {
1488 hash = PyObject_Hash(key);
1489 if (hash == -1) {
1490 return NULL;
1491 }
1492 }
1493
1494 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
1495 if (ix < 0)
1496 return NULL;
1497 return *value_addr;
1498 }
1499
1500 PyObject *
_PyDict_GetItemIdWithError(PyObject * dp,struct _Py_Identifier * key)1501 _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
1502 {
1503 PyObject *kv;
1504 kv = _PyUnicode_FromId(key); /* borrowed */
1505 if (kv == NULL)
1506 return NULL;
1507 return PyDict_GetItemWithError(dp, kv);
1508 }
1509
1510 /* Fast version of global value lookup (LOAD_GLOBAL).
1511 * Lookup in globals, then builtins.
1512 *
1513 * Raise an exception and return NULL if an error occurred (ex: computing the
1514 * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
1515 * exist. Return the value if the key exists.
1516 */
1517 PyObject *
_PyDict_LoadGlobal(PyDictObject * globals,PyDictObject * builtins,PyObject * key)1518 _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
1519 {
1520 Py_ssize_t ix;
1521 Py_hash_t hash;
1522 PyObject **value_addr;
1523
1524 if (!PyUnicode_CheckExact(key) ||
1525 (hash = ((PyASCIIObject *) key)->hash) == -1)
1526 {
1527 hash = PyObject_Hash(key);
1528 if (hash == -1)
1529 return NULL;
1530 }
1531
1532 /* namespace 1: globals */
1533 ix = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr, NULL);
1534 if (ix == DKIX_ERROR)
1535 return NULL;
1536 if (ix != DKIX_EMPTY && *value_addr != NULL)
1537 return *value_addr;
1538
1539 /* namespace 2: builtins */
1540 ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr, NULL);
1541 if (ix < 0)
1542 return NULL;
1543 return *value_addr;
1544 }
1545
1546 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
1547 * dictionary if it's merely replacing the value for an existing key.
1548 * This means that it's safe to loop over a dictionary with PyDict_Next()
1549 * and occasionally replace a value -- but you can't insert new keys or
1550 * remove them.
1551 */
1552 int
PyDict_SetItem(PyObject * op,PyObject * key,PyObject * value)1553 PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value)
1554 {
1555 PyDictObject *mp;
1556 Py_hash_t hash;
1557 if (!PyDict_Check(op)) {
1558 PyErr_BadInternalCall();
1559 return -1;
1560 }
1561 assert(key);
1562 assert(value);
1563 mp = (PyDictObject *)op;
1564 if (!PyUnicode_CheckExact(key) ||
1565 (hash = ((PyASCIIObject *) key)->hash) == -1)
1566 {
1567 hash = PyObject_Hash(key);
1568 if (hash == -1)
1569 return -1;
1570 }
1571
1572 /* insertdict() handles any resizing that might be necessary */
1573 return insertdict(mp, key, hash, value);
1574 }
1575
1576 int
_PyDict_SetItem_KnownHash(PyObject * op,PyObject * key,PyObject * value,Py_hash_t hash)1577 _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value,
1578 Py_hash_t hash)
1579 {
1580 PyDictObject *mp;
1581
1582 if (!PyDict_Check(op)) {
1583 PyErr_BadInternalCall();
1584 return -1;
1585 }
1586 assert(key);
1587 assert(value);
1588 assert(hash != -1);
1589 mp = (PyDictObject *)op;
1590
1591 /* insertdict() handles any resizing that might be necessary */
1592 return insertdict(mp, key, hash, value);
1593 }
1594
1595 static int
delitem_common(PyDictObject * mp,Py_ssize_t hashpos,Py_ssize_t ix,PyObject ** value_addr)1596 delitem_common(PyDictObject *mp, Py_ssize_t hashpos, Py_ssize_t ix,
1597 PyObject **value_addr)
1598 {
1599 PyObject *old_key, *old_value;
1600 PyDictKeyEntry *ep;
1601
1602 old_value = *value_addr;
1603 assert(old_value != NULL);
1604 *value_addr = NULL;
1605 mp->ma_used--;
1606 mp->ma_version_tag = DICT_NEXT_VERSION();
1607 ep = &DK_ENTRIES(mp->ma_keys)[ix];
1608 dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1609 ENSURE_ALLOWS_DELETIONS(mp);
1610 old_key = ep->me_key;
1611 ep->me_key = NULL;
1612 Py_DECREF(old_key);
1613 Py_DECREF(old_value);
1614
1615 assert(_PyDict_CheckConsistency(mp));
1616 return 0;
1617 }
1618
1619 int
PyDict_DelItem(PyObject * op,PyObject * key)1620 PyDict_DelItem(PyObject *op, PyObject *key)
1621 {
1622 Py_hash_t hash;
1623 assert(key);
1624 if (!PyUnicode_CheckExact(key) ||
1625 (hash = ((PyASCIIObject *) key)->hash) == -1) {
1626 hash = PyObject_Hash(key);
1627 if (hash == -1)
1628 return -1;
1629 }
1630
1631 return _PyDict_DelItem_KnownHash(op, key, hash);
1632 }
1633
1634 int
_PyDict_DelItem_KnownHash(PyObject * op,PyObject * key,Py_hash_t hash)1635 _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
1636 {
1637 Py_ssize_t hashpos, ix;
1638 PyDictObject *mp;
1639 PyObject **value_addr;
1640
1641 if (!PyDict_Check(op)) {
1642 PyErr_BadInternalCall();
1643 return -1;
1644 }
1645 assert(key);
1646 assert(hash != -1);
1647 mp = (PyDictObject *)op;
1648 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1649 if (ix == DKIX_ERROR)
1650 return -1;
1651 if (ix == DKIX_EMPTY || *value_addr == NULL) {
1652 _PyErr_SetKeyError(key);
1653 return -1;
1654 }
1655 assert(dk_get_index(mp->ma_keys, hashpos) == ix);
1656
1657 // Split table doesn't allow deletion. Combine it.
1658 if (_PyDict_HasSplitTable(mp)) {
1659 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1660 return -1;
1661 }
1662 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1663 assert(ix >= 0);
1664 }
1665 return delitem_common(mp, hashpos, ix, value_addr);
1666 }
1667
1668 /* This function promises that the predicate -> deletion sequence is atomic
1669 * (i.e. protected by the GIL), assuming the predicate itself doesn't
1670 * release the GIL.
1671 */
1672 int
_PyDict_DelItemIf(PyObject * op,PyObject * key,int (* predicate)(PyObject * value))1673 _PyDict_DelItemIf(PyObject *op, PyObject *key,
1674 int (*predicate)(PyObject *value))
1675 {
1676 Py_ssize_t hashpos, ix;
1677 PyDictObject *mp;
1678 Py_hash_t hash;
1679 PyObject **value_addr;
1680 int res;
1681
1682 if (!PyDict_Check(op)) {
1683 PyErr_BadInternalCall();
1684 return -1;
1685 }
1686 assert(key);
1687 hash = PyObject_Hash(key);
1688 if (hash == -1)
1689 return -1;
1690 mp = (PyDictObject *)op;
1691 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1692 if (ix == DKIX_ERROR)
1693 return -1;
1694 if (ix == DKIX_EMPTY || *value_addr == NULL) {
1695 _PyErr_SetKeyError(key);
1696 return -1;
1697 }
1698 assert(dk_get_index(mp->ma_keys, hashpos) == ix);
1699
1700 // Split table doesn't allow deletion. Combine it.
1701 if (_PyDict_HasSplitTable(mp)) {
1702 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1703 return -1;
1704 }
1705 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1706 assert(ix >= 0);
1707 }
1708
1709 res = predicate(*value_addr);
1710 if (res == -1)
1711 return -1;
1712 if (res > 0)
1713 return delitem_common(mp, hashpos, ix, value_addr);
1714 else
1715 return 0;
1716 }
1717
1718
1719 void
PyDict_Clear(PyObject * op)1720 PyDict_Clear(PyObject *op)
1721 {
1722 PyDictObject *mp;
1723 PyDictKeysObject *oldkeys;
1724 PyObject **oldvalues;
1725 Py_ssize_t i, n;
1726
1727 if (!PyDict_Check(op))
1728 return;
1729 mp = ((PyDictObject *)op);
1730 oldkeys = mp->ma_keys;
1731 oldvalues = mp->ma_values;
1732 if (oldvalues == empty_values)
1733 return;
1734 /* Empty the dict... */
1735 DK_INCREF(Py_EMPTY_KEYS);
1736 mp->ma_keys = Py_EMPTY_KEYS;
1737 mp->ma_values = empty_values;
1738 mp->ma_used = 0;
1739 mp->ma_version_tag = DICT_NEXT_VERSION();
1740 /* ...then clear the keys and values */
1741 if (oldvalues != NULL) {
1742 n = oldkeys->dk_nentries;
1743 for (i = 0; i < n; i++)
1744 Py_CLEAR(oldvalues[i]);
1745 free_values(oldvalues);
1746 DK_DECREF(oldkeys);
1747 }
1748 else {
1749 assert(oldkeys->dk_refcnt == 1);
1750 DK_DECREF(oldkeys);
1751 }
1752 assert(_PyDict_CheckConsistency(mp));
1753 }
1754
1755 /* Internal version of PyDict_Next that returns a hash value in addition
1756 * to the key and value.
1757 * Return 1 on success, return 0 when the reached the end of the dictionary
1758 * (or if op is not a dictionary)
1759 */
1760 int
_PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue,Py_hash_t * phash)1761 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
1762 PyObject **pvalue, Py_hash_t *phash)
1763 {
1764 Py_ssize_t i, n;
1765 PyDictObject *mp;
1766 PyDictKeyEntry *entry_ptr;
1767 PyObject *value;
1768
1769 if (!PyDict_Check(op))
1770 return 0;
1771 mp = (PyDictObject *)op;
1772 i = *ppos;
1773 n = mp->ma_keys->dk_nentries;
1774 if ((size_t)i >= (size_t)n)
1775 return 0;
1776 if (mp->ma_values) {
1777 PyObject **value_ptr = &mp->ma_values[i];
1778 while (i < n && *value_ptr == NULL) {
1779 value_ptr++;
1780 i++;
1781 }
1782 if (i >= n)
1783 return 0;
1784 entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
1785 value = *value_ptr;
1786 }
1787 else {
1788 entry_ptr = &DK_ENTRIES(mp->ma_keys)[i];
1789 while (i < n && entry_ptr->me_value == NULL) {
1790 entry_ptr++;
1791 i++;
1792 }
1793 if (i >= n)
1794 return 0;
1795 value = entry_ptr->me_value;
1796 }
1797 *ppos = i+1;
1798 if (pkey)
1799 *pkey = entry_ptr->me_key;
1800 if (phash)
1801 *phash = entry_ptr->me_hash;
1802 if (pvalue)
1803 *pvalue = value;
1804 return 1;
1805 }
1806
1807 /*
1808 * Iterate over a dict. Use like so:
1809 *
1810 * Py_ssize_t i;
1811 * PyObject *key, *value;
1812 * i = 0; # important! i should not otherwise be changed by you
1813 * while (PyDict_Next(yourdict, &i, &key, &value)) {
1814 * Refer to borrowed references in key and value.
1815 * }
1816 *
1817 * Return 1 on success, return 0 when the reached the end of the dictionary
1818 * (or if op is not a dictionary)
1819 *
1820 * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
1821 * mutates the dict. One exception: it is safe if the loop merely changes
1822 * the values associated with the keys (but doesn't insert new keys or
1823 * delete keys), via PyDict_SetItem().
1824 */
1825 int
PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue)1826 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
1827 {
1828 return _PyDict_Next(op, ppos, pkey, pvalue, NULL);
1829 }
1830
1831 /* Internal version of dict.pop(). */
1832 PyObject *
_PyDict_Pop_KnownHash(PyObject * dict,PyObject * key,Py_hash_t hash,PyObject * deflt)1833 _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
1834 {
1835 Py_ssize_t ix, hashpos;
1836 PyObject *old_value, *old_key;
1837 PyDictKeyEntry *ep;
1838 PyObject **value_addr;
1839 PyDictObject *mp;
1840
1841 assert(PyDict_Check(dict));
1842 mp = (PyDictObject *)dict;
1843
1844 if (mp->ma_used == 0) {
1845 if (deflt) {
1846 Py_INCREF(deflt);
1847 return deflt;
1848 }
1849 _PyErr_SetKeyError(key);
1850 return NULL;
1851 }
1852 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1853 if (ix == DKIX_ERROR)
1854 return NULL;
1855 if (ix == DKIX_EMPTY || *value_addr == NULL) {
1856 if (deflt) {
1857 Py_INCREF(deflt);
1858 return deflt;
1859 }
1860 _PyErr_SetKeyError(key);
1861 return NULL;
1862 }
1863
1864 // Split table doesn't allow deletion. Combine it.
1865 if (_PyDict_HasSplitTable(mp)) {
1866 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
1867 return NULL;
1868 }
1869 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
1870 assert(ix >= 0);
1871 }
1872
1873 old_value = *value_addr;
1874 assert(old_value != NULL);
1875 *value_addr = NULL;
1876 mp->ma_used--;
1877 mp->ma_version_tag = DICT_NEXT_VERSION();
1878 dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
1879 ep = &DK_ENTRIES(mp->ma_keys)[ix];
1880 ENSURE_ALLOWS_DELETIONS(mp);
1881 old_key = ep->me_key;
1882 ep->me_key = NULL;
1883 Py_DECREF(old_key);
1884
1885 assert(_PyDict_CheckConsistency(mp));
1886 return old_value;
1887 }
1888
1889 PyObject *
_PyDict_Pop(PyObject * dict,PyObject * key,PyObject * deflt)1890 _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
1891 {
1892 Py_hash_t hash;
1893
1894 if (((PyDictObject *)dict)->ma_used == 0) {
1895 if (deflt) {
1896 Py_INCREF(deflt);
1897 return deflt;
1898 }
1899 _PyErr_SetKeyError(key);
1900 return NULL;
1901 }
1902 if (!PyUnicode_CheckExact(key) ||
1903 (hash = ((PyASCIIObject *) key)->hash) == -1) {
1904 hash = PyObject_Hash(key);
1905 if (hash == -1)
1906 return NULL;
1907 }
1908 return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
1909 }
1910
1911 /* Internal version of dict.from_keys(). It is subclass-friendly. */
1912 PyObject *
_PyDict_FromKeys(PyObject * cls,PyObject * iterable,PyObject * value)1913 _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
1914 {
1915 PyObject *it; /* iter(iterable) */
1916 PyObject *key;
1917 PyObject *d;
1918 int status;
1919
1920 d = PyObject_CallObject(cls, NULL);
1921 if (d == NULL)
1922 return NULL;
1923
1924 if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
1925 if (PyDict_CheckExact(iterable)) {
1926 PyDictObject *mp = (PyDictObject *)d;
1927 PyObject *oldvalue;
1928 Py_ssize_t pos = 0;
1929 PyObject *key;
1930 Py_hash_t hash;
1931
1932 if (dictresize(mp, ESTIMATE_SIZE(Py_SIZE(iterable)))) {
1933 Py_DECREF(d);
1934 return NULL;
1935 }
1936
1937 while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) {
1938 if (insertdict(mp, key, hash, value)) {
1939 Py_DECREF(d);
1940 return NULL;
1941 }
1942 }
1943 return d;
1944 }
1945 if (PyAnySet_CheckExact(iterable)) {
1946 PyDictObject *mp = (PyDictObject *)d;
1947 Py_ssize_t pos = 0;
1948 PyObject *key;
1949 Py_hash_t hash;
1950
1951 if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
1952 Py_DECREF(d);
1953 return NULL;
1954 }
1955
1956 while (_PySet_NextEntry(iterable, &pos, &key, &hash)) {
1957 if (insertdict(mp, key, hash, value)) {
1958 Py_DECREF(d);
1959 return NULL;
1960 }
1961 }
1962 return d;
1963 }
1964 }
1965
1966 it = PyObject_GetIter(iterable);
1967 if (it == NULL){
1968 Py_DECREF(d);
1969 return NULL;
1970 }
1971
1972 if (PyDict_CheckExact(d)) {
1973 while ((key = PyIter_Next(it)) != NULL) {
1974 status = PyDict_SetItem(d, key, value);
1975 Py_DECREF(key);
1976 if (status < 0)
1977 goto Fail;
1978 }
1979 } else {
1980 while ((key = PyIter_Next(it)) != NULL) {
1981 status = PyObject_SetItem(d, key, value);
1982 Py_DECREF(key);
1983 if (status < 0)
1984 goto Fail;
1985 }
1986 }
1987
1988 if (PyErr_Occurred())
1989 goto Fail;
1990 Py_DECREF(it);
1991 return d;
1992
1993 Fail:
1994 Py_DECREF(it);
1995 Py_DECREF(d);
1996 return NULL;
1997 }
1998
1999 /* Methods */
2000
2001 static void
dict_dealloc(PyDictObject * mp)2002 dict_dealloc(PyDictObject *mp)
2003 {
2004 PyObject **values = mp->ma_values;
2005 PyDictKeysObject *keys = mp->ma_keys;
2006 Py_ssize_t i, n;
2007 PyObject_GC_UnTrack(mp);
2008 Py_TRASHCAN_SAFE_BEGIN(mp)
2009 if (values != NULL) {
2010 if (values != empty_values) {
2011 for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
2012 Py_XDECREF(values[i]);
2013 }
2014 free_values(values);
2015 }
2016 DK_DECREF(keys);
2017 }
2018 else if (keys != NULL) {
2019 assert(keys->dk_refcnt == 1);
2020 DK_DECREF(keys);
2021 }
2022 if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
2023 free_list[numfree++] = mp;
2024 else
2025 Py_TYPE(mp)->tp_free((PyObject *)mp);
2026 Py_TRASHCAN_SAFE_END(mp)
2027 }
2028
2029
2030 static PyObject *
dict_repr(PyDictObject * mp)2031 dict_repr(PyDictObject *mp)
2032 {
2033 Py_ssize_t i;
2034 PyObject *key = NULL, *value = NULL;
2035 _PyUnicodeWriter writer;
2036 int first;
2037
2038 i = Py_ReprEnter((PyObject *)mp);
2039 if (i != 0) {
2040 return i > 0 ? PyUnicode_FromString("{...}") : NULL;
2041 }
2042
2043 if (mp->ma_used == 0) {
2044 Py_ReprLeave((PyObject *)mp);
2045 return PyUnicode_FromString("{}");
2046 }
2047
2048 _PyUnicodeWriter_Init(&writer);
2049 writer.overallocate = 1;
2050 /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */
2051 writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1;
2052
2053 if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0)
2054 goto error;
2055
2056 /* Do repr() on each key+value pair, and insert ": " between them.
2057 Note that repr may mutate the dict. */
2058 i = 0;
2059 first = 1;
2060 while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
2061 PyObject *s;
2062 int res;
2063
2064 /* Prevent repr from deleting key or value during key format. */
2065 Py_INCREF(key);
2066 Py_INCREF(value);
2067
2068 if (!first) {
2069 if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0)
2070 goto error;
2071 }
2072 first = 0;
2073
2074 s = PyObject_Repr(key);
2075 if (s == NULL)
2076 goto error;
2077 res = _PyUnicodeWriter_WriteStr(&writer, s);
2078 Py_DECREF(s);
2079 if (res < 0)
2080 goto error;
2081
2082 if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0)
2083 goto error;
2084
2085 s = PyObject_Repr(value);
2086 if (s == NULL)
2087 goto error;
2088 res = _PyUnicodeWriter_WriteStr(&writer, s);
2089 Py_DECREF(s);
2090 if (res < 0)
2091 goto error;
2092
2093 Py_CLEAR(key);
2094 Py_CLEAR(value);
2095 }
2096
2097 writer.overallocate = 0;
2098 if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0)
2099 goto error;
2100
2101 Py_ReprLeave((PyObject *)mp);
2102
2103 return _PyUnicodeWriter_Finish(&writer);
2104
2105 error:
2106 Py_ReprLeave((PyObject *)mp);
2107 _PyUnicodeWriter_Dealloc(&writer);
2108 Py_XDECREF(key);
2109 Py_XDECREF(value);
2110 return NULL;
2111 }
2112
2113 static Py_ssize_t
dict_length(PyDictObject * mp)2114 dict_length(PyDictObject *mp)
2115 {
2116 return mp->ma_used;
2117 }
2118
2119 static PyObject *
dict_subscript(PyDictObject * mp,PyObject * key)2120 dict_subscript(PyDictObject *mp, PyObject *key)
2121 {
2122 PyObject *v;
2123 Py_ssize_t ix;
2124 Py_hash_t hash;
2125 PyObject **value_addr;
2126
2127 if (!PyUnicode_CheckExact(key) ||
2128 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2129 hash = PyObject_Hash(key);
2130 if (hash == -1)
2131 return NULL;
2132 }
2133 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
2134 if (ix == DKIX_ERROR)
2135 return NULL;
2136 if (ix == DKIX_EMPTY || *value_addr == NULL) {
2137 if (!PyDict_CheckExact(mp)) {
2138 /* Look up __missing__ method if we're a subclass. */
2139 PyObject *missing, *res;
2140 _Py_IDENTIFIER(__missing__);
2141 missing = _PyObject_LookupSpecial((PyObject *)mp, &PyId___missing__);
2142 if (missing != NULL) {
2143 res = PyObject_CallFunctionObjArgs(missing,
2144 key, NULL);
2145 Py_DECREF(missing);
2146 return res;
2147 }
2148 else if (PyErr_Occurred())
2149 return NULL;
2150 }
2151 _PyErr_SetKeyError(key);
2152 return NULL;
2153 }
2154 v = *value_addr;
2155 Py_INCREF(v);
2156 return v;
2157 }
2158
2159 static int
dict_ass_sub(PyDictObject * mp,PyObject * v,PyObject * w)2160 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
2161 {
2162 if (w == NULL)
2163 return PyDict_DelItem((PyObject *)mp, v);
2164 else
2165 return PyDict_SetItem((PyObject *)mp, v, w);
2166 }
2167
2168 static PyMappingMethods dict_as_mapping = {
2169 (lenfunc)dict_length, /*mp_length*/
2170 (binaryfunc)dict_subscript, /*mp_subscript*/
2171 (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
2172 };
2173
2174 static PyObject *
dict_keys(PyDictObject * mp)2175 dict_keys(PyDictObject *mp)
2176 {
2177 PyObject *v;
2178 Py_ssize_t i, j;
2179 PyDictKeyEntry *ep;
2180 Py_ssize_t size, n, offset;
2181 PyObject **value_ptr;
2182
2183 again:
2184 n = mp->ma_used;
2185 v = PyList_New(n);
2186 if (v == NULL)
2187 return NULL;
2188 if (n != mp->ma_used) {
2189 /* Durnit. The allocations caused the dict to resize.
2190 * Just start over, this shouldn't normally happen.
2191 */
2192 Py_DECREF(v);
2193 goto again;
2194 }
2195 ep = DK_ENTRIES(mp->ma_keys);
2196 size = mp->ma_keys->dk_nentries;
2197 if (mp->ma_values) {
2198 value_ptr = mp->ma_values;
2199 offset = sizeof(PyObject *);
2200 }
2201 else {
2202 value_ptr = &ep[0].me_value;
2203 offset = sizeof(PyDictKeyEntry);
2204 }
2205 for (i = 0, j = 0; i < size; i++) {
2206 if (*value_ptr != NULL) {
2207 PyObject *key = ep[i].me_key;
2208 Py_INCREF(key);
2209 PyList_SET_ITEM(v, j, key);
2210 j++;
2211 }
2212 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2213 }
2214 assert(j == n);
2215 return v;
2216 }
2217
2218 static PyObject *
dict_values(PyDictObject * mp)2219 dict_values(PyDictObject *mp)
2220 {
2221 PyObject *v;
2222 Py_ssize_t i, j;
2223 PyDictKeyEntry *ep;
2224 Py_ssize_t size, n, offset;
2225 PyObject **value_ptr;
2226
2227 again:
2228 n = mp->ma_used;
2229 v = PyList_New(n);
2230 if (v == NULL)
2231 return NULL;
2232 if (n != mp->ma_used) {
2233 /* Durnit. The allocations caused the dict to resize.
2234 * Just start over, this shouldn't normally happen.
2235 */
2236 Py_DECREF(v);
2237 goto again;
2238 }
2239 ep = DK_ENTRIES(mp->ma_keys);
2240 size = mp->ma_keys->dk_nentries;
2241 if (mp->ma_values) {
2242 value_ptr = mp->ma_values;
2243 offset = sizeof(PyObject *);
2244 }
2245 else {
2246 value_ptr = &ep[0].me_value;
2247 offset = sizeof(PyDictKeyEntry);
2248 }
2249 for (i = 0, j = 0; i < size; i++) {
2250 PyObject *value = *value_ptr;
2251 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2252 if (value != NULL) {
2253 Py_INCREF(value);
2254 PyList_SET_ITEM(v, j, value);
2255 j++;
2256 }
2257 }
2258 assert(j == n);
2259 return v;
2260 }
2261
2262 static PyObject *
dict_items(PyDictObject * mp)2263 dict_items(PyDictObject *mp)
2264 {
2265 PyObject *v;
2266 Py_ssize_t i, j, n;
2267 Py_ssize_t size, offset;
2268 PyObject *item, *key;
2269 PyDictKeyEntry *ep;
2270 PyObject **value_ptr;
2271
2272 /* Preallocate the list of tuples, to avoid allocations during
2273 * the loop over the items, which could trigger GC, which
2274 * could resize the dict. :-(
2275 */
2276 again:
2277 n = mp->ma_used;
2278 v = PyList_New(n);
2279 if (v == NULL)
2280 return NULL;
2281 for (i = 0; i < n; i++) {
2282 item = PyTuple_New(2);
2283 if (item == NULL) {
2284 Py_DECREF(v);
2285 return NULL;
2286 }
2287 PyList_SET_ITEM(v, i, item);
2288 }
2289 if (n != mp->ma_used) {
2290 /* Durnit. The allocations caused the dict to resize.
2291 * Just start over, this shouldn't normally happen.
2292 */
2293 Py_DECREF(v);
2294 goto again;
2295 }
2296 /* Nothing we do below makes any function calls. */
2297 ep = DK_ENTRIES(mp->ma_keys);
2298 size = mp->ma_keys->dk_nentries;
2299 if (mp->ma_values) {
2300 value_ptr = mp->ma_values;
2301 offset = sizeof(PyObject *);
2302 }
2303 else {
2304 value_ptr = &ep[0].me_value;
2305 offset = sizeof(PyDictKeyEntry);
2306 }
2307 for (i = 0, j = 0; i < size; i++) {
2308 PyObject *value = *value_ptr;
2309 value_ptr = (PyObject **)(((char *)value_ptr) + offset);
2310 if (value != NULL) {
2311 key = ep[i].me_key;
2312 item = PyList_GET_ITEM(v, j);
2313 Py_INCREF(key);
2314 PyTuple_SET_ITEM(item, 0, key);
2315 Py_INCREF(value);
2316 PyTuple_SET_ITEM(item, 1, value);
2317 j++;
2318 }
2319 }
2320 assert(j == n);
2321 return v;
2322 }
2323
2324 /*[clinic input]
2325 @classmethod
2326 dict.fromkeys
2327 iterable: object
2328 value: object=None
2329 /
2330
2331 Returns a new dict with keys from iterable and values equal to value.
2332 [clinic start generated code]*/
2333
2334 static PyObject *
dict_fromkeys_impl(PyTypeObject * type,PyObject * iterable,PyObject * value)2335 dict_fromkeys_impl(PyTypeObject *type, PyObject *iterable, PyObject *value)
2336 /*[clinic end generated code: output=8fb98e4b10384999 input=b85a667f9bf4669d]*/
2337 {
2338 return _PyDict_FromKeys((PyObject *)type, iterable, value);
2339 }
2340
2341 static int
dict_update_common(PyObject * self,PyObject * args,PyObject * kwds,const char * methname)2342 dict_update_common(PyObject *self, PyObject *args, PyObject *kwds,
2343 const char *methname)
2344 {
2345 PyObject *arg = NULL;
2346 int result = 0;
2347
2348 if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg))
2349 result = -1;
2350
2351 else if (arg != NULL) {
2352 _Py_IDENTIFIER(keys);
2353 if (_PyObject_HasAttrId(arg, &PyId_keys))
2354 result = PyDict_Merge(self, arg, 1);
2355 else
2356 result = PyDict_MergeFromSeq2(self, arg, 1);
2357 }
2358 if (result == 0 && kwds != NULL) {
2359 if (PyArg_ValidateKeywordArguments(kwds))
2360 result = PyDict_Merge(self, kwds, 1);
2361 else
2362 result = -1;
2363 }
2364 return result;
2365 }
2366
2367 static PyObject *
dict_update(PyObject * self,PyObject * args,PyObject * kwds)2368 dict_update(PyObject *self, PyObject *args, PyObject *kwds)
2369 {
2370 if (dict_update_common(self, args, kwds, "update") != -1)
2371 Py_RETURN_NONE;
2372 return NULL;
2373 }
2374
2375 /* Update unconditionally replaces existing items.
2376 Merge has a 3rd argument 'override'; if set, it acts like Update,
2377 otherwise it leaves existing items unchanged.
2378
2379 PyDict_{Update,Merge} update/merge from a mapping object.
2380
2381 PyDict_MergeFromSeq2 updates/merges from any iterable object
2382 producing iterable objects of length 2.
2383 */
2384
2385 int
PyDict_MergeFromSeq2(PyObject * d,PyObject * seq2,int override)2386 PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
2387 {
2388 PyObject *it; /* iter(seq2) */
2389 Py_ssize_t i; /* index into seq2 of current element */
2390 PyObject *item; /* seq2[i] */
2391 PyObject *fast; /* item as a 2-tuple or 2-list */
2392
2393 assert(d != NULL);
2394 assert(PyDict_Check(d));
2395 assert(seq2 != NULL);
2396
2397 it = PyObject_GetIter(seq2);
2398 if (it == NULL)
2399 return -1;
2400
2401 for (i = 0; ; ++i) {
2402 PyObject *key, *value;
2403 Py_ssize_t n;
2404
2405 fast = NULL;
2406 item = PyIter_Next(it);
2407 if (item == NULL) {
2408 if (PyErr_Occurred())
2409 goto Fail;
2410 break;
2411 }
2412
2413 /* Convert item to sequence, and verify length 2. */
2414 fast = PySequence_Fast(item, "");
2415 if (fast == NULL) {
2416 if (PyErr_ExceptionMatches(PyExc_TypeError))
2417 PyErr_Format(PyExc_TypeError,
2418 "cannot convert dictionary update "
2419 "sequence element #%zd to a sequence",
2420 i);
2421 goto Fail;
2422 }
2423 n = PySequence_Fast_GET_SIZE(fast);
2424 if (n != 2) {
2425 PyErr_Format(PyExc_ValueError,
2426 "dictionary update sequence element #%zd "
2427 "has length %zd; 2 is required",
2428 i, n);
2429 goto Fail;
2430 }
2431
2432 /* Update/merge with this (key, value) pair. */
2433 key = PySequence_Fast_GET_ITEM(fast, 0);
2434 value = PySequence_Fast_GET_ITEM(fast, 1);
2435 if (override || PyDict_GetItem(d, key) == NULL) {
2436 int status = PyDict_SetItem(d, key, value);
2437 if (status < 0)
2438 goto Fail;
2439 }
2440 Py_DECREF(fast);
2441 Py_DECREF(item);
2442 }
2443
2444 i = 0;
2445 assert(_PyDict_CheckConsistency((PyDictObject *)d));
2446 goto Return;
2447 Fail:
2448 Py_XDECREF(item);
2449 Py_XDECREF(fast);
2450 i = -1;
2451 Return:
2452 Py_DECREF(it);
2453 return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
2454 }
2455
2456 static int
dict_merge(PyObject * a,PyObject * b,int override)2457 dict_merge(PyObject *a, PyObject *b, int override)
2458 {
2459 PyDictObject *mp, *other;
2460 Py_ssize_t i, n;
2461 PyDictKeyEntry *entry, *ep0;
2462
2463 assert(0 <= override && override <= 2);
2464
2465 /* We accept for the argument either a concrete dictionary object,
2466 * or an abstract "mapping" object. For the former, we can do
2467 * things quite efficiently. For the latter, we only require that
2468 * PyMapping_Keys() and PyObject_GetItem() be supported.
2469 */
2470 if (a == NULL || !PyDict_Check(a) || b == NULL) {
2471 PyErr_BadInternalCall();
2472 return -1;
2473 }
2474 mp = (PyDictObject*)a;
2475 if (PyDict_Check(b)) {
2476 other = (PyDictObject*)b;
2477 if (other == mp || other->ma_used == 0)
2478 /* a.update(a) or a.update({}); nothing to do */
2479 return 0;
2480 if (mp->ma_used == 0)
2481 /* Since the target dict is empty, PyDict_GetItem()
2482 * always returns NULL. Setting override to 1
2483 * skips the unnecessary test.
2484 */
2485 override = 1;
2486 /* Do one big resize at the start, rather than
2487 * incrementally resizing as we insert new items. Expect
2488 * that there will be no (or few) overlapping keys.
2489 */
2490 if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
2491 if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
2492 return -1;
2493 }
2494 }
2495 ep0 = DK_ENTRIES(other->ma_keys);
2496 for (i = 0, n = other->ma_keys->dk_nentries; i < n; i++) {
2497 PyObject *key, *value;
2498 Py_hash_t hash;
2499 entry = &ep0[i];
2500 key = entry->me_key;
2501 hash = entry->me_hash;
2502 if (other->ma_values)
2503 value = other->ma_values[i];
2504 else
2505 value = entry->me_value;
2506
2507 if (value != NULL) {
2508 int err = 0;
2509 Py_INCREF(key);
2510 Py_INCREF(value);
2511 if (override == 1)
2512 err = insertdict(mp, key, hash, value);
2513 else if (_PyDict_GetItem_KnownHash(a, key, hash) == NULL) {
2514 if (PyErr_Occurred()) {
2515 Py_DECREF(value);
2516 Py_DECREF(key);
2517 return -1;
2518 }
2519 err = insertdict(mp, key, hash, value);
2520 }
2521 else if (override != 0) {
2522 _PyErr_SetKeyError(key);
2523 Py_DECREF(value);
2524 Py_DECREF(key);
2525 return -1;
2526 }
2527 Py_DECREF(value);
2528 Py_DECREF(key);
2529 if (err != 0)
2530 return -1;
2531
2532 if (n != other->ma_keys->dk_nentries) {
2533 PyErr_SetString(PyExc_RuntimeError,
2534 "dict mutated during update");
2535 return -1;
2536 }
2537 }
2538 }
2539 }
2540 else {
2541 /* Do it the generic, slower way */
2542 PyObject *keys = PyMapping_Keys(b);
2543 PyObject *iter;
2544 PyObject *key, *value;
2545 int status;
2546
2547 if (keys == NULL)
2548 /* Docstring says this is equivalent to E.keys() so
2549 * if E doesn't have a .keys() method we want
2550 * AttributeError to percolate up. Might as well
2551 * do the same for any other error.
2552 */
2553 return -1;
2554
2555 iter = PyObject_GetIter(keys);
2556 Py_DECREF(keys);
2557 if (iter == NULL)
2558 return -1;
2559
2560 for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
2561 if (override != 1 && PyDict_GetItem(a, key) != NULL) {
2562 if (override != 0) {
2563 _PyErr_SetKeyError(key);
2564 Py_DECREF(key);
2565 Py_DECREF(iter);
2566 return -1;
2567 }
2568 Py_DECREF(key);
2569 continue;
2570 }
2571 value = PyObject_GetItem(b, key);
2572 if (value == NULL) {
2573 Py_DECREF(iter);
2574 Py_DECREF(key);
2575 return -1;
2576 }
2577 status = PyDict_SetItem(a, key, value);
2578 Py_DECREF(key);
2579 Py_DECREF(value);
2580 if (status < 0) {
2581 Py_DECREF(iter);
2582 return -1;
2583 }
2584 }
2585 Py_DECREF(iter);
2586 if (PyErr_Occurred())
2587 /* Iterator completed, via error */
2588 return -1;
2589 }
2590 assert(_PyDict_CheckConsistency((PyDictObject *)a));
2591 return 0;
2592 }
2593
2594 int
PyDict_Update(PyObject * a,PyObject * b)2595 PyDict_Update(PyObject *a, PyObject *b)
2596 {
2597 return dict_merge(a, b, 1);
2598 }
2599
2600 int
PyDict_Merge(PyObject * a,PyObject * b,int override)2601 PyDict_Merge(PyObject *a, PyObject *b, int override)
2602 {
2603 /* XXX Deprecate override not in (0, 1). */
2604 return dict_merge(a, b, override != 0);
2605 }
2606
2607 int
_PyDict_MergeEx(PyObject * a,PyObject * b,int override)2608 _PyDict_MergeEx(PyObject *a, PyObject *b, int override)
2609 {
2610 return dict_merge(a, b, override);
2611 }
2612
2613 static PyObject *
dict_copy(PyDictObject * mp)2614 dict_copy(PyDictObject *mp)
2615 {
2616 return PyDict_Copy((PyObject*)mp);
2617 }
2618
2619 PyObject *
PyDict_Copy(PyObject * o)2620 PyDict_Copy(PyObject *o)
2621 {
2622 PyObject *copy;
2623 PyDictObject *mp;
2624 Py_ssize_t i, n;
2625
2626 if (o == NULL || !PyDict_Check(o)) {
2627 PyErr_BadInternalCall();
2628 return NULL;
2629 }
2630 mp = (PyDictObject *)o;
2631 if (_PyDict_HasSplitTable(mp)) {
2632 PyDictObject *split_copy;
2633 Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
2634 PyObject **newvalues;
2635 newvalues = new_values(size);
2636 if (newvalues == NULL)
2637 return PyErr_NoMemory();
2638 split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
2639 if (split_copy == NULL) {
2640 free_values(newvalues);
2641 return NULL;
2642 }
2643 split_copy->ma_values = newvalues;
2644 split_copy->ma_keys = mp->ma_keys;
2645 split_copy->ma_used = mp->ma_used;
2646 DK_INCREF(mp->ma_keys);
2647 for (i = 0, n = size; i < n; i++) {
2648 PyObject *value = mp->ma_values[i];
2649 Py_XINCREF(value);
2650 split_copy->ma_values[i] = value;
2651 }
2652 if (_PyObject_GC_IS_TRACKED(mp))
2653 _PyObject_GC_TRACK(split_copy);
2654 return (PyObject *)split_copy;
2655 }
2656 copy = PyDict_New();
2657 if (copy == NULL)
2658 return NULL;
2659 if (PyDict_Merge(copy, o, 1) == 0)
2660 return copy;
2661 Py_DECREF(copy);
2662 return NULL;
2663 }
2664
2665 Py_ssize_t
PyDict_Size(PyObject * mp)2666 PyDict_Size(PyObject *mp)
2667 {
2668 if (mp == NULL || !PyDict_Check(mp)) {
2669 PyErr_BadInternalCall();
2670 return -1;
2671 }
2672 return ((PyDictObject *)mp)->ma_used;
2673 }
2674
2675 PyObject *
PyDict_Keys(PyObject * mp)2676 PyDict_Keys(PyObject *mp)
2677 {
2678 if (mp == NULL || !PyDict_Check(mp)) {
2679 PyErr_BadInternalCall();
2680 return NULL;
2681 }
2682 return dict_keys((PyDictObject *)mp);
2683 }
2684
2685 PyObject *
PyDict_Values(PyObject * mp)2686 PyDict_Values(PyObject *mp)
2687 {
2688 if (mp == NULL || !PyDict_Check(mp)) {
2689 PyErr_BadInternalCall();
2690 return NULL;
2691 }
2692 return dict_values((PyDictObject *)mp);
2693 }
2694
2695 PyObject *
PyDict_Items(PyObject * mp)2696 PyDict_Items(PyObject *mp)
2697 {
2698 if (mp == NULL || !PyDict_Check(mp)) {
2699 PyErr_BadInternalCall();
2700 return NULL;
2701 }
2702 return dict_items((PyDictObject *)mp);
2703 }
2704
2705 /* Return 1 if dicts equal, 0 if not, -1 if error.
2706 * Gets out as soon as any difference is detected.
2707 * Uses only Py_EQ comparison.
2708 */
2709 static int
dict_equal(PyDictObject * a,PyDictObject * b)2710 dict_equal(PyDictObject *a, PyDictObject *b)
2711 {
2712 Py_ssize_t i;
2713
2714 if (a->ma_used != b->ma_used)
2715 /* can't be equal if # of entries differ */
2716 return 0;
2717 /* Same # of entries -- check all of 'em. Exit early on any diff. */
2718 for (i = 0; i < a->ma_keys->dk_nentries; i++) {
2719 PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
2720 PyObject *aval;
2721 if (a->ma_values)
2722 aval = a->ma_values[i];
2723 else
2724 aval = ep->me_value;
2725 if (aval != NULL) {
2726 int cmp;
2727 PyObject *bval;
2728 PyObject **vaddr;
2729 PyObject *key = ep->me_key;
2730 /* temporarily bump aval's refcount to ensure it stays
2731 alive until we're done with it */
2732 Py_INCREF(aval);
2733 /* ditto for key */
2734 Py_INCREF(key);
2735 /* reuse the known hash value */
2736 if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr, NULL) < 0)
2737 bval = NULL;
2738 else
2739 bval = *vaddr;
2740 Py_DECREF(key);
2741 if (bval == NULL) {
2742 Py_DECREF(aval);
2743 if (PyErr_Occurred())
2744 return -1;
2745 return 0;
2746 }
2747 cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
2748 Py_DECREF(aval);
2749 if (cmp <= 0) /* error or not equal */
2750 return cmp;
2751 }
2752 }
2753 return 1;
2754 }
2755
2756 static PyObject *
dict_richcompare(PyObject * v,PyObject * w,int op)2757 dict_richcompare(PyObject *v, PyObject *w, int op)
2758 {
2759 int cmp;
2760 PyObject *res;
2761
2762 if (!PyDict_Check(v) || !PyDict_Check(w)) {
2763 res = Py_NotImplemented;
2764 }
2765 else if (op == Py_EQ || op == Py_NE) {
2766 cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
2767 if (cmp < 0)
2768 return NULL;
2769 res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
2770 }
2771 else
2772 res = Py_NotImplemented;
2773 Py_INCREF(res);
2774 return res;
2775 }
2776
2777 /*[clinic input]
2778
2779 @coexist
2780 dict.__contains__
2781
2782 key: object
2783 /
2784
2785 True if D has a key k, else False.
2786 [clinic start generated code]*/
2787
2788 static PyObject *
dict___contains__(PyDictObject * self,PyObject * key)2789 dict___contains__(PyDictObject *self, PyObject *key)
2790 /*[clinic end generated code: output=a3d03db709ed6e6b input=b852b2a19b51ab24]*/
2791 {
2792 register PyDictObject *mp = self;
2793 Py_hash_t hash;
2794 Py_ssize_t ix;
2795 PyObject **value_addr;
2796
2797 if (!PyUnicode_CheckExact(key) ||
2798 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2799 hash = PyObject_Hash(key);
2800 if (hash == -1)
2801 return NULL;
2802 }
2803 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
2804 if (ix == DKIX_ERROR)
2805 return NULL;
2806 if (ix == DKIX_EMPTY || *value_addr == NULL)
2807 Py_RETURN_FALSE;
2808 Py_RETURN_TRUE;
2809 }
2810
2811 static PyObject *
dict_get(PyDictObject * mp,PyObject * args)2812 dict_get(PyDictObject *mp, PyObject *args)
2813 {
2814 PyObject *key;
2815 PyObject *failobj = Py_None;
2816 PyObject *val = NULL;
2817 Py_hash_t hash;
2818 Py_ssize_t ix;
2819 PyObject **value_addr;
2820
2821 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
2822 return NULL;
2823
2824 if (!PyUnicode_CheckExact(key) ||
2825 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2826 hash = PyObject_Hash(key);
2827 if (hash == -1)
2828 return NULL;
2829 }
2830 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
2831 if (ix == DKIX_ERROR)
2832 return NULL;
2833 if (ix == DKIX_EMPTY || *value_addr == NULL)
2834 val = failobj;
2835 else
2836 val = *value_addr;
2837 Py_INCREF(val);
2838 return val;
2839 }
2840
2841 PyObject *
PyDict_SetDefault(PyObject * d,PyObject * key,PyObject * defaultobj)2842 PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
2843 {
2844 PyDictObject *mp = (PyDictObject *)d;
2845 PyObject *value;
2846 Py_hash_t hash;
2847 Py_ssize_t hashpos, ix;
2848 PyObject **value_addr;
2849
2850 if (!PyDict_Check(d)) {
2851 PyErr_BadInternalCall();
2852 return NULL;
2853 }
2854
2855 if (!PyUnicode_CheckExact(key) ||
2856 (hash = ((PyASCIIObject *) key)->hash) == -1) {
2857 hash = PyObject_Hash(key);
2858 if (hash == -1)
2859 return NULL;
2860 }
2861
2862 if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
2863 if (insertion_resize(mp) < 0)
2864 return NULL;
2865 }
2866
2867 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
2868 if (ix == DKIX_ERROR)
2869 return NULL;
2870
2871 if (_PyDict_HasSplitTable(mp) &&
2872 ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
2873 (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
2874 if (insertion_resize(mp) < 0) {
2875 return NULL;
2876 }
2877 find_empty_slot(mp, key, hash, &value_addr, &hashpos);
2878 ix = DKIX_EMPTY;
2879 }
2880
2881 if (ix == DKIX_EMPTY) {
2882 PyDictKeyEntry *ep, *ep0;
2883 value = defaultobj;
2884 if (mp->ma_keys->dk_usable <= 0) {
2885 if (insertion_resize(mp) < 0) {
2886 return NULL;
2887 }
2888 find_empty_slot(mp, key, hash, &value_addr, &hashpos);
2889 }
2890 ep0 = DK_ENTRIES(mp->ma_keys);
2891 ep = &ep0[mp->ma_keys->dk_nentries];
2892 dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
2893 Py_INCREF(key);
2894 Py_INCREF(value);
2895 MAINTAIN_TRACKING(mp, key, value);
2896 ep->me_key = key;
2897 ep->me_hash = hash;
2898 if (mp->ma_values) {
2899 assert(mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
2900 mp->ma_values[mp->ma_keys->dk_nentries] = value;
2901 }
2902 else {
2903 ep->me_value = value;
2904 }
2905 mp->ma_used++;
2906 mp->ma_version_tag = DICT_NEXT_VERSION();
2907 mp->ma_keys->dk_usable--;
2908 mp->ma_keys->dk_nentries++;
2909 assert(mp->ma_keys->dk_usable >= 0);
2910 }
2911 else if (*value_addr == NULL) {
2912 value = defaultobj;
2913 assert(_PyDict_HasSplitTable(mp));
2914 assert(ix == mp->ma_used);
2915 Py_INCREF(value);
2916 MAINTAIN_TRACKING(mp, key, value);
2917 *value_addr = value;
2918 mp->ma_used++;
2919 mp->ma_version_tag = DICT_NEXT_VERSION();
2920 }
2921 else {
2922 value = *value_addr;
2923 }
2924
2925 assert(_PyDict_CheckConsistency(mp));
2926 return value;
2927 }
2928
2929 static PyObject *
dict_setdefault(PyDictObject * mp,PyObject * args)2930 dict_setdefault(PyDictObject *mp, PyObject *args)
2931 {
2932 PyObject *key, *val;
2933 PyObject *defaultobj = Py_None;
2934
2935 if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &defaultobj))
2936 return NULL;
2937
2938 val = PyDict_SetDefault((PyObject *)mp, key, defaultobj);
2939 Py_XINCREF(val);
2940 return val;
2941 }
2942
2943 static PyObject *
dict_clear(PyDictObject * mp)2944 dict_clear(PyDictObject *mp)
2945 {
2946 PyDict_Clear((PyObject *)mp);
2947 Py_RETURN_NONE;
2948 }
2949
2950 static PyObject *
dict_pop(PyDictObject * mp,PyObject * args)2951 dict_pop(PyDictObject *mp, PyObject *args)
2952 {
2953 PyObject *key, *deflt = NULL;
2954
2955 if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
2956 return NULL;
2957
2958 return _PyDict_Pop((PyObject*)mp, key, deflt);
2959 }
2960
2961 static PyObject *
dict_popitem(PyDictObject * mp)2962 dict_popitem(PyDictObject *mp)
2963 {
2964 Py_ssize_t i, j;
2965 PyDictKeyEntry *ep0, *ep;
2966 PyObject *res;
2967
2968 /* Allocate the result tuple before checking the size. Believe it
2969 * or not, this allocation could trigger a garbage collection which
2970 * could empty the dict, so if we checked the size first and that
2971 * happened, the result would be an infinite loop (searching for an
2972 * entry that no longer exists). Note that the usual popitem()
2973 * idiom is "while d: k, v = d.popitem()". so needing to throw the
2974 * tuple away if the dict *is* empty isn't a significant
2975 * inefficiency -- possible, but unlikely in practice.
2976 */
2977 res = PyTuple_New(2);
2978 if (res == NULL)
2979 return NULL;
2980 if (mp->ma_used == 0) {
2981 Py_DECREF(res);
2982 PyErr_SetString(PyExc_KeyError,
2983 "popitem(): dictionary is empty");
2984 return NULL;
2985 }
2986 /* Convert split table to combined table */
2987 if (mp->ma_keys->dk_lookup == lookdict_split) {
2988 if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
2989 Py_DECREF(res);
2990 return NULL;
2991 }
2992 }
2993 ENSURE_ALLOWS_DELETIONS(mp);
2994
2995 /* Pop last item */
2996 ep0 = DK_ENTRIES(mp->ma_keys);
2997 i = mp->ma_keys->dk_nentries - 1;
2998 while (i >= 0 && ep0[i].me_value == NULL) {
2999 i--;
3000 }
3001 assert(i >= 0);
3002
3003 ep = &ep0[i];
3004 j = lookdict_index(mp->ma_keys, ep->me_hash, i);
3005 assert(j >= 0);
3006 assert(dk_get_index(mp->ma_keys, j) == i);
3007 dk_set_index(mp->ma_keys, j, DKIX_DUMMY);
3008
3009 PyTuple_SET_ITEM(res, 0, ep->me_key);
3010 PyTuple_SET_ITEM(res, 1, ep->me_value);
3011 ep->me_key = NULL;
3012 ep->me_value = NULL;
3013 /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
3014 mp->ma_keys->dk_nentries = i;
3015 mp->ma_used--;
3016 mp->ma_version_tag = DICT_NEXT_VERSION();
3017 assert(_PyDict_CheckConsistency(mp));
3018 return res;
3019 }
3020
3021 static int
dict_traverse(PyObject * op,visitproc visit,void * arg)3022 dict_traverse(PyObject *op, visitproc visit, void *arg)
3023 {
3024 PyDictObject *mp = (PyDictObject *)op;
3025 PyDictKeysObject *keys = mp->ma_keys;
3026 PyDictKeyEntry *entries = DK_ENTRIES(keys);
3027 Py_ssize_t i, n = keys->dk_nentries;
3028
3029 if (keys->dk_lookup == lookdict) {
3030 for (i = 0; i < n; i++) {
3031 if (entries[i].me_value != NULL) {
3032 Py_VISIT(entries[i].me_value);
3033 Py_VISIT(entries[i].me_key);
3034 }
3035 }
3036 }
3037 else {
3038 if (mp->ma_values != NULL) {
3039 for (i = 0; i < n; i++) {
3040 Py_VISIT(mp->ma_values[i]);
3041 }
3042 }
3043 else {
3044 for (i = 0; i < n; i++) {
3045 Py_VISIT(entries[i].me_value);
3046 }
3047 }
3048 }
3049 return 0;
3050 }
3051
3052 static int
dict_tp_clear(PyObject * op)3053 dict_tp_clear(PyObject *op)
3054 {
3055 PyDict_Clear(op);
3056 return 0;
3057 }
3058
3059 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
3060
3061 Py_ssize_t
_PyDict_SizeOf(PyDictObject * mp)3062 _PyDict_SizeOf(PyDictObject *mp)
3063 {
3064 Py_ssize_t size, usable, res;
3065
3066 size = DK_SIZE(mp->ma_keys);
3067 usable = USABLE_FRACTION(size);
3068
3069 res = _PyObject_SIZE(Py_TYPE(mp));
3070 if (mp->ma_values)
3071 res += usable * sizeof(PyObject*);
3072 /* If the dictionary is split, the keys portion is accounted-for
3073 in the type object. */
3074 if (mp->ma_keys->dk_refcnt == 1)
3075 res += (sizeof(PyDictKeysObject)
3076 - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
3077 + DK_IXSIZE(mp->ma_keys) * size
3078 + sizeof(PyDictKeyEntry) * usable);
3079 return res;
3080 }
3081
3082 Py_ssize_t
_PyDict_KeysSize(PyDictKeysObject * keys)3083 _PyDict_KeysSize(PyDictKeysObject *keys)
3084 {
3085 return (sizeof(PyDictKeysObject)
3086 - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
3087 + DK_IXSIZE(keys) * DK_SIZE(keys)
3088 + USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry));
3089 }
3090
3091 static PyObject *
dict_sizeof(PyDictObject * mp)3092 dict_sizeof(PyDictObject *mp)
3093 {
3094 return PyLong_FromSsize_t(_PyDict_SizeOf(mp));
3095 }
3096
3097 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
3098
3099 PyDoc_STRVAR(sizeof__doc__,
3100 "D.__sizeof__() -> size of D in memory, in bytes");
3101
3102 PyDoc_STRVAR(get__doc__,
3103 "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.");
3104
3105 PyDoc_STRVAR(setdefault_doc__,
3106 "D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D");
3107
3108 PyDoc_STRVAR(pop__doc__,
3109 "D.pop(k[,d]) -> v, remove specified key and return the corresponding value.\n\
3110 If key is not found, d is returned if given, otherwise KeyError is raised");
3111
3112 PyDoc_STRVAR(popitem__doc__,
3113 "D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
3114 2-tuple; but raise KeyError if D is empty.");
3115
3116 PyDoc_STRVAR(update__doc__,
3117 "D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n\
3118 If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]\n\
3119 If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v\n\
3120 In either case, this is followed by: for k in F: D[k] = F[k]");
3121
3122 PyDoc_STRVAR(clear__doc__,
3123 "D.clear() -> None. Remove all items from D.");
3124
3125 PyDoc_STRVAR(copy__doc__,
3126 "D.copy() -> a shallow copy of D");
3127
3128 /* Forward */
3129 static PyObject *dictkeys_new(PyObject *);
3130 static PyObject *dictitems_new(PyObject *);
3131 static PyObject *dictvalues_new(PyObject *);
3132
3133 PyDoc_STRVAR(keys__doc__,
3134 "D.keys() -> a set-like object providing a view on D's keys");
3135 PyDoc_STRVAR(items__doc__,
3136 "D.items() -> a set-like object providing a view on D's items");
3137 PyDoc_STRVAR(values__doc__,
3138 "D.values() -> an object providing a view on D's values");
3139
3140 static PyMethodDef mapp_methods[] = {
3141 DICT___CONTAINS___METHODDEF
3142 {"__getitem__", (PyCFunction)dict_subscript, METH_O | METH_COEXIST,
3143 getitem__doc__},
3144 {"__sizeof__", (PyCFunction)dict_sizeof, METH_NOARGS,
3145 sizeof__doc__},
3146 {"get", (PyCFunction)dict_get, METH_VARARGS,
3147 get__doc__},
3148 {"setdefault", (PyCFunction)dict_setdefault, METH_VARARGS,
3149 setdefault_doc__},
3150 {"pop", (PyCFunction)dict_pop, METH_VARARGS,
3151 pop__doc__},
3152 {"popitem", (PyCFunction)dict_popitem, METH_NOARGS,
3153 popitem__doc__},
3154 {"keys", (PyCFunction)dictkeys_new, METH_NOARGS,
3155 keys__doc__},
3156 {"items", (PyCFunction)dictitems_new, METH_NOARGS,
3157 items__doc__},
3158 {"values", (PyCFunction)dictvalues_new, METH_NOARGS,
3159 values__doc__},
3160 {"update", (PyCFunction)dict_update, METH_VARARGS | METH_KEYWORDS,
3161 update__doc__},
3162 DICT_FROMKEYS_METHODDEF
3163 {"clear", (PyCFunction)dict_clear, METH_NOARGS,
3164 clear__doc__},
3165 {"copy", (PyCFunction)dict_copy, METH_NOARGS,
3166 copy__doc__},
3167 {NULL, NULL} /* sentinel */
3168 };
3169
3170 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
3171 int
PyDict_Contains(PyObject * op,PyObject * key)3172 PyDict_Contains(PyObject *op, PyObject *key)
3173 {
3174 Py_hash_t hash;
3175 Py_ssize_t ix;
3176 PyDictObject *mp = (PyDictObject *)op;
3177 PyObject **value_addr;
3178
3179 if (!PyUnicode_CheckExact(key) ||
3180 (hash = ((PyASCIIObject *) key)->hash) == -1) {
3181 hash = PyObject_Hash(key);
3182 if (hash == -1)
3183 return -1;
3184 }
3185 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
3186 if (ix == DKIX_ERROR)
3187 return -1;
3188 return (ix != DKIX_EMPTY && *value_addr != NULL);
3189 }
3190
3191 /* Internal version of PyDict_Contains used when the hash value is already known */
3192 int
_PyDict_Contains(PyObject * op,PyObject * key,Py_hash_t hash)3193 _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash)
3194 {
3195 PyDictObject *mp = (PyDictObject *)op;
3196 PyObject **value_addr;
3197 Py_ssize_t ix;
3198
3199 ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
3200 if (ix == DKIX_ERROR)
3201 return -1;
3202 return (ix != DKIX_EMPTY && *value_addr != NULL);
3203 }
3204
3205 /* Hack to implement "key in dict" */
3206 static PySequenceMethods dict_as_sequence = {
3207 0, /* sq_length */
3208 0, /* sq_concat */
3209 0, /* sq_repeat */
3210 0, /* sq_item */
3211 0, /* sq_slice */
3212 0, /* sq_ass_item */
3213 0, /* sq_ass_slice */
3214 PyDict_Contains, /* sq_contains */
3215 0, /* sq_inplace_concat */
3216 0, /* sq_inplace_repeat */
3217 };
3218
3219 static PyObject *
dict_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3220 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3221 {
3222 PyObject *self;
3223 PyDictObject *d;
3224
3225 assert(type != NULL && type->tp_alloc != NULL);
3226 self = type->tp_alloc(type, 0);
3227 if (self == NULL)
3228 return NULL;
3229 d = (PyDictObject *)self;
3230
3231 /* The object has been implicitly tracked by tp_alloc */
3232 if (type == &PyDict_Type)
3233 _PyObject_GC_UNTRACK(d);
3234
3235 d->ma_used = 0;
3236 d->ma_version_tag = DICT_NEXT_VERSION();
3237 d->ma_keys = new_keys_object(PyDict_MINSIZE);
3238 if (d->ma_keys == NULL) {
3239 Py_DECREF(self);
3240 return NULL;
3241 }
3242 assert(_PyDict_CheckConsistency(d));
3243 return self;
3244 }
3245
3246 static int
dict_init(PyObject * self,PyObject * args,PyObject * kwds)3247 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
3248 {
3249 return dict_update_common(self, args, kwds, "dict");
3250 }
3251
3252 static PyObject *
dict_iter(PyDictObject * dict)3253 dict_iter(PyDictObject *dict)
3254 {
3255 return dictiter_new(dict, &PyDictIterKey_Type);
3256 }
3257
3258 PyDoc_STRVAR(dictionary_doc,
3259 "dict() -> new empty dictionary\n"
3260 "dict(mapping) -> new dictionary initialized from a mapping object's\n"
3261 " (key, value) pairs\n"
3262 "dict(iterable) -> new dictionary initialized as if via:\n"
3263 " d = {}\n"
3264 " for k, v in iterable:\n"
3265 " d[k] = v\n"
3266 "dict(**kwargs) -> new dictionary initialized with the name=value pairs\n"
3267 " in the keyword argument list. For example: dict(one=1, two=2)");
3268
3269 PyTypeObject PyDict_Type = {
3270 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3271 "dict",
3272 sizeof(PyDictObject),
3273 0,
3274 (destructor)dict_dealloc, /* tp_dealloc */
3275 0, /* tp_print */
3276 0, /* tp_getattr */
3277 0, /* tp_setattr */
3278 0, /* tp_reserved */
3279 (reprfunc)dict_repr, /* tp_repr */
3280 0, /* tp_as_number */
3281 &dict_as_sequence, /* tp_as_sequence */
3282 &dict_as_mapping, /* tp_as_mapping */
3283 PyObject_HashNotImplemented, /* tp_hash */
3284 0, /* tp_call */
3285 0, /* tp_str */
3286 PyObject_GenericGetAttr, /* tp_getattro */
3287 0, /* tp_setattro */
3288 0, /* tp_as_buffer */
3289 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3290 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DICT_SUBCLASS, /* tp_flags */
3291 dictionary_doc, /* tp_doc */
3292 dict_traverse, /* tp_traverse */
3293 dict_tp_clear, /* tp_clear */
3294 dict_richcompare, /* tp_richcompare */
3295 0, /* tp_weaklistoffset */
3296 (getiterfunc)dict_iter, /* tp_iter */
3297 0, /* tp_iternext */
3298 mapp_methods, /* tp_methods */
3299 0, /* tp_members */
3300 0, /* tp_getset */
3301 0, /* tp_base */
3302 0, /* tp_dict */
3303 0, /* tp_descr_get */
3304 0, /* tp_descr_set */
3305 0, /* tp_dictoffset */
3306 dict_init, /* tp_init */
3307 PyType_GenericAlloc, /* tp_alloc */
3308 dict_new, /* tp_new */
3309 PyObject_GC_Del, /* tp_free */
3310 };
3311
3312 PyObject *
_PyDict_GetItemId(PyObject * dp,struct _Py_Identifier * key)3313 _PyDict_GetItemId(PyObject *dp, struct _Py_Identifier *key)
3314 {
3315 PyObject *kv;
3316 kv = _PyUnicode_FromId(key); /* borrowed */
3317 if (kv == NULL) {
3318 PyErr_Clear();
3319 return NULL;
3320 }
3321 return PyDict_GetItem(dp, kv);
3322 }
3323
3324 /* For backward compatibility with old dictionary interface */
3325
3326 PyObject *
PyDict_GetItemString(PyObject * v,const char * key)3327 PyDict_GetItemString(PyObject *v, const char *key)
3328 {
3329 PyObject *kv, *rv;
3330 kv = PyUnicode_FromString(key);
3331 if (kv == NULL) {
3332 PyErr_Clear();
3333 return NULL;
3334 }
3335 rv = PyDict_GetItem(v, kv);
3336 Py_DECREF(kv);
3337 return rv;
3338 }
3339
3340 int
_PyDict_SetItemId(PyObject * v,struct _Py_Identifier * key,PyObject * item)3341 _PyDict_SetItemId(PyObject *v, struct _Py_Identifier *key, PyObject *item)
3342 {
3343 PyObject *kv;
3344 kv = _PyUnicode_FromId(key); /* borrowed */
3345 if (kv == NULL)
3346 return -1;
3347 return PyDict_SetItem(v, kv, item);
3348 }
3349
3350 int
PyDict_SetItemString(PyObject * v,const char * key,PyObject * item)3351 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
3352 {
3353 PyObject *kv;
3354 int err;
3355 kv = PyUnicode_FromString(key);
3356 if (kv == NULL)
3357 return -1;
3358 PyUnicode_InternInPlace(&kv); /* XXX Should we really? */
3359 err = PyDict_SetItem(v, kv, item);
3360 Py_DECREF(kv);
3361 return err;
3362 }
3363
3364 int
_PyDict_DelItemId(PyObject * v,_Py_Identifier * key)3365 _PyDict_DelItemId(PyObject *v, _Py_Identifier *key)
3366 {
3367 PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
3368 if (kv == NULL)
3369 return -1;
3370 return PyDict_DelItem(v, kv);
3371 }
3372
3373 int
PyDict_DelItemString(PyObject * v,const char * key)3374 PyDict_DelItemString(PyObject *v, const char *key)
3375 {
3376 PyObject *kv;
3377 int err;
3378 kv = PyUnicode_FromString(key);
3379 if (kv == NULL)
3380 return -1;
3381 err = PyDict_DelItem(v, kv);
3382 Py_DECREF(kv);
3383 return err;
3384 }
3385
3386 /* Dictionary iterator types */
3387
3388 typedef struct {
3389 PyObject_HEAD
3390 PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
3391 Py_ssize_t di_used;
3392 Py_ssize_t di_pos;
3393 PyObject* di_result; /* reusable result tuple for iteritems */
3394 Py_ssize_t len;
3395 } dictiterobject;
3396
3397 static PyObject *
dictiter_new(PyDictObject * dict,PyTypeObject * itertype)3398 dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
3399 {
3400 dictiterobject *di;
3401 di = PyObject_GC_New(dictiterobject, itertype);
3402 if (di == NULL)
3403 return NULL;
3404 Py_INCREF(dict);
3405 di->di_dict = dict;
3406 di->di_used = dict->ma_used;
3407 di->di_pos = 0;
3408 di->len = dict->ma_used;
3409 if (itertype == &PyDictIterItem_Type) {
3410 di->di_result = PyTuple_Pack(2, Py_None, Py_None);
3411 if (di->di_result == NULL) {
3412 Py_DECREF(di);
3413 return NULL;
3414 }
3415 }
3416 else
3417 di->di_result = NULL;
3418 _PyObject_GC_TRACK(di);
3419 return (PyObject *)di;
3420 }
3421
3422 static void
dictiter_dealloc(dictiterobject * di)3423 dictiter_dealloc(dictiterobject *di)
3424 {
3425 Py_XDECREF(di->di_dict);
3426 Py_XDECREF(di->di_result);
3427 PyObject_GC_Del(di);
3428 }
3429
3430 static int
dictiter_traverse(dictiterobject * di,visitproc visit,void * arg)3431 dictiter_traverse(dictiterobject *di, visitproc visit, void *arg)
3432 {
3433 Py_VISIT(di->di_dict);
3434 Py_VISIT(di->di_result);
3435 return 0;
3436 }
3437
3438 static PyObject *
dictiter_len(dictiterobject * di)3439 dictiter_len(dictiterobject *di)
3440 {
3441 Py_ssize_t len = 0;
3442 if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
3443 len = di->len;
3444 return PyLong_FromSize_t(len);
3445 }
3446
3447 PyDoc_STRVAR(length_hint_doc,
3448 "Private method returning an estimate of len(list(it)).");
3449
3450 static PyObject *
3451 dictiter_reduce(dictiterobject *di);
3452
3453 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3454
3455 static PyMethodDef dictiter_methods[] = {
3456 {"__length_hint__", (PyCFunction)dictiter_len, METH_NOARGS,
3457 length_hint_doc},
3458 {"__reduce__", (PyCFunction)dictiter_reduce, METH_NOARGS,
3459 reduce_doc},
3460 {NULL, NULL} /* sentinel */
3461 };
3462
3463 static PyObject*
dictiter_iternextkey(dictiterobject * di)3464 dictiter_iternextkey(dictiterobject *di)
3465 {
3466 PyObject *key;
3467 Py_ssize_t i, n;
3468 PyDictKeysObject *k;
3469 PyDictObject *d = di->di_dict;
3470
3471 if (d == NULL)
3472 return NULL;
3473 assert (PyDict_Check(d));
3474
3475 if (di->di_used != d->ma_used) {
3476 PyErr_SetString(PyExc_RuntimeError,
3477 "dictionary changed size during iteration");
3478 di->di_used = -1; /* Make this state sticky */
3479 return NULL;
3480 }
3481
3482 i = di->di_pos;
3483 k = d->ma_keys;
3484 n = k->dk_nentries;
3485 if (d->ma_values) {
3486 PyObject **value_ptr = &d->ma_values[i];
3487 while (i < n && *value_ptr == NULL) {
3488 value_ptr++;
3489 i++;
3490 }
3491 if (i >= n)
3492 goto fail;
3493 key = DK_ENTRIES(k)[i].me_key;
3494 }
3495 else {
3496 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i];
3497 while (i < n && entry_ptr->me_value == NULL) {
3498 entry_ptr++;
3499 i++;
3500 }
3501 if (i >= n)
3502 goto fail;
3503 key = entry_ptr->me_key;
3504 }
3505 di->di_pos = i+1;
3506 di->len--;
3507 Py_INCREF(key);
3508 return key;
3509
3510 fail:
3511 di->di_dict = NULL;
3512 Py_DECREF(d);
3513 return NULL;
3514 }
3515
3516 PyTypeObject PyDictIterKey_Type = {
3517 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3518 "dict_keyiterator", /* tp_name */
3519 sizeof(dictiterobject), /* tp_basicsize */
3520 0, /* tp_itemsize */
3521 /* methods */
3522 (destructor)dictiter_dealloc, /* tp_dealloc */
3523 0, /* tp_print */
3524 0, /* tp_getattr */
3525 0, /* tp_setattr */
3526 0, /* tp_reserved */
3527 0, /* tp_repr */
3528 0, /* tp_as_number */
3529 0, /* tp_as_sequence */
3530 0, /* tp_as_mapping */
3531 0, /* tp_hash */
3532 0, /* tp_call */
3533 0, /* tp_str */
3534 PyObject_GenericGetAttr, /* tp_getattro */
3535 0, /* tp_setattro */
3536 0, /* tp_as_buffer */
3537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3538 0, /* tp_doc */
3539 (traverseproc)dictiter_traverse, /* tp_traverse */
3540 0, /* tp_clear */
3541 0, /* tp_richcompare */
3542 0, /* tp_weaklistoffset */
3543 PyObject_SelfIter, /* tp_iter */
3544 (iternextfunc)dictiter_iternextkey, /* tp_iternext */
3545 dictiter_methods, /* tp_methods */
3546 0,
3547 };
3548
3549 static PyObject *
dictiter_iternextvalue(dictiterobject * di)3550 dictiter_iternextvalue(dictiterobject *di)
3551 {
3552 PyObject *value;
3553 Py_ssize_t i, n;
3554 PyDictObject *d = di->di_dict;
3555
3556 if (d == NULL)
3557 return NULL;
3558 assert (PyDict_Check(d));
3559
3560 if (di->di_used != d->ma_used) {
3561 PyErr_SetString(PyExc_RuntimeError,
3562 "dictionary changed size during iteration");
3563 di->di_used = -1; /* Make this state sticky */
3564 return NULL;
3565 }
3566
3567 i = di->di_pos;
3568 n = d->ma_keys->dk_nentries;
3569 if (d->ma_values) {
3570 PyObject **value_ptr = &d->ma_values[i];
3571 while (i < n && *value_ptr == NULL) {
3572 value_ptr++;
3573 i++;
3574 }
3575 if (i >= n)
3576 goto fail;
3577 value = *value_ptr;
3578 }
3579 else {
3580 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
3581 while (i < n && entry_ptr->me_value == NULL) {
3582 entry_ptr++;
3583 i++;
3584 }
3585 if (i >= n)
3586 goto fail;
3587 value = entry_ptr->me_value;
3588 }
3589 di->di_pos = i+1;
3590 di->len--;
3591 Py_INCREF(value);
3592 return value;
3593
3594 fail:
3595 di->di_dict = NULL;
3596 Py_DECREF(d);
3597 return NULL;
3598 }
3599
3600 PyTypeObject PyDictIterValue_Type = {
3601 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3602 "dict_valueiterator", /* tp_name */
3603 sizeof(dictiterobject), /* tp_basicsize */
3604 0, /* tp_itemsize */
3605 /* methods */
3606 (destructor)dictiter_dealloc, /* tp_dealloc */
3607 0, /* tp_print */
3608 0, /* tp_getattr */
3609 0, /* tp_setattr */
3610 0, /* tp_reserved */
3611 0, /* tp_repr */
3612 0, /* tp_as_number */
3613 0, /* tp_as_sequence */
3614 0, /* tp_as_mapping */
3615 0, /* tp_hash */
3616 0, /* tp_call */
3617 0, /* tp_str */
3618 PyObject_GenericGetAttr, /* tp_getattro */
3619 0, /* tp_setattro */
3620 0, /* tp_as_buffer */
3621 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3622 0, /* tp_doc */
3623 (traverseproc)dictiter_traverse, /* tp_traverse */
3624 0, /* tp_clear */
3625 0, /* tp_richcompare */
3626 0, /* tp_weaklistoffset */
3627 PyObject_SelfIter, /* tp_iter */
3628 (iternextfunc)dictiter_iternextvalue, /* tp_iternext */
3629 dictiter_methods, /* tp_methods */
3630 0,
3631 };
3632
3633 static PyObject *
dictiter_iternextitem(dictiterobject * di)3634 dictiter_iternextitem(dictiterobject *di)
3635 {
3636 PyObject *key, *value, *result = di->di_result;
3637 Py_ssize_t i, n;
3638 PyDictObject *d = di->di_dict;
3639
3640 if (d == NULL)
3641 return NULL;
3642 assert (PyDict_Check(d));
3643
3644 if (di->di_used != d->ma_used) {
3645 PyErr_SetString(PyExc_RuntimeError,
3646 "dictionary changed size during iteration");
3647 di->di_used = -1; /* Make this state sticky */
3648 return NULL;
3649 }
3650
3651 i = di->di_pos;
3652 n = d->ma_keys->dk_nentries;
3653 if (d->ma_values) {
3654 PyObject **value_ptr = &d->ma_values[i];
3655 while (i < n && *value_ptr == NULL) {
3656 value_ptr++;
3657 i++;
3658 }
3659 if (i >= n)
3660 goto fail;
3661 key = DK_ENTRIES(d->ma_keys)[i].me_key;
3662 value = *value_ptr;
3663 }
3664 else {
3665 PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i];
3666 while (i < n && entry_ptr->me_value == NULL) {
3667 entry_ptr++;
3668 i++;
3669 }
3670 if (i >= n)
3671 goto fail;
3672 key = entry_ptr->me_key;
3673 value = entry_ptr->me_value;
3674 }
3675 di->di_pos = i+1;
3676 di->len--;
3677 if (result->ob_refcnt == 1) {
3678 Py_INCREF(result);
3679 Py_DECREF(PyTuple_GET_ITEM(result, 0));
3680 Py_DECREF(PyTuple_GET_ITEM(result, 1));
3681 }
3682 else {
3683 result = PyTuple_New(2);
3684 if (result == NULL)
3685 return NULL;
3686 }
3687 Py_INCREF(key);
3688 Py_INCREF(value);
3689 PyTuple_SET_ITEM(result, 0, key); /* steals reference */
3690 PyTuple_SET_ITEM(result, 1, value); /* steals reference */
3691 return result;
3692
3693 fail:
3694 di->di_dict = NULL;
3695 Py_DECREF(d);
3696 return NULL;
3697 }
3698
3699 PyTypeObject PyDictIterItem_Type = {
3700 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3701 "dict_itemiterator", /* tp_name */
3702 sizeof(dictiterobject), /* tp_basicsize */
3703 0, /* tp_itemsize */
3704 /* methods */
3705 (destructor)dictiter_dealloc, /* tp_dealloc */
3706 0, /* tp_print */
3707 0, /* tp_getattr */
3708 0, /* tp_setattr */
3709 0, /* tp_reserved */
3710 0, /* tp_repr */
3711 0, /* tp_as_number */
3712 0, /* tp_as_sequence */
3713 0, /* tp_as_mapping */
3714 0, /* tp_hash */
3715 0, /* tp_call */
3716 0, /* tp_str */
3717 PyObject_GenericGetAttr, /* tp_getattro */
3718 0, /* tp_setattro */
3719 0, /* tp_as_buffer */
3720 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3721 0, /* tp_doc */
3722 (traverseproc)dictiter_traverse, /* tp_traverse */
3723 0, /* tp_clear */
3724 0, /* tp_richcompare */
3725 0, /* tp_weaklistoffset */
3726 PyObject_SelfIter, /* tp_iter */
3727 (iternextfunc)dictiter_iternextitem, /* tp_iternext */
3728 dictiter_methods, /* tp_methods */
3729 0,
3730 };
3731
3732
3733 static PyObject *
dictiter_reduce(dictiterobject * di)3734 dictiter_reduce(dictiterobject *di)
3735 {
3736 PyObject *list;
3737 dictiterobject tmp;
3738
3739 list = PyList_New(0);
3740 if (!list)
3741 return NULL;
3742
3743 /* copy the itertor state */
3744 tmp = *di;
3745 Py_XINCREF(tmp.di_dict);
3746
3747 /* iterate the temporary into a list */
3748 for(;;) {
3749 PyObject *element = 0;
3750 if (Py_TYPE(di) == &PyDictIterItem_Type)
3751 element = dictiter_iternextitem(&tmp);
3752 else if (Py_TYPE(di) == &PyDictIterKey_Type)
3753 element = dictiter_iternextkey(&tmp);
3754 else if (Py_TYPE(di) == &PyDictIterValue_Type)
3755 element = dictiter_iternextvalue(&tmp);
3756 else
3757 assert(0);
3758 if (element) {
3759 if (PyList_Append(list, element)) {
3760 Py_DECREF(element);
3761 Py_DECREF(list);
3762 Py_XDECREF(tmp.di_dict);
3763 return NULL;
3764 }
3765 Py_DECREF(element);
3766 } else
3767 break;
3768 }
3769 Py_XDECREF(tmp.di_dict);
3770 /* check for error */
3771 if (tmp.di_dict != NULL) {
3772 /* we have an error */
3773 Py_DECREF(list);
3774 return NULL;
3775 }
3776 return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), list);
3777 }
3778
3779 /***********************************************/
3780 /* View objects for keys(), items(), values(). */
3781 /***********************************************/
3782
3783 /* The instance lay-out is the same for all three; but the type differs. */
3784
3785 static void
dictview_dealloc(_PyDictViewObject * dv)3786 dictview_dealloc(_PyDictViewObject *dv)
3787 {
3788 Py_XDECREF(dv->dv_dict);
3789 PyObject_GC_Del(dv);
3790 }
3791
3792 static int
dictview_traverse(_PyDictViewObject * dv,visitproc visit,void * arg)3793 dictview_traverse(_PyDictViewObject *dv, visitproc visit, void *arg)
3794 {
3795 Py_VISIT(dv->dv_dict);
3796 return 0;
3797 }
3798
3799 static Py_ssize_t
dictview_len(_PyDictViewObject * dv)3800 dictview_len(_PyDictViewObject *dv)
3801 {
3802 Py_ssize_t len = 0;
3803 if (dv->dv_dict != NULL)
3804 len = dv->dv_dict->ma_used;
3805 return len;
3806 }
3807
3808 PyObject *
_PyDictView_New(PyObject * dict,PyTypeObject * type)3809 _PyDictView_New(PyObject *dict, PyTypeObject *type)
3810 {
3811 _PyDictViewObject *dv;
3812 if (dict == NULL) {
3813 PyErr_BadInternalCall();
3814 return NULL;
3815 }
3816 if (!PyDict_Check(dict)) {
3817 /* XXX Get rid of this restriction later */
3818 PyErr_Format(PyExc_TypeError,
3819 "%s() requires a dict argument, not '%s'",
3820 type->tp_name, dict->ob_type->tp_name);
3821 return NULL;
3822 }
3823 dv = PyObject_GC_New(_PyDictViewObject, type);
3824 if (dv == NULL)
3825 return NULL;
3826 Py_INCREF(dict);
3827 dv->dv_dict = (PyDictObject *)dict;
3828 _PyObject_GC_TRACK(dv);
3829 return (PyObject *)dv;
3830 }
3831
3832 /* TODO(guido): The views objects are not complete:
3833
3834 * support more set operations
3835 * support arbitrary mappings?
3836 - either these should be static or exported in dictobject.h
3837 - if public then they should probably be in builtins
3838 */
3839
3840 /* Return 1 if self is a subset of other, iterating over self;
3841 0 if not; -1 if an error occurred. */
3842 static int
all_contained_in(PyObject * self,PyObject * other)3843 all_contained_in(PyObject *self, PyObject *other)
3844 {
3845 PyObject *iter = PyObject_GetIter(self);
3846 int ok = 1;
3847
3848 if (iter == NULL)
3849 return -1;
3850 for (;;) {
3851 PyObject *next = PyIter_Next(iter);
3852 if (next == NULL) {
3853 if (PyErr_Occurred())
3854 ok = -1;
3855 break;
3856 }
3857 ok = PySequence_Contains(other, next);
3858 Py_DECREF(next);
3859 if (ok <= 0)
3860 break;
3861 }
3862 Py_DECREF(iter);
3863 return ok;
3864 }
3865
3866 static PyObject *
dictview_richcompare(PyObject * self,PyObject * other,int op)3867 dictview_richcompare(PyObject *self, PyObject *other, int op)
3868 {
3869 Py_ssize_t len_self, len_other;
3870 int ok;
3871 PyObject *result;
3872
3873 assert(self != NULL);
3874 assert(PyDictViewSet_Check(self));
3875 assert(other != NULL);
3876
3877 if (!PyAnySet_Check(other) && !PyDictViewSet_Check(other))
3878 Py_RETURN_NOTIMPLEMENTED;
3879
3880 len_self = PyObject_Size(self);
3881 if (len_self < 0)
3882 return NULL;
3883 len_other = PyObject_Size(other);
3884 if (len_other < 0)
3885 return NULL;
3886
3887 ok = 0;
3888 switch(op) {
3889
3890 case Py_NE:
3891 case Py_EQ:
3892 if (len_self == len_other)
3893 ok = all_contained_in(self, other);
3894 if (op == Py_NE && ok >= 0)
3895 ok = !ok;
3896 break;
3897
3898 case Py_LT:
3899 if (len_self < len_other)
3900 ok = all_contained_in(self, other);
3901 break;
3902
3903 case Py_LE:
3904 if (len_self <= len_other)
3905 ok = all_contained_in(self, other);
3906 break;
3907
3908 case Py_GT:
3909 if (len_self > len_other)
3910 ok = all_contained_in(other, self);
3911 break;
3912
3913 case Py_GE:
3914 if (len_self >= len_other)
3915 ok = all_contained_in(other, self);
3916 break;
3917
3918 }
3919 if (ok < 0)
3920 return NULL;
3921 result = ok ? Py_True : Py_False;
3922 Py_INCREF(result);
3923 return result;
3924 }
3925
3926 static PyObject *
dictview_repr(_PyDictViewObject * dv)3927 dictview_repr(_PyDictViewObject *dv)
3928 {
3929 PyObject *seq;
3930 PyObject *result;
3931
3932 seq = PySequence_List((PyObject *)dv);
3933 if (seq == NULL)
3934 return NULL;
3935
3936 result = PyUnicode_FromFormat("%s(%R)", Py_TYPE(dv)->tp_name, seq);
3937 Py_DECREF(seq);
3938 return result;
3939 }
3940
3941 /*** dict_keys ***/
3942
3943 static PyObject *
dictkeys_iter(_PyDictViewObject * dv)3944 dictkeys_iter(_PyDictViewObject *dv)
3945 {
3946 if (dv->dv_dict == NULL) {
3947 Py_RETURN_NONE;
3948 }
3949 return dictiter_new(dv->dv_dict, &PyDictIterKey_Type);
3950 }
3951
3952 static int
dictkeys_contains(_PyDictViewObject * dv,PyObject * obj)3953 dictkeys_contains(_PyDictViewObject *dv, PyObject *obj)
3954 {
3955 if (dv->dv_dict == NULL)
3956 return 0;
3957 return PyDict_Contains((PyObject *)dv->dv_dict, obj);
3958 }
3959
3960 static PySequenceMethods dictkeys_as_sequence = {
3961 (lenfunc)dictview_len, /* sq_length */
3962 0, /* sq_concat */
3963 0, /* sq_repeat */
3964 0, /* sq_item */
3965 0, /* sq_slice */
3966 0, /* sq_ass_item */
3967 0, /* sq_ass_slice */
3968 (objobjproc)dictkeys_contains, /* sq_contains */
3969 };
3970
3971 static PyObject*
dictviews_sub(PyObject * self,PyObject * other)3972 dictviews_sub(PyObject* self, PyObject *other)
3973 {
3974 PyObject *result = PySet_New(self);
3975 PyObject *tmp;
3976 _Py_IDENTIFIER(difference_update);
3977
3978 if (result == NULL)
3979 return NULL;
3980
3981 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_difference_update, other, NULL);
3982 if (tmp == NULL) {
3983 Py_DECREF(result);
3984 return NULL;
3985 }
3986
3987 Py_DECREF(tmp);
3988 return result;
3989 }
3990
3991 PyObject*
_PyDictView_Intersect(PyObject * self,PyObject * other)3992 _PyDictView_Intersect(PyObject* self, PyObject *other)
3993 {
3994 PyObject *result = PySet_New(self);
3995 PyObject *tmp;
3996 _Py_IDENTIFIER(intersection_update);
3997
3998 if (result == NULL)
3999 return NULL;
4000
4001 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_intersection_update, other, NULL);
4002 if (tmp == NULL) {
4003 Py_DECREF(result);
4004 return NULL;
4005 }
4006
4007 Py_DECREF(tmp);
4008 return result;
4009 }
4010
4011 static PyObject*
dictviews_or(PyObject * self,PyObject * other)4012 dictviews_or(PyObject* self, PyObject *other)
4013 {
4014 PyObject *result = PySet_New(self);
4015 PyObject *tmp;
4016 _Py_IDENTIFIER(update);
4017
4018 if (result == NULL)
4019 return NULL;
4020
4021 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_update, other, NULL);
4022 if (tmp == NULL) {
4023 Py_DECREF(result);
4024 return NULL;
4025 }
4026
4027 Py_DECREF(tmp);
4028 return result;
4029 }
4030
4031 static PyObject*
dictviews_xor(PyObject * self,PyObject * other)4032 dictviews_xor(PyObject* self, PyObject *other)
4033 {
4034 PyObject *result = PySet_New(self);
4035 PyObject *tmp;
4036 _Py_IDENTIFIER(symmetric_difference_update);
4037
4038 if (result == NULL)
4039 return NULL;
4040
4041 tmp = _PyObject_CallMethodIdObjArgs(result, &PyId_symmetric_difference_update, other, NULL);
4042 if (tmp == NULL) {
4043 Py_DECREF(result);
4044 return NULL;
4045 }
4046
4047 Py_DECREF(tmp);
4048 return result;
4049 }
4050
4051 static PyNumberMethods dictviews_as_number = {
4052 0, /*nb_add*/
4053 (binaryfunc)dictviews_sub, /*nb_subtract*/
4054 0, /*nb_multiply*/
4055 0, /*nb_remainder*/
4056 0, /*nb_divmod*/
4057 0, /*nb_power*/
4058 0, /*nb_negative*/
4059 0, /*nb_positive*/
4060 0, /*nb_absolute*/
4061 0, /*nb_bool*/
4062 0, /*nb_invert*/
4063 0, /*nb_lshift*/
4064 0, /*nb_rshift*/
4065 (binaryfunc)_PyDictView_Intersect, /*nb_and*/
4066 (binaryfunc)dictviews_xor, /*nb_xor*/
4067 (binaryfunc)dictviews_or, /*nb_or*/
4068 };
4069
4070 static PyObject*
dictviews_isdisjoint(PyObject * self,PyObject * other)4071 dictviews_isdisjoint(PyObject *self, PyObject *other)
4072 {
4073 PyObject *it;
4074 PyObject *item = NULL;
4075
4076 if (self == other) {
4077 if (dictview_len((_PyDictViewObject *)self) == 0)
4078 Py_RETURN_TRUE;
4079 else
4080 Py_RETURN_FALSE;
4081 }
4082
4083 /* Iterate over the shorter object (only if other is a set,
4084 * because PySequence_Contains may be expensive otherwise): */
4085 if (PyAnySet_Check(other) || PyDictViewSet_Check(other)) {
4086 Py_ssize_t len_self = dictview_len((_PyDictViewObject *)self);
4087 Py_ssize_t len_other = PyObject_Size(other);
4088 if (len_other == -1)
4089 return NULL;
4090
4091 if ((len_other > len_self)) {
4092 PyObject *tmp = other;
4093 other = self;
4094 self = tmp;
4095 }
4096 }
4097
4098 it = PyObject_GetIter(other);
4099 if (it == NULL)
4100 return NULL;
4101
4102 while ((item = PyIter_Next(it)) != NULL) {
4103 int contains = PySequence_Contains(self, item);
4104 Py_DECREF(item);
4105 if (contains == -1) {
4106 Py_DECREF(it);
4107 return NULL;
4108 }
4109
4110 if (contains) {
4111 Py_DECREF(it);
4112 Py_RETURN_FALSE;
4113 }
4114 }
4115 Py_DECREF(it);
4116 if (PyErr_Occurred())
4117 return NULL; /* PyIter_Next raised an exception. */
4118 Py_RETURN_TRUE;
4119 }
4120
4121 PyDoc_STRVAR(isdisjoint_doc,
4122 "Return True if the view and the given iterable have a null intersection.");
4123
4124 static PyMethodDef dictkeys_methods[] = {
4125 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
4126 isdisjoint_doc},
4127 {NULL, NULL} /* sentinel */
4128 };
4129
4130 PyTypeObject PyDictKeys_Type = {
4131 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4132 "dict_keys", /* tp_name */
4133 sizeof(_PyDictViewObject), /* tp_basicsize */
4134 0, /* tp_itemsize */
4135 /* methods */
4136 (destructor)dictview_dealloc, /* tp_dealloc */
4137 0, /* tp_print */
4138 0, /* tp_getattr */
4139 0, /* tp_setattr */
4140 0, /* tp_reserved */
4141 (reprfunc)dictview_repr, /* tp_repr */
4142 &dictviews_as_number, /* tp_as_number */
4143 &dictkeys_as_sequence, /* tp_as_sequence */
4144 0, /* tp_as_mapping */
4145 0, /* tp_hash */
4146 0, /* tp_call */
4147 0, /* tp_str */
4148 PyObject_GenericGetAttr, /* tp_getattro */
4149 0, /* tp_setattro */
4150 0, /* tp_as_buffer */
4151 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4152 0, /* tp_doc */
4153 (traverseproc)dictview_traverse, /* tp_traverse */
4154 0, /* tp_clear */
4155 dictview_richcompare, /* tp_richcompare */
4156 0, /* tp_weaklistoffset */
4157 (getiterfunc)dictkeys_iter, /* tp_iter */
4158 0, /* tp_iternext */
4159 dictkeys_methods, /* tp_methods */
4160 0,
4161 };
4162
4163 static PyObject *
dictkeys_new(PyObject * dict)4164 dictkeys_new(PyObject *dict)
4165 {
4166 return _PyDictView_New(dict, &PyDictKeys_Type);
4167 }
4168
4169 /*** dict_items ***/
4170
4171 static PyObject *
dictitems_iter(_PyDictViewObject * dv)4172 dictitems_iter(_PyDictViewObject *dv)
4173 {
4174 if (dv->dv_dict == NULL) {
4175 Py_RETURN_NONE;
4176 }
4177 return dictiter_new(dv->dv_dict, &PyDictIterItem_Type);
4178 }
4179
4180 static int
dictitems_contains(_PyDictViewObject * dv,PyObject * obj)4181 dictitems_contains(_PyDictViewObject *dv, PyObject *obj)
4182 {
4183 PyObject *key, *value, *found;
4184 if (dv->dv_dict == NULL)
4185 return 0;
4186 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 2)
4187 return 0;
4188 key = PyTuple_GET_ITEM(obj, 0);
4189 value = PyTuple_GET_ITEM(obj, 1);
4190 found = PyDict_GetItemWithError((PyObject *)dv->dv_dict, key);
4191 if (found == NULL) {
4192 if (PyErr_Occurred())
4193 return -1;
4194 return 0;
4195 }
4196 return PyObject_RichCompareBool(value, found, Py_EQ);
4197 }
4198
4199 static PySequenceMethods dictitems_as_sequence = {
4200 (lenfunc)dictview_len, /* sq_length */
4201 0, /* sq_concat */
4202 0, /* sq_repeat */
4203 0, /* sq_item */
4204 0, /* sq_slice */
4205 0, /* sq_ass_item */
4206 0, /* sq_ass_slice */
4207 (objobjproc)dictitems_contains, /* sq_contains */
4208 };
4209
4210 static PyMethodDef dictitems_methods[] = {
4211 {"isdisjoint", (PyCFunction)dictviews_isdisjoint, METH_O,
4212 isdisjoint_doc},
4213 {NULL, NULL} /* sentinel */
4214 };
4215
4216 PyTypeObject PyDictItems_Type = {
4217 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4218 "dict_items", /* tp_name */
4219 sizeof(_PyDictViewObject), /* tp_basicsize */
4220 0, /* tp_itemsize */
4221 /* methods */
4222 (destructor)dictview_dealloc, /* tp_dealloc */
4223 0, /* tp_print */
4224 0, /* tp_getattr */
4225 0, /* tp_setattr */
4226 0, /* tp_reserved */
4227 (reprfunc)dictview_repr, /* tp_repr */
4228 &dictviews_as_number, /* tp_as_number */
4229 &dictitems_as_sequence, /* tp_as_sequence */
4230 0, /* tp_as_mapping */
4231 0, /* tp_hash */
4232 0, /* tp_call */
4233 0, /* tp_str */
4234 PyObject_GenericGetAttr, /* tp_getattro */
4235 0, /* tp_setattro */
4236 0, /* tp_as_buffer */
4237 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4238 0, /* tp_doc */
4239 (traverseproc)dictview_traverse, /* tp_traverse */
4240 0, /* tp_clear */
4241 dictview_richcompare, /* tp_richcompare */
4242 0, /* tp_weaklistoffset */
4243 (getiterfunc)dictitems_iter, /* tp_iter */
4244 0, /* tp_iternext */
4245 dictitems_methods, /* tp_methods */
4246 0,
4247 };
4248
4249 static PyObject *
dictitems_new(PyObject * dict)4250 dictitems_new(PyObject *dict)
4251 {
4252 return _PyDictView_New(dict, &PyDictItems_Type);
4253 }
4254
4255 /*** dict_values ***/
4256
4257 static PyObject *
dictvalues_iter(_PyDictViewObject * dv)4258 dictvalues_iter(_PyDictViewObject *dv)
4259 {
4260 if (dv->dv_dict == NULL) {
4261 Py_RETURN_NONE;
4262 }
4263 return dictiter_new(dv->dv_dict, &PyDictIterValue_Type);
4264 }
4265
4266 static PySequenceMethods dictvalues_as_sequence = {
4267 (lenfunc)dictview_len, /* sq_length */
4268 0, /* sq_concat */
4269 0, /* sq_repeat */
4270 0, /* sq_item */
4271 0, /* sq_slice */
4272 0, /* sq_ass_item */
4273 0, /* sq_ass_slice */
4274 (objobjproc)0, /* sq_contains */
4275 };
4276
4277 static PyMethodDef dictvalues_methods[] = {
4278 {NULL, NULL} /* sentinel */
4279 };
4280
4281 PyTypeObject PyDictValues_Type = {
4282 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4283 "dict_values", /* tp_name */
4284 sizeof(_PyDictViewObject), /* tp_basicsize */
4285 0, /* tp_itemsize */
4286 /* methods */
4287 (destructor)dictview_dealloc, /* tp_dealloc */
4288 0, /* tp_print */
4289 0, /* tp_getattr */
4290 0, /* tp_setattr */
4291 0, /* tp_reserved */
4292 (reprfunc)dictview_repr, /* tp_repr */
4293 0, /* tp_as_number */
4294 &dictvalues_as_sequence, /* tp_as_sequence */
4295 0, /* tp_as_mapping */
4296 0, /* tp_hash */
4297 0, /* tp_call */
4298 0, /* tp_str */
4299 PyObject_GenericGetAttr, /* tp_getattro */
4300 0, /* tp_setattro */
4301 0, /* tp_as_buffer */
4302 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
4303 0, /* tp_doc */
4304 (traverseproc)dictview_traverse, /* tp_traverse */
4305 0, /* tp_clear */
4306 0, /* tp_richcompare */
4307 0, /* tp_weaklistoffset */
4308 (getiterfunc)dictvalues_iter, /* tp_iter */
4309 0, /* tp_iternext */
4310 dictvalues_methods, /* tp_methods */
4311 0,
4312 };
4313
4314 static PyObject *
dictvalues_new(PyObject * dict)4315 dictvalues_new(PyObject *dict)
4316 {
4317 return _PyDictView_New(dict, &PyDictValues_Type);
4318 }
4319
4320 /* Returns NULL if cannot allocate a new PyDictKeysObject,
4321 but does not set an error */
4322 PyDictKeysObject *
_PyDict_NewKeysForClass(void)4323 _PyDict_NewKeysForClass(void)
4324 {
4325 PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
4326 if (keys == NULL)
4327 PyErr_Clear();
4328 else
4329 keys->dk_lookup = lookdict_split;
4330 return keys;
4331 }
4332
4333 #define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys)
4334
4335 PyObject *
PyObject_GenericGetDict(PyObject * obj,void * context)4336 PyObject_GenericGetDict(PyObject *obj, void *context)
4337 {
4338 PyObject *dict, **dictptr = _PyObject_GetDictPtr(obj);
4339 if (dictptr == NULL) {
4340 PyErr_SetString(PyExc_AttributeError,
4341 "This object has no __dict__");
4342 return NULL;
4343 }
4344 dict = *dictptr;
4345 if (dict == NULL) {
4346 PyTypeObject *tp = Py_TYPE(obj);
4347 if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) {
4348 DK_INCREF(CACHED_KEYS(tp));
4349 *dictptr = dict = new_dict_with_shared_keys(CACHED_KEYS(tp));
4350 }
4351 else {
4352 *dictptr = dict = PyDict_New();
4353 }
4354 }
4355 Py_XINCREF(dict);
4356 return dict;
4357 }
4358
4359 int
_PyObjectDict_SetItem(PyTypeObject * tp,PyObject ** dictptr,PyObject * key,PyObject * value)4360 _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
4361 PyObject *key, PyObject *value)
4362 {
4363 PyObject *dict;
4364 int res;
4365 PyDictKeysObject *cached;
4366
4367 assert(dictptr != NULL);
4368 if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) {
4369 assert(dictptr != NULL);
4370 dict = *dictptr;
4371 if (dict == NULL) {
4372 DK_INCREF(cached);
4373 dict = new_dict_with_shared_keys(cached);
4374 if (dict == NULL)
4375 return -1;
4376 *dictptr = dict;
4377 }
4378 if (value == NULL) {
4379 res = PyDict_DelItem(dict, key);
4380 // Since key sharing dict doesn't allow deletion, PyDict_DelItem()
4381 // always converts dict to combined form.
4382 if ((cached = CACHED_KEYS(tp)) != NULL) {
4383 CACHED_KEYS(tp) = NULL;
4384 DK_DECREF(cached);
4385 }
4386 }
4387 else {
4388 int was_shared = (cached == ((PyDictObject *)dict)->ma_keys);
4389 res = PyDict_SetItem(dict, key, value);
4390 if (was_shared &&
4391 (cached = CACHED_KEYS(tp)) != NULL &&
4392 cached != ((PyDictObject *)dict)->ma_keys) {
4393 /* PyDict_SetItem() may call dictresize and convert split table
4394 * into combined table. In such case, convert it to split
4395 * table again and update type's shared key only when this is
4396 * the only dict sharing key with the type.
4397 *
4398 * This is to allow using shared key in class like this:
4399 *
4400 * class C:
4401 * def __init__(self):
4402 * # one dict resize happens
4403 * self.a, self.b, self.c = 1, 2, 3
4404 * self.d, self.e, self.f = 4, 5, 6
4405 * a = C()
4406 */
4407 if (cached->dk_refcnt == 1) {
4408 CACHED_KEYS(tp) = make_keys_shared(dict);
4409 }
4410 else {
4411 CACHED_KEYS(tp) = NULL;
4412 }
4413 DK_DECREF(cached);
4414 if (CACHED_KEYS(tp) == NULL && PyErr_Occurred())
4415 return -1;
4416 }
4417 }
4418 } else {
4419 dict = *dictptr;
4420 if (dict == NULL) {
4421 dict = PyDict_New();
4422 if (dict == NULL)
4423 return -1;
4424 *dictptr = dict;
4425 }
4426 if (value == NULL) {
4427 res = PyDict_DelItem(dict, key);
4428 } else {
4429 res = PyDict_SetItem(dict, key, value);
4430 }
4431 }
4432 return res;
4433 }
4434
4435 void
_PyDictKeys_DecRef(PyDictKeysObject * keys)4436 _PyDictKeys_DecRef(PyDictKeysObject *keys)
4437 {
4438 DK_DECREF(keys);
4439 }
4440