1
2 /* Dictionary object implementation using a hash table */
3
4 /* The distribution includes a separate file, Objects/dictnotes.txt,
5 describing explorations into dictionary design and optimization.
6 It covers typical dictionary use patterns, the parameters for
7 tuning dictionaries, and several ideas for possible optimizations.
8 */
9
10 #include "Python.h"
11
12
13 /* Set a key error with the specified argument, wrapping it in a
14 * tuple automatically so that tuple keys are not unpacked as the
15 * exception arguments. */
16 static void
set_key_error(PyObject * arg)17 set_key_error(PyObject *arg)
18 {
19 PyObject *tup;
20 tup = PyTuple_Pack(1, arg);
21 if (!tup)
22 return; /* caller will expect error to be set anyway */
23 PyErr_SetObject(PyExc_KeyError, tup);
24 Py_DECREF(tup);
25 }
26
27 /* Define this out if you don't want conversion statistics on exit. */
28 #undef SHOW_CONVERSION_COUNTS
29
30 /* See large comment block below. This must be >= 1. */
31 #define PERTURB_SHIFT 5
32
33 /*
34 Major subtleties ahead: Most hash schemes depend on having a "good" hash
35 function, in the sense of simulating randomness. Python doesn't: its most
36 important hash functions (for strings and ints) are very regular in common
37 cases:
38
39 >>> map(hash, (0, 1, 2, 3))
40 [0, 1, 2, 3]
41 >>> map(hash, ("namea", "nameb", "namec", "named"))
42 [-1658398457, -1658398460, -1658398459, -1658398462]
43 >>>
44
45 This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
46 the low-order i bits as the initial table index is extremely fast, and there
47 are no collisions at all for dicts indexed by a contiguous range of ints.
48 The same is approximately true when keys are "consecutive" strings. So this
49 gives better-than-random behavior in common cases, and that's very desirable.
50
51 OTOH, when collisions occur, the tendency to fill contiguous slices of the
52 hash table makes a good collision resolution strategy crucial. Taking only
53 the last i bits of the hash code is also vulnerable: for example, consider
54 [i << 16 for i in range(20000)] as a set of keys. Since ints are their own
55 hash codes, and this fits in a dict of size 2**15, the last 15 bits of every
56 hash code are all 0: they *all* map to the same table index.
57
58 But catering to unusual cases should not slow the usual ones, so we just take
59 the last i bits anyway. It's up to collision resolution to do the rest. If
60 we *usually* find the key we're looking for on the first try (and, it turns
61 out, we usually do -- the table load factor is kept under 2/3, so the odds
62 are solidly in our favor), then it makes best sense to keep the initial index
63 computation dirt cheap.
64
65 The first half of collision resolution is to visit table indices via this
66 recurrence:
67
68 j = ((5*j) + 1) mod 2**i
69
70 For any initial j in range(2**i), repeating that 2**i times generates each
71 int in range(2**i) exactly once (see any text on random-number generation for
72 proof). By itself, this doesn't help much: like linear probing (setting
73 j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
74 order. This would be bad, except that's not the only thing we do, and it's
75 actually *good* in the common cases where hash keys are consecutive. In an
76 example that's really too small to make this entirely clear, for a table of
77 size 2**3 the order of indices is:
78
79 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
80
81 If two things come in at index 5, the first place we look after is index 2,
82 not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
83 Linear probing is deadly in this case because there the fixed probe order
84 is the *same* as the order consecutive keys are likely to arrive. But it's
85 extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
86 and certain that consecutive hash codes do not.
87
88 The other half of the strategy is to get the other bits of the hash code
89 into play. This is done by initializing a (unsigned) vrbl "perturb" to the
90 full hash code, and changing the recurrence to:
91
92 j = (5*j) + 1 + perturb;
93 perturb >>= PERTURB_SHIFT;
94 use j % 2**i as the next table index;
95
96 Now the probe sequence depends (eventually) on every bit in the hash code,
97 and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
98 because it quickly magnifies small differences in the bits that didn't affect
99 the initial index. Note that because perturb is unsigned, if the recurrence
100 is executed often enough perturb eventually becomes and remains 0. At that
101 point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
102 that's certain to find an empty slot eventually (since it generates every int
103 in range(2**i), and we make sure there's always at least one empty slot).
104
105 Selecting a good value for PERTURB_SHIFT is a balancing act. You want it
106 small so that the high bits of the hash code continue to affect the probe
107 sequence across iterations; but you want it large so that in really bad cases
108 the high-order hash bits have an effect on early iterations. 5 was "the
109 best" in minimizing total collisions across experiments Tim Peters ran (on
110 both normal and pathological cases), but 4 and 6 weren't significantly worse.
111
112 Historical: Reimer Behrends contributed the idea of using a polynomial-based
113 approach, using repeated multiplication by x in GF(2**n) where an irreducible
114 polynomial for each table size was chosen such that x was a primitive root.
115 Christian Tismer later extended that to use division by x instead, as an
116 efficient way to get the high bits of the hash code into play. This scheme
117 also gave excellent collision statistics, but was more expensive: two
118 if-tests were required inside the loop; computing "the next" index took about
119 the same number of operations but without as much potential parallelism
120 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
121 above, and then shifting perturb can be done while the table index is being
122 masked); and the PyDictObject struct required a member to hold the table's
123 polynomial. In Tim's experiments the current scheme ran faster, produced
124 equally good collision statistics, needed less code & used less memory.
125
126 Theoretical Python 2.5 headache: hash codes are only C "long", but
127 sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
128 dict is genuinely huge, then only the slots directly reachable via indexing
129 by a C long can be the first slot in a probe sequence. The probe sequence
130 will still eventually reach every slot in the table, but the collision rate
131 on initial probes may be much higher than this scheme was designed for.
132 Getting a hash code as fat as Py_ssize_t is the only real cure. But in
133 practice, this probably won't make a lick of difference for many years (at
134 which point everyone will have terabytes of RAM on 64-bit boxes).
135 */
136
137 /* Object used as dummy key to fill deleted entries */
138 static PyObject *dummy = NULL; /* Initialized by first call to newPyDictObject() */
139
140 #ifdef Py_REF_DEBUG
141 PyObject *
_PyDict_Dummy(void)142 _PyDict_Dummy(void)
143 {
144 return dummy;
145 }
146 #endif
147
148 /* forward declarations */
149 static PyDictEntry *
150 lookdict_string(PyDictObject *mp, PyObject *key, long hash);
151
152 #ifdef SHOW_CONVERSION_COUNTS
153 static long created = 0L;
154 static long converted = 0L;
155
156 static void
show_counts(void)157 show_counts(void)
158 {
159 fprintf(stderr, "created %ld string dicts\n", created);
160 fprintf(stderr, "converted %ld to normal dicts\n", converted);
161 fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
162 }
163 #endif
164
165 /* Debug statistic to compare allocations with reuse through the free list */
166 #undef SHOW_ALLOC_COUNT
167 #ifdef SHOW_ALLOC_COUNT
168 static size_t count_alloc = 0;
169 static size_t count_reuse = 0;
170
171 static void
show_alloc(void)172 show_alloc(void)
173 {
174 fprintf(stderr, "Dict allocations: %" PY_FORMAT_SIZE_T "d\n",
175 count_alloc);
176 fprintf(stderr, "Dict reuse through freelist: %" PY_FORMAT_SIZE_T
177 "d\n", count_reuse);
178 fprintf(stderr, "%.2f%% reuse rate\n\n",
179 (100.0*count_reuse/(count_alloc+count_reuse)));
180 }
181 #endif
182
183 /* Debug statistic to count GC tracking of dicts */
184 #ifdef SHOW_TRACK_COUNT
185 static Py_ssize_t count_untracked = 0;
186 static Py_ssize_t count_tracked = 0;
187
188 static void
show_track(void)189 show_track(void)
190 {
191 fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n",
192 count_tracked + count_untracked);
193 fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T
194 "d\n", count_tracked);
195 fprintf(stderr, "%.2f%% dict tracking rate\n\n",
196 (100.0*count_tracked/(count_untracked+count_tracked)));
197 }
198 #endif
199
200
201 /* Initialization macros.
202 There are two ways to create a dict: PyDict_New() is the main C API
203 function, and the tp_new slot maps to dict_new(). In the latter case we
204 can save a little time over what PyDict_New does because it's guaranteed
205 that the PyDictObject struct is already zeroed out.
206 Everyone except dict_new() should use EMPTY_TO_MINSIZE (unless they have
207 an excellent reason not to).
208 */
209
210 #define INIT_NONZERO_DICT_SLOTS(mp) do { \
211 (mp)->ma_table = (mp)->ma_smalltable; \
212 (mp)->ma_mask = PyDict_MINSIZE - 1; \
213 } while(0)
214
215 #define EMPTY_TO_MINSIZE(mp) do { \
216 memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); \
217 (mp)->ma_used = (mp)->ma_fill = 0; \
218 INIT_NONZERO_DICT_SLOTS(mp); \
219 } while(0)
220
221 /* Dictionary reuse scheme to save calls to malloc, free, and memset */
222 #ifndef PyDict_MAXFREELIST
223 #define PyDict_MAXFREELIST 80
224 #endif
225 static PyDictObject *free_list[PyDict_MAXFREELIST];
226 static int numfree = 0;
227
228 void
PyDict_Fini(void)229 PyDict_Fini(void)
230 {
231 PyDictObject *op;
232
233 while (numfree) {
234 op = free_list[--numfree];
235 assert(PyDict_CheckExact(op));
236 PyObject_GC_Del(op);
237 }
238 }
239
240 PyObject *
PyDict_New(void)241 PyDict_New(void)
242 {
243 register PyDictObject *mp;
244 if (dummy == NULL) { /* Auto-initialize dummy */
245 dummy = PyString_FromString("<dummy key>");
246 if (dummy == NULL)
247 return NULL;
248 #ifdef SHOW_CONVERSION_COUNTS
249 Py_AtExit(show_counts);
250 #endif
251 #ifdef SHOW_ALLOC_COUNT
252 Py_AtExit(show_alloc);
253 #endif
254 #ifdef SHOW_TRACK_COUNT
255 Py_AtExit(show_track);
256 #endif
257 }
258 if (numfree) {
259 mp = free_list[--numfree];
260 assert (mp != NULL);
261 assert (Py_TYPE(mp) == &PyDict_Type);
262 _Py_NewReference((PyObject *)mp);
263 if (mp->ma_fill) {
264 EMPTY_TO_MINSIZE(mp);
265 } else {
266 /* At least set ma_table and ma_mask; these are wrong
267 if an empty but presized dict is added to freelist */
268 INIT_NONZERO_DICT_SLOTS(mp);
269 }
270 assert (mp->ma_used == 0);
271 assert (mp->ma_table == mp->ma_smalltable);
272 assert (mp->ma_mask == PyDict_MINSIZE - 1);
273 #ifdef SHOW_ALLOC_COUNT
274 count_reuse++;
275 #endif
276 } else {
277 mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
278 if (mp == NULL)
279 return NULL;
280 EMPTY_TO_MINSIZE(mp);
281 #ifdef SHOW_ALLOC_COUNT
282 count_alloc++;
283 #endif
284 }
285 mp->ma_lookup = lookdict_string;
286 #ifdef SHOW_TRACK_COUNT
287 count_untracked++;
288 #endif
289 #ifdef SHOW_CONVERSION_COUNTS
290 ++created;
291 #endif
292 return (PyObject *)mp;
293 }
294
295 /*
296 The basic lookup function used by all operations.
297 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
298 Open addressing is preferred over chaining since the link overhead for
299 chaining would be substantial (100% with typical malloc overhead).
300
301 The initial probe index is computed as hash mod the table size. Subsequent
302 probe indices are computed as explained earlier.
303
304 All arithmetic on hash should ignore overflow.
305
306 (The details in this version are due to Tim Peters, building on many past
307 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
308 Christian Tismer).
309
310 lookdict() is general-purpose, and may return NULL if (and only if) a
311 comparison raises an exception (this was new in Python 2.5).
312 lookdict_string() below is specialized to string keys, comparison of which can
313 never raise an exception; that function can never return NULL. For both, when
314 the key isn't found a PyDictEntry* is returned for which the me_value field is
315 NULL; this is the slot in the dict at which the key would have been found, and
316 the caller can (if it wishes) add the <key, value> pair to the returned
317 PyDictEntry*.
318 */
319 static PyDictEntry *
lookdict(PyDictObject * mp,PyObject * key,register long hash)320 lookdict(PyDictObject *mp, PyObject *key, register long hash)
321 {
322 register size_t i;
323 register size_t perturb;
324 register PyDictEntry *freeslot;
325 register size_t mask = (size_t)mp->ma_mask;
326 PyDictEntry *ep0 = mp->ma_table;
327 register PyDictEntry *ep;
328 register int cmp;
329 PyObject *startkey;
330
331 i = (size_t)hash & mask;
332 ep = &ep0[i];
333 if (ep->me_key == NULL || ep->me_key == key)
334 return ep;
335
336 if (ep->me_key == dummy)
337 freeslot = ep;
338 else {
339 if (ep->me_hash == hash) {
340 startkey = ep->me_key;
341 Py_INCREF(startkey);
342 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
343 Py_DECREF(startkey);
344 if (cmp < 0)
345 return NULL;
346 if (ep0 == mp->ma_table && ep->me_key == startkey) {
347 if (cmp > 0)
348 return ep;
349 }
350 else {
351 /* The compare did major nasty stuff to the
352 * dict: start over.
353 * XXX A clever adversary could prevent this
354 * XXX from terminating.
355 */
356 return lookdict(mp, key, hash);
357 }
358 }
359 freeslot = NULL;
360 }
361
362 /* In the loop, me_key == dummy is by far (factor of 100s) the
363 least likely outcome, so test for that last. */
364 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
365 i = (i << 2) + i + perturb + 1;
366 ep = &ep0[i & mask];
367 if (ep->me_key == NULL)
368 return freeslot == NULL ? ep : freeslot;
369 if (ep->me_key == key)
370 return ep;
371 if (ep->me_hash == hash && ep->me_key != dummy) {
372 startkey = ep->me_key;
373 Py_INCREF(startkey);
374 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
375 Py_DECREF(startkey);
376 if (cmp < 0)
377 return NULL;
378 if (ep0 == mp->ma_table && ep->me_key == startkey) {
379 if (cmp > 0)
380 return ep;
381 }
382 else {
383 /* The compare did major nasty stuff to the
384 * dict: start over.
385 * XXX A clever adversary could prevent this
386 * XXX from terminating.
387 */
388 return lookdict(mp, key, hash);
389 }
390 }
391 else if (ep->me_key == dummy && freeslot == NULL)
392 freeslot = ep;
393 }
394 assert(0); /* NOT REACHED */
395 return 0;
396 }
397
398 /*
399 * Hacked up version of lookdict which can assume keys are always strings;
400 * this assumption allows testing for errors during PyObject_RichCompareBool()
401 * to be dropped; string-string comparisons never raise exceptions. This also
402 * means we don't need to go through PyObject_RichCompareBool(); we can always
403 * use _PyString_Eq() directly.
404 *
405 * This is valuable because dicts with only string keys are very common.
406 */
407 static PyDictEntry *
lookdict_string(PyDictObject * mp,PyObject * key,register long hash)408 lookdict_string(PyDictObject *mp, PyObject *key, register long hash)
409 {
410 register size_t i;
411 register size_t perturb;
412 register PyDictEntry *freeslot;
413 register size_t mask = (size_t)mp->ma_mask;
414 PyDictEntry *ep0 = mp->ma_table;
415 register PyDictEntry *ep;
416
417 /* Make sure this function doesn't have to handle non-string keys,
418 including subclasses of str; e.g., one reason to subclass
419 strings is to override __eq__, and for speed we don't cater to
420 that here. */
421 if (!PyString_CheckExact(key)) {
422 #ifdef SHOW_CONVERSION_COUNTS
423 ++converted;
424 #endif
425 mp->ma_lookup = lookdict;
426 return lookdict(mp, key, hash);
427 }
428 i = hash & mask;
429 ep = &ep0[i];
430 if (ep->me_key == NULL || ep->me_key == key)
431 return ep;
432 if (ep->me_key == dummy)
433 freeslot = ep;
434 else {
435 if (ep->me_hash == hash && _PyString_Eq(ep->me_key, key))
436 return ep;
437 freeslot = NULL;
438 }
439
440 /* In the loop, me_key == dummy is by far (factor of 100s) the
441 least likely outcome, so test for that last. */
442 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
443 i = (i << 2) + i + perturb + 1;
444 ep = &ep0[i & mask];
445 if (ep->me_key == NULL)
446 return freeslot == NULL ? ep : freeslot;
447 if (ep->me_key == key
448 || (ep->me_hash == hash
449 && ep->me_key != dummy
450 && _PyString_Eq(ep->me_key, key)))
451 return ep;
452 if (ep->me_key == dummy && freeslot == NULL)
453 freeslot = ep;
454 }
455 assert(0); /* NOT REACHED */
456 return 0;
457 }
458
459 #ifdef SHOW_TRACK_COUNT
460 #define INCREASE_TRACK_COUNT \
461 (count_tracked++, count_untracked--);
462 #define DECREASE_TRACK_COUNT \
463 (count_tracked--, count_untracked++);
464 #else
465 #define INCREASE_TRACK_COUNT
466 #define DECREASE_TRACK_COUNT
467 #endif
468
469 #define MAINTAIN_TRACKING(mp, key, value) \
470 do { \
471 if (!_PyObject_GC_IS_TRACKED(mp)) { \
472 if (_PyObject_GC_MAY_BE_TRACKED(key) || \
473 _PyObject_GC_MAY_BE_TRACKED(value)) { \
474 _PyObject_GC_TRACK(mp); \
475 INCREASE_TRACK_COUNT \
476 } \
477 } \
478 } while(0)
479
480 void
_PyDict_MaybeUntrack(PyObject * op)481 _PyDict_MaybeUntrack(PyObject *op)
482 {
483 PyDictObject *mp;
484 PyObject *value;
485 Py_ssize_t mask, i;
486 PyDictEntry *ep;
487
488 if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
489 return;
490
491 mp = (PyDictObject *) op;
492 ep = mp->ma_table;
493 mask = mp->ma_mask;
494 for (i = 0; i <= mask; i++) {
495 if ((value = ep[i].me_value) == NULL)
496 continue;
497 if (_PyObject_GC_MAY_BE_TRACKED(value) ||
498 _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key))
499 return;
500 }
501 DECREASE_TRACK_COUNT
502 _PyObject_GC_UNTRACK(op);
503 }
504
505 /*
506 Internal routine to insert a new item into the table when you have entry object.
507 Used by insertdict.
508 */
509 static int
insertdict_by_entry(register PyDictObject * mp,PyObject * key,long hash,PyDictEntry * ep,PyObject * value)510 insertdict_by_entry(register PyDictObject *mp, PyObject *key, long hash,
511 PyDictEntry *ep, PyObject *value)
512 {
513 PyObject *old_value;
514
515 MAINTAIN_TRACKING(mp, key, value);
516 if (ep->me_value != NULL) {
517 old_value = ep->me_value;
518 ep->me_value = value;
519 Py_DECREF(old_value); /* which **CAN** re-enter */
520 Py_DECREF(key);
521 }
522 else {
523 if (ep->me_key == NULL)
524 mp->ma_fill++;
525 else {
526 assert(ep->me_key == dummy);
527 Py_DECREF(dummy);
528 }
529 ep->me_key = key;
530 ep->me_hash = (Py_ssize_t)hash;
531 ep->me_value = value;
532 mp->ma_used++;
533 }
534 return 0;
535 }
536
537
538 /*
539 Internal routine to insert a new item into the table.
540 Used both by the internal resize routine and by the public insert routine.
541 Eats a reference to key and one to value.
542 Returns -1 if an error occurred, or 0 on success.
543 */
544 static int
insertdict(register PyDictObject * mp,PyObject * key,long hash,PyObject * value)545 insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
546 {
547 register PyDictEntry *ep;
548
549 assert(mp->ma_lookup != NULL);
550 ep = mp->ma_lookup(mp, key, hash);
551 if (ep == NULL) {
552 Py_DECREF(key);
553 Py_DECREF(value);
554 return -1;
555 }
556 return insertdict_by_entry(mp, key, hash, ep, value);
557 }
558
559 /*
560 Internal routine used by dictresize() to insert an item which is
561 known to be absent from the dict. This routine also assumes that
562 the dict contains no deleted entries. Besides the performance benefit,
563 using insertdict() in dictresize() is dangerous (SF bug #1456209).
564 Note that no refcounts are changed by this routine; if needed, the caller
565 is responsible for incref'ing `key` and `value`.
566 */
567 static void
insertdict_clean(register PyDictObject * mp,PyObject * key,long hash,PyObject * value)568 insertdict_clean(register PyDictObject *mp, PyObject *key, long hash,
569 PyObject *value)
570 {
571 register size_t i;
572 register size_t perturb;
573 register size_t mask = (size_t)mp->ma_mask;
574 PyDictEntry *ep0 = mp->ma_table;
575 register PyDictEntry *ep;
576
577 MAINTAIN_TRACKING(mp, key, value);
578 i = hash & mask;
579 ep = &ep0[i];
580 for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
581 i = (i << 2) + i + perturb + 1;
582 ep = &ep0[i & mask];
583 }
584 assert(ep->me_value == NULL);
585 mp->ma_fill++;
586 ep->me_key = key;
587 ep->me_hash = (Py_ssize_t)hash;
588 ep->me_value = value;
589 mp->ma_used++;
590 }
591
592 /*
593 Restructure the table by allocating a new table and reinserting all
594 items again. When entries have been deleted, the new table may
595 actually be smaller than the old one.
596 */
597 static int
dictresize(PyDictObject * mp,Py_ssize_t minused)598 dictresize(PyDictObject *mp, Py_ssize_t minused)
599 {
600 Py_ssize_t newsize;
601 PyDictEntry *oldtable, *newtable, *ep;
602 Py_ssize_t i;
603 int is_oldtable_malloced;
604 PyDictEntry small_copy[PyDict_MINSIZE];
605
606 assert(minused >= 0);
607
608 /* Find the smallest table size > minused. */
609 for (newsize = PyDict_MINSIZE;
610 newsize <= minused && newsize > 0;
611 newsize <<= 1)
612 ;
613 if (newsize <= 0) {
614 PyErr_NoMemory();
615 return -1;
616 }
617
618 /* Get space for a new table. */
619 oldtable = mp->ma_table;
620 assert(oldtable != NULL);
621 is_oldtable_malloced = oldtable != mp->ma_smalltable;
622
623 if (newsize == PyDict_MINSIZE) {
624 /* A large table is shrinking, or we can't get any smaller. */
625 newtable = mp->ma_smalltable;
626 if (newtable == oldtable) {
627 if (mp->ma_fill == mp->ma_used) {
628 /* No dummies, so no point doing anything. */
629 return 0;
630 }
631 /* We're not going to resize it, but rebuild the
632 table anyway to purge old dummy entries.
633 Subtle: This is *necessary* if fill==size,
634 as lookdict needs at least one virgin slot to
635 terminate failing searches. If fill < size, it's
636 merely desirable, as dummies slow searches. */
637 assert(mp->ma_fill > mp->ma_used);
638 memcpy(small_copy, oldtable, sizeof(small_copy));
639 oldtable = small_copy;
640 }
641 }
642 else {
643 newtable = PyMem_NEW(PyDictEntry, newsize);
644 if (newtable == NULL) {
645 PyErr_NoMemory();
646 return -1;
647 }
648 }
649
650 /* Make the dict empty, using the new table. */
651 assert(newtable != oldtable);
652 mp->ma_table = newtable;
653 mp->ma_mask = newsize - 1;
654 memset(newtable, 0, sizeof(PyDictEntry) * newsize);
655 mp->ma_used = 0;
656 i = mp->ma_fill;
657 mp->ma_fill = 0;
658
659 /* Copy the data over; this is refcount-neutral for active entries;
660 dummy entries aren't copied over, of course */
661 for (ep = oldtable; i > 0; ep++) {
662 if (ep->me_value != NULL) { /* active entry */
663 --i;
664 insertdict_clean(mp, ep->me_key, (long)ep->me_hash,
665 ep->me_value);
666 }
667 else if (ep->me_key != NULL) { /* dummy entry */
668 --i;
669 assert(ep->me_key == dummy);
670 Py_DECREF(ep->me_key);
671 }
672 /* else key == value == NULL: nothing to do */
673 }
674
675 if (is_oldtable_malloced)
676 PyMem_DEL(oldtable);
677 return 0;
678 }
679
680 /* Create a new dictionary pre-sized to hold an estimated number of elements.
681 Underestimates are okay because the dictionary will resize as necessary.
682 Overestimates just mean the dictionary will be more sparse than usual.
683 */
684
685 PyObject *
_PyDict_NewPresized(Py_ssize_t minused)686 _PyDict_NewPresized(Py_ssize_t minused)
687 {
688 PyObject *op = PyDict_New();
689
690 if (minused>5 && op != NULL && dictresize((PyDictObject *)op, minused) == -1) {
691 Py_DECREF(op);
692 return NULL;
693 }
694 return op;
695 }
696
697 /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
698 * that may occur (originally dicts supported only string keys, and exceptions
699 * weren't possible). So, while the original intent was that a NULL return
700 * meant the key wasn't present, in reality it can mean that, or that an error
701 * (suppressed) occurred while computing the key's hash, or that some error
702 * (suppressed) occurred when comparing keys in the dict's internal probe
703 * sequence. A nasty example of the latter is when a Python-coded comparison
704 * function hits a stack-depth error, which can cause this to return NULL
705 * even if the key is present.
706 */
707 PyObject *
PyDict_GetItem(PyObject * op,PyObject * key)708 PyDict_GetItem(PyObject *op, PyObject *key)
709 {
710 long hash;
711 PyDictObject *mp = (PyDictObject *)op;
712 PyDictEntry *ep;
713 PyThreadState *tstate;
714 if (!PyDict_Check(op))
715 return NULL;
716 if (!PyString_CheckExact(key) ||
717 (hash = ((PyStringObject *) key)->ob_shash) == -1)
718 {
719 hash = PyObject_Hash(key);
720 if (hash == -1) {
721 PyErr_Clear();
722 return NULL;
723 }
724 }
725
726 /* We can arrive here with a NULL tstate during initialization: try
727 running "python -Wi" for an example related to string interning.
728 Let's just hope that no exception occurs then... This must be
729 _PyThreadState_Current and not PyThreadState_GET() because in debug
730 mode, the latter complains if tstate is NULL. */
731 tstate = _PyThreadState_Current;
732 if (tstate != NULL && tstate->curexc_type != NULL) {
733 /* preserve the existing exception */
734 PyObject *err_type, *err_value, *err_tb;
735 PyErr_Fetch(&err_type, &err_value, &err_tb);
736 ep = (mp->ma_lookup)(mp, key, hash);
737 /* ignore errors */
738 PyErr_Restore(err_type, err_value, err_tb);
739 if (ep == NULL)
740 return NULL;
741 }
742 else {
743 ep = (mp->ma_lookup)(mp, key, hash);
744 if (ep == NULL) {
745 PyErr_Clear();
746 return NULL;
747 }
748 }
749 return ep->me_value;
750 }
751
752 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
753 This returns NULL *with* an exception set if an exception occurred.
754 It returns NULL *without* an exception set if the key wasn't present.
755 */
756 PyObject *
_PyDict_GetItemWithError(PyObject * op,PyObject * key)757 _PyDict_GetItemWithError(PyObject *op, PyObject *key)
758 {
759 long hash;
760 PyDictObject *mp = (PyDictObject *)op;
761 PyDictEntry *ep;
762 if (!PyDict_Check(op)) {
763 PyErr_BadInternalCall();
764 return NULL;
765 }
766 if (!PyString_CheckExact(key) ||
767 (hash = ((PyStringObject *) key)->ob_shash) == -1)
768 {
769 hash = PyObject_Hash(key);
770 if (hash == -1) {
771 return NULL;
772 }
773 }
774
775 ep = (mp->ma_lookup)(mp, key, hash);
776 if (ep == NULL) {
777 return NULL;
778 }
779 return ep->me_value;
780 }
781
782 static int
dict_set_item_by_hash_or_entry(register PyObject * op,PyObject * key,long hash,PyDictEntry * ep,PyObject * value)783 dict_set_item_by_hash_or_entry(register PyObject *op, PyObject *key,
784 long hash, PyDictEntry *ep, PyObject *value)
785 {
786 register PyDictObject *mp;
787 register Py_ssize_t n_used;
788
789 mp = (PyDictObject *)op;
790 assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */
791 n_used = mp->ma_used;
792 Py_INCREF(value);
793 Py_INCREF(key);
794 if (ep == NULL) {
795 if (insertdict(mp, key, hash, value) != 0)
796 return -1;
797 }
798 else {
799 if (insertdict_by_entry(mp, key, hash, ep, value) != 0)
800 return -1;
801 }
802 /* If we added a key, we can safely resize. Otherwise just return!
803 * If fill >= 2/3 size, adjust size. Normally, this doubles or
804 * quaduples the size, but it's also possible for the dict to shrink
805 * (if ma_fill is much larger than ma_used, meaning a lot of dict
806 * keys have been * deleted).
807 *
808 * Quadrupling the size improves average dictionary sparseness
809 * (reducing collisions) at the cost of some memory and iteration
810 * speed (which loops over every possible entry). It also halves
811 * the number of expensive resize operations in a growing dictionary.
812 *
813 * Very large dictionaries (over 50K items) use doubling instead.
814 * This may help applications with severe memory constraints.
815 */
816 if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
817 return 0;
818 return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
819 }
820
821 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
822 * dictionary if it's merely replacing the value for an existing key.
823 * This means that it's safe to loop over a dictionary with PyDict_Next()
824 * and occasionally replace a value -- but you can't insert new keys or
825 * remove them.
826 */
827 int
PyDict_SetItem(register PyObject * op,PyObject * key,PyObject * value)828 PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
829 {
830 register long hash;
831
832 if (!PyDict_Check(op)) {
833 PyErr_BadInternalCall();
834 return -1;
835 }
836 assert(key);
837 assert(value);
838 if (PyString_CheckExact(key)) {
839 hash = ((PyStringObject *)key)->ob_shash;
840 if (hash == -1)
841 hash = PyObject_Hash(key);
842 }
843 else {
844 hash = PyObject_Hash(key);
845 if (hash == -1)
846 return -1;
847 }
848 return dict_set_item_by_hash_or_entry(op, key, hash, NULL, value);
849 }
850
851 int
PyDict_DelItem(PyObject * op,PyObject * key)852 PyDict_DelItem(PyObject *op, PyObject *key)
853 {
854 register PyDictObject *mp;
855 register long hash;
856 register PyDictEntry *ep;
857 PyObject *old_value, *old_key;
858
859 if (!PyDict_Check(op)) {
860 PyErr_BadInternalCall();
861 return -1;
862 }
863 assert(key);
864 if (!PyString_CheckExact(key) ||
865 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
866 hash = PyObject_Hash(key);
867 if (hash == -1)
868 return -1;
869 }
870 mp = (PyDictObject *)op;
871 ep = (mp->ma_lookup)(mp, key, hash);
872 if (ep == NULL)
873 return -1;
874 if (ep->me_value == NULL) {
875 set_key_error(key);
876 return -1;
877 }
878 old_key = ep->me_key;
879 Py_INCREF(dummy);
880 ep->me_key = dummy;
881 old_value = ep->me_value;
882 ep->me_value = NULL;
883 mp->ma_used--;
884 Py_DECREF(old_value);
885 Py_DECREF(old_key);
886 return 0;
887 }
888
889 void
PyDict_Clear(PyObject * op)890 PyDict_Clear(PyObject *op)
891 {
892 PyDictObject *mp;
893 PyDictEntry *ep, *table;
894 int table_is_malloced;
895 Py_ssize_t fill;
896 PyDictEntry small_copy[PyDict_MINSIZE];
897 #ifdef Py_DEBUG
898 Py_ssize_t i, n;
899 #endif
900
901 if (!PyDict_Check(op))
902 return;
903 mp = (PyDictObject *)op;
904 #ifdef Py_DEBUG
905 n = mp->ma_mask + 1;
906 i = 0;
907 #endif
908
909 table = mp->ma_table;
910 assert(table != NULL);
911 table_is_malloced = table != mp->ma_smalltable;
912
913 /* This is delicate. During the process of clearing the dict,
914 * decrefs can cause the dict to mutate. To avoid fatal confusion
915 * (voice of experience), we have to make the dict empty before
916 * clearing the slots, and never refer to anything via mp->xxx while
917 * clearing.
918 */
919 fill = mp->ma_fill;
920 if (table_is_malloced)
921 EMPTY_TO_MINSIZE(mp);
922
923 else if (fill > 0) {
924 /* It's a small table with something that needs to be cleared.
925 * Afraid the only safe way is to copy the dict entries into
926 * another small table first.
927 */
928 memcpy(small_copy, table, sizeof(small_copy));
929 table = small_copy;
930 EMPTY_TO_MINSIZE(mp);
931 }
932 /* else it's a small table that's already empty */
933
934 /* Now we can finally clear things. If C had refcounts, we could
935 * assert that the refcount on table is 1 now, i.e. that this function
936 * has unique access to it, so decref side-effects can't alter it.
937 */
938 for (ep = table; fill > 0; ++ep) {
939 #ifdef Py_DEBUG
940 assert(i < n);
941 ++i;
942 #endif
943 if (ep->me_key) {
944 --fill;
945 Py_DECREF(ep->me_key);
946 Py_XDECREF(ep->me_value);
947 }
948 #ifdef Py_DEBUG
949 else
950 assert(ep->me_value == NULL);
951 #endif
952 }
953
954 if (table_is_malloced)
955 PyMem_DEL(table);
956 }
957
958 /*
959 * Iterate over a dict. Use like so:
960 *
961 * Py_ssize_t i;
962 * PyObject *key, *value;
963 * i = 0; # important! i should not otherwise be changed by you
964 * while (PyDict_Next(yourdict, &i, &key, &value)) {
965 * Refer to borrowed references in key and value.
966 * }
967 *
968 * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
969 * mutates the dict. One exception: it is safe if the loop merely changes
970 * the values associated with the keys (but doesn't insert new keys or
971 * delete keys), via PyDict_SetItem().
972 */
973 int
PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue)974 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
975 {
976 register Py_ssize_t i;
977 register Py_ssize_t mask;
978 register PyDictEntry *ep;
979
980 if (!PyDict_Check(op))
981 return 0;
982 i = *ppos;
983 if (i < 0)
984 return 0;
985 ep = ((PyDictObject *)op)->ma_table;
986 mask = ((PyDictObject *)op)->ma_mask;
987 while (i <= mask && ep[i].me_value == NULL)
988 i++;
989 *ppos = i+1;
990 if (i > mask)
991 return 0;
992 if (pkey)
993 *pkey = ep[i].me_key;
994 if (pvalue)
995 *pvalue = ep[i].me_value;
996 return 1;
997 }
998
999 /* Internal version of PyDict_Next that returns a hash value in addition to the key and value.*/
1000 int
_PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue,long * phash)1001 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash)
1002 {
1003 register Py_ssize_t i;
1004 register Py_ssize_t mask;
1005 register PyDictEntry *ep;
1006
1007 if (!PyDict_Check(op))
1008 return 0;
1009 i = *ppos;
1010 if (i < 0)
1011 return 0;
1012 ep = ((PyDictObject *)op)->ma_table;
1013 mask = ((PyDictObject *)op)->ma_mask;
1014 while (i <= mask && ep[i].me_value == NULL)
1015 i++;
1016 *ppos = i+1;
1017 if (i > mask)
1018 return 0;
1019 *phash = (long)(ep[i].me_hash);
1020 if (pkey)
1021 *pkey = ep[i].me_key;
1022 if (pvalue)
1023 *pvalue = ep[i].me_value;
1024 return 1;
1025 }
1026
1027 /* Methods */
1028
1029 static void
dict_dealloc(register PyDictObject * mp)1030 dict_dealloc(register PyDictObject *mp)
1031 {
1032 register PyDictEntry *ep;
1033 Py_ssize_t fill = mp->ma_fill;
1034 PyObject_GC_UnTrack(mp);
1035 Py_TRASHCAN_SAFE_BEGIN(mp)
1036 for (ep = mp->ma_table; fill > 0; ep++) {
1037 if (ep->me_key) {
1038 --fill;
1039 Py_DECREF(ep->me_key);
1040 Py_XDECREF(ep->me_value);
1041 }
1042 }
1043 if (mp->ma_table != mp->ma_smalltable)
1044 PyMem_DEL(mp->ma_table);
1045 if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
1046 free_list[numfree++] = mp;
1047 else
1048 Py_TYPE(mp)->tp_free((PyObject *)mp);
1049 Py_TRASHCAN_SAFE_END(mp)
1050 }
1051
1052 static int
dict_print(register PyDictObject * mp,register FILE * fp,register int flags)1053 dict_print(register PyDictObject *mp, register FILE *fp, register int flags)
1054 {
1055 register Py_ssize_t i;
1056 register Py_ssize_t any;
1057 int status;
1058
1059 status = Py_ReprEnter((PyObject*)mp);
1060 if (status != 0) {
1061 if (status < 0)
1062 return status;
1063 Py_BEGIN_ALLOW_THREADS
1064 fprintf(fp, "{...}");
1065 Py_END_ALLOW_THREADS
1066 return 0;
1067 }
1068
1069 Py_BEGIN_ALLOW_THREADS
1070 fprintf(fp, "{");
1071 Py_END_ALLOW_THREADS
1072 any = 0;
1073 for (i = 0; i <= mp->ma_mask; i++) {
1074 PyDictEntry *ep = mp->ma_table + i;
1075 PyObject *pvalue = ep->me_value;
1076 if (pvalue != NULL) {
1077 /* Prevent PyObject_Repr from deleting value during
1078 key format */
1079 Py_INCREF(pvalue);
1080 if (any++ > 0) {
1081 Py_BEGIN_ALLOW_THREADS
1082 fprintf(fp, ", ");
1083 Py_END_ALLOW_THREADS
1084 }
1085 if (PyObject_Print((PyObject *)ep->me_key, fp, 0)!=0) {
1086 Py_DECREF(pvalue);
1087 Py_ReprLeave((PyObject*)mp);
1088 return -1;
1089 }
1090 Py_BEGIN_ALLOW_THREADS
1091 fprintf(fp, ": ");
1092 Py_END_ALLOW_THREADS
1093 if (PyObject_Print(pvalue, fp, 0) != 0) {
1094 Py_DECREF(pvalue);
1095 Py_ReprLeave((PyObject*)mp);
1096 return -1;
1097 }
1098 Py_DECREF(pvalue);
1099 }
1100 }
1101 Py_BEGIN_ALLOW_THREADS
1102 fprintf(fp, "}");
1103 Py_END_ALLOW_THREADS
1104 Py_ReprLeave((PyObject*)mp);
1105 return 0;
1106 }
1107
1108 static PyObject *
dict_repr(PyDictObject * mp)1109 dict_repr(PyDictObject *mp)
1110 {
1111 Py_ssize_t i;
1112 PyObject *s, *temp, *colon = NULL;
1113 PyObject *pieces = NULL, *result = NULL;
1114 PyObject *key, *value;
1115
1116 i = Py_ReprEnter((PyObject *)mp);
1117 if (i != 0) {
1118 return i > 0 ? PyString_FromString("{...}") : NULL;
1119 }
1120
1121 if (mp->ma_used == 0) {
1122 result = PyString_FromString("{}");
1123 goto Done;
1124 }
1125
1126 pieces = PyList_New(0);
1127 if (pieces == NULL)
1128 goto Done;
1129
1130 colon = PyString_FromString(": ");
1131 if (colon == NULL)
1132 goto Done;
1133
1134 /* Do repr() on each key+value pair, and insert ": " between them.
1135 Note that repr may mutate the dict. */
1136 i = 0;
1137 while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
1138 int status;
1139 /* Prevent repr from deleting value during key format. */
1140 Py_INCREF(value);
1141 s = PyObject_Repr(key);
1142 PyString_Concat(&s, colon);
1143 PyString_ConcatAndDel(&s, PyObject_Repr(value));
1144 Py_DECREF(value);
1145 if (s == NULL)
1146 goto Done;
1147 status = PyList_Append(pieces, s);
1148 Py_DECREF(s); /* append created a new ref */
1149 if (status < 0)
1150 goto Done;
1151 }
1152
1153 /* Add "{}" decorations to the first and last items. */
1154 assert(PyList_GET_SIZE(pieces) > 0);
1155 s = PyString_FromString("{");
1156 if (s == NULL)
1157 goto Done;
1158 temp = PyList_GET_ITEM(pieces, 0);
1159 PyString_ConcatAndDel(&s, temp);
1160 PyList_SET_ITEM(pieces, 0, s);
1161 if (s == NULL)
1162 goto Done;
1163
1164 s = PyString_FromString("}");
1165 if (s == NULL)
1166 goto Done;
1167 temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
1168 PyString_ConcatAndDel(&temp, s);
1169 PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
1170 if (temp == NULL)
1171 goto Done;
1172
1173 /* Paste them all together with ", " between. */
1174 s = PyString_FromString(", ");
1175 if (s == NULL)
1176 goto Done;
1177 result = _PyString_Join(s, pieces);
1178 Py_DECREF(s);
1179
1180 Done:
1181 Py_XDECREF(pieces);
1182 Py_XDECREF(colon);
1183 Py_ReprLeave((PyObject *)mp);
1184 return result;
1185 }
1186
1187 static Py_ssize_t
dict_length(PyDictObject * mp)1188 dict_length(PyDictObject *mp)
1189 {
1190 return mp->ma_used;
1191 }
1192
1193 static PyObject *
dict_subscript(PyDictObject * mp,register PyObject * key)1194 dict_subscript(PyDictObject *mp, register PyObject *key)
1195 {
1196 PyObject *v;
1197 long hash;
1198 PyDictEntry *ep;
1199 assert(mp->ma_table != NULL);
1200 if (!PyString_CheckExact(key) ||
1201 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1202 hash = PyObject_Hash(key);
1203 if (hash == -1)
1204 return NULL;
1205 }
1206 ep = (mp->ma_lookup)(mp, key, hash);
1207 if (ep == NULL)
1208 return NULL;
1209 v = ep->me_value;
1210 if (v == NULL) {
1211 if (!PyDict_CheckExact(mp)) {
1212 /* Look up __missing__ method if we're a subclass. */
1213 PyObject *missing, *res;
1214 static PyObject *missing_str = NULL;
1215 missing = _PyObject_LookupSpecial((PyObject *)mp,
1216 "__missing__",
1217 &missing_str);
1218 if (missing != NULL) {
1219 res = PyObject_CallFunctionObjArgs(missing,
1220 key, NULL);
1221 Py_DECREF(missing);
1222 return res;
1223 }
1224 else if (PyErr_Occurred())
1225 return NULL;
1226 }
1227 set_key_error(key);
1228 return NULL;
1229 }
1230 else
1231 Py_INCREF(v);
1232 return v;
1233 }
1234
1235 static int
dict_ass_sub(PyDictObject * mp,PyObject * v,PyObject * w)1236 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
1237 {
1238 if (w == NULL)
1239 return PyDict_DelItem((PyObject *)mp, v);
1240 else
1241 return PyDict_SetItem((PyObject *)mp, v, w);
1242 }
1243
1244 static PyMappingMethods dict_as_mapping = {
1245 (lenfunc)dict_length, /*mp_length*/
1246 (binaryfunc)dict_subscript, /*mp_subscript*/
1247 (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
1248 };
1249
1250 static PyObject *
dict_keys(register PyDictObject * mp)1251 dict_keys(register PyDictObject *mp)
1252 {
1253 register PyObject *v;
1254 register Py_ssize_t i, j;
1255 PyDictEntry *ep;
1256 Py_ssize_t mask, n;
1257
1258 again:
1259 n = mp->ma_used;
1260 v = PyList_New(n);
1261 if (v == NULL)
1262 return NULL;
1263 if (n != mp->ma_used) {
1264 /* Durnit. The allocations caused the dict to resize.
1265 * Just start over, this shouldn't normally happen.
1266 */
1267 Py_DECREF(v);
1268 goto again;
1269 }
1270 ep = mp->ma_table;
1271 mask = mp->ma_mask;
1272 for (i = 0, j = 0; i <= mask; i++) {
1273 if (ep[i].me_value != NULL) {
1274 PyObject *key = ep[i].me_key;
1275 Py_INCREF(key);
1276 PyList_SET_ITEM(v, j, key);
1277 j++;
1278 }
1279 }
1280 assert(j == n);
1281 return v;
1282 }
1283
1284 static PyObject *
dict_values(register PyDictObject * mp)1285 dict_values(register PyDictObject *mp)
1286 {
1287 register PyObject *v;
1288 register Py_ssize_t i, j;
1289 PyDictEntry *ep;
1290 Py_ssize_t mask, n;
1291
1292 again:
1293 n = mp->ma_used;
1294 v = PyList_New(n);
1295 if (v == NULL)
1296 return NULL;
1297 if (n != mp->ma_used) {
1298 /* Durnit. The allocations caused the dict to resize.
1299 * Just start over, this shouldn't normally happen.
1300 */
1301 Py_DECREF(v);
1302 goto again;
1303 }
1304 ep = mp->ma_table;
1305 mask = mp->ma_mask;
1306 for (i = 0, j = 0; i <= mask; i++) {
1307 if (ep[i].me_value != NULL) {
1308 PyObject *value = ep[i].me_value;
1309 Py_INCREF(value);
1310 PyList_SET_ITEM(v, j, value);
1311 j++;
1312 }
1313 }
1314 assert(j == n);
1315 return v;
1316 }
1317
1318 static PyObject *
dict_items(register PyDictObject * mp)1319 dict_items(register PyDictObject *mp)
1320 {
1321 register PyObject *v;
1322 register Py_ssize_t i, j, n;
1323 Py_ssize_t mask;
1324 PyObject *item, *key, *value;
1325 PyDictEntry *ep;
1326
1327 /* Preallocate the list of tuples, to avoid allocations during
1328 * the loop over the items, which could trigger GC, which
1329 * could resize the dict. :-(
1330 */
1331 again:
1332 n = mp->ma_used;
1333 v = PyList_New(n);
1334 if (v == NULL)
1335 return NULL;
1336 for (i = 0; i < n; i++) {
1337 item = PyTuple_New(2);
1338 if (item == NULL) {
1339 Py_DECREF(v);
1340 return NULL;
1341 }
1342 PyList_SET_ITEM(v, i, item);
1343 }
1344 if (n != mp->ma_used) {
1345 /* Durnit. The allocations caused the dict to resize.
1346 * Just start over, this shouldn't normally happen.
1347 */
1348 Py_DECREF(v);
1349 goto again;
1350 }
1351 /* Nothing we do below makes any function calls. */
1352 ep = mp->ma_table;
1353 mask = mp->ma_mask;
1354 for (i = 0, j = 0; i <= mask; i++) {
1355 if ((value=ep[i].me_value) != NULL) {
1356 key = ep[i].me_key;
1357 item = PyList_GET_ITEM(v, j);
1358 Py_INCREF(key);
1359 PyTuple_SET_ITEM(item, 0, key);
1360 Py_INCREF(value);
1361 PyTuple_SET_ITEM(item, 1, value);
1362 j++;
1363 }
1364 }
1365 assert(j == n);
1366 return v;
1367 }
1368
1369 static PyObject *
dict_fromkeys(PyObject * cls,PyObject * args)1370 dict_fromkeys(PyObject *cls, PyObject *args)
1371 {
1372 PyObject *seq;
1373 PyObject *value = Py_None;
1374 PyObject *it; /* iter(seq) */
1375 PyObject *key;
1376 PyObject *d;
1377 int status;
1378
1379 if (!PyArg_UnpackTuple(args, "fromkeys", 1, 2, &seq, &value))
1380 return NULL;
1381
1382 d = PyObject_CallObject(cls, NULL);
1383 if (d == NULL)
1384 return NULL;
1385
1386 if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
1387 if (PyDict_CheckExact(seq)) {
1388 PyDictObject *mp = (PyDictObject *)d;
1389 PyObject *oldvalue;
1390 Py_ssize_t pos = 0;
1391 PyObject *key;
1392 long hash;
1393
1394 if (dictresize(mp, Py_SIZE(seq) / 2 * 3)) {
1395 Py_DECREF(d);
1396 return NULL;
1397 }
1398
1399 while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) {
1400 Py_INCREF(key);
1401 Py_INCREF(value);
1402 if (insertdict(mp, key, hash, value)) {
1403 Py_DECREF(d);
1404 return NULL;
1405 }
1406 }
1407 return d;
1408 }
1409 if (PyAnySet_CheckExact(seq)) {
1410 PyDictObject *mp = (PyDictObject *)d;
1411 Py_ssize_t pos = 0;
1412 PyObject *key;
1413 long hash;
1414
1415 if (dictresize(mp, PySet_GET_SIZE(seq) / 2 * 3)) {
1416 Py_DECREF(d);
1417 return NULL;
1418 }
1419
1420 while (_PySet_NextEntry(seq, &pos, &key, &hash)) {
1421 Py_INCREF(key);
1422 Py_INCREF(value);
1423 if (insertdict(mp, key, hash, value)) {
1424 Py_DECREF(d);
1425 return NULL;
1426 }
1427 }
1428 return d;
1429 }
1430 }
1431
1432 it = PyObject_GetIter(seq);
1433 if (it == NULL){
1434 Py_DECREF(d);
1435 return NULL;
1436 }
1437
1438 if (PyDict_CheckExact(d)) {
1439 while ((key = PyIter_Next(it)) != NULL) {
1440 status = PyDict_SetItem(d, key, value);
1441 Py_DECREF(key);
1442 if (status < 0)
1443 goto Fail;
1444 }
1445 } else {
1446 while ((key = PyIter_Next(it)) != NULL) {
1447 status = PyObject_SetItem(d, key, value);
1448 Py_DECREF(key);
1449 if (status < 0)
1450 goto Fail;
1451 }
1452 }
1453
1454 if (PyErr_Occurred())
1455 goto Fail;
1456 Py_DECREF(it);
1457 return d;
1458
1459 Fail:
1460 Py_DECREF(it);
1461 Py_DECREF(d);
1462 return NULL;
1463 }
1464
1465 static int
dict_update_common(PyObject * self,PyObject * args,PyObject * kwds,char * methname)1466 dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname)
1467 {
1468 PyObject *arg = NULL;
1469 int result = 0;
1470
1471 if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg))
1472 result = -1;
1473
1474 else if (arg != NULL) {
1475 if (PyObject_HasAttrString(arg, "keys"))
1476 result = PyDict_Merge(self, arg, 1);
1477 else
1478 result = PyDict_MergeFromSeq2(self, arg, 1);
1479 }
1480 if (result == 0 && kwds != NULL)
1481 result = PyDict_Merge(self, kwds, 1);
1482 return result;
1483 }
1484
1485 static PyObject *
dict_update(PyObject * self,PyObject * args,PyObject * kwds)1486 dict_update(PyObject *self, PyObject *args, PyObject *kwds)
1487 {
1488 if (dict_update_common(self, args, kwds, "update") != -1)
1489 Py_RETURN_NONE;
1490 return NULL;
1491 }
1492
1493 /* Update unconditionally replaces existing items.
1494 Merge has a 3rd argument 'override'; if set, it acts like Update,
1495 otherwise it leaves existing items unchanged.
1496
1497 PyDict_{Update,Merge} update/merge from a mapping object.
1498
1499 PyDict_MergeFromSeq2 updates/merges from any iterable object
1500 producing iterable objects of length 2.
1501 */
1502
1503 int
PyDict_MergeFromSeq2(PyObject * d,PyObject * seq2,int override)1504 PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
1505 {
1506 PyObject *it; /* iter(seq2) */
1507 Py_ssize_t i; /* index into seq2 of current element */
1508 PyObject *item; /* seq2[i] */
1509 PyObject *fast; /* item as a 2-tuple or 2-list */
1510
1511 assert(d != NULL);
1512 assert(PyDict_Check(d));
1513 assert(seq2 != NULL);
1514
1515 it = PyObject_GetIter(seq2);
1516 if (it == NULL)
1517 return -1;
1518
1519 for (i = 0; ; ++i) {
1520 PyObject *key, *value;
1521 Py_ssize_t n;
1522
1523 fast = NULL;
1524 item = PyIter_Next(it);
1525 if (item == NULL) {
1526 if (PyErr_Occurred())
1527 goto Fail;
1528 break;
1529 }
1530
1531 /* Convert item to sequence, and verify length 2. */
1532 fast = PySequence_Fast(item, "");
1533 if (fast == NULL) {
1534 if (PyErr_ExceptionMatches(PyExc_TypeError))
1535 PyErr_Format(PyExc_TypeError,
1536 "cannot convert dictionary update "
1537 "sequence element #%zd to a sequence",
1538 i);
1539 goto Fail;
1540 }
1541 n = PySequence_Fast_GET_SIZE(fast);
1542 if (n != 2) {
1543 PyErr_Format(PyExc_ValueError,
1544 "dictionary update sequence element #%zd "
1545 "has length %zd; 2 is required",
1546 i, n);
1547 goto Fail;
1548 }
1549
1550 /* Update/merge with this (key, value) pair. */
1551 key = PySequence_Fast_GET_ITEM(fast, 0);
1552 value = PySequence_Fast_GET_ITEM(fast, 1);
1553 if (override || PyDict_GetItem(d, key) == NULL) {
1554 int status = PyDict_SetItem(d, key, value);
1555 if (status < 0)
1556 goto Fail;
1557 }
1558 Py_DECREF(fast);
1559 Py_DECREF(item);
1560 }
1561
1562 i = 0;
1563 goto Return;
1564 Fail:
1565 Py_XDECREF(item);
1566 Py_XDECREF(fast);
1567 i = -1;
1568 Return:
1569 Py_DECREF(it);
1570 return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
1571 }
1572
1573 int
PyDict_Update(PyObject * a,PyObject * b)1574 PyDict_Update(PyObject *a, PyObject *b)
1575 {
1576 return PyDict_Merge(a, b, 1);
1577 }
1578
1579 int
PyDict_Merge(PyObject * a,PyObject * b,int override)1580 PyDict_Merge(PyObject *a, PyObject *b, int override)
1581 {
1582 register PyDictObject *mp, *other;
1583 register Py_ssize_t i;
1584 PyDictEntry *entry;
1585
1586 /* We accept for the argument either a concrete dictionary object,
1587 * or an abstract "mapping" object. For the former, we can do
1588 * things quite efficiently. For the latter, we only require that
1589 * PyMapping_Keys() and PyObject_GetItem() be supported.
1590 */
1591 if (a == NULL || !PyDict_Check(a) || b == NULL) {
1592 PyErr_BadInternalCall();
1593 return -1;
1594 }
1595 mp = (PyDictObject*)a;
1596 if (PyDict_Check(b)) {
1597 other = (PyDictObject*)b;
1598 if (other == mp || other->ma_used == 0)
1599 /* a.update(a) or a.update({}); nothing to do */
1600 return 0;
1601 if (mp->ma_used == 0)
1602 /* Since the target dict is empty, PyDict_GetItem()
1603 * always returns NULL. Setting override to 1
1604 * skips the unnecessary test.
1605 */
1606 override = 1;
1607 /* Do one big resize at the start, rather than
1608 * incrementally resizing as we insert new items. Expect
1609 * that there will be no (or few) overlapping keys.
1610 */
1611 if ((mp->ma_fill + other->ma_used)*3 >= (mp->ma_mask+1)*2) {
1612 if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
1613 return -1;
1614 }
1615 for (i = 0; i <= other->ma_mask; i++) {
1616 entry = &other->ma_table[i];
1617 if (entry->me_value != NULL &&
1618 (override ||
1619 PyDict_GetItem(a, entry->me_key) == NULL)) {
1620 Py_INCREF(entry->me_key);
1621 Py_INCREF(entry->me_value);
1622 if (insertdict(mp, entry->me_key,
1623 (long)entry->me_hash,
1624 entry->me_value) != 0)
1625 return -1;
1626 }
1627 }
1628 }
1629 else {
1630 /* Do it the generic, slower way */
1631 PyObject *keys = PyMapping_Keys(b);
1632 PyObject *iter;
1633 PyObject *key, *value;
1634 int status;
1635
1636 if (keys == NULL)
1637 /* Docstring says this is equivalent to E.keys() so
1638 * if E doesn't have a .keys() method we want
1639 * AttributeError to percolate up. Might as well
1640 * do the same for any other error.
1641 */
1642 return -1;
1643
1644 iter = PyObject_GetIter(keys);
1645 Py_DECREF(keys);
1646 if (iter == NULL)
1647 return -1;
1648
1649 for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
1650 if (!override && PyDict_GetItem(a, key) != NULL) {
1651 Py_DECREF(key);
1652 continue;
1653 }
1654 value = PyObject_GetItem(b, key);
1655 if (value == NULL) {
1656 Py_DECREF(iter);
1657 Py_DECREF(key);
1658 return -1;
1659 }
1660 status = PyDict_SetItem(a, key, value);
1661 Py_DECREF(key);
1662 Py_DECREF(value);
1663 if (status < 0) {
1664 Py_DECREF(iter);
1665 return -1;
1666 }
1667 }
1668 Py_DECREF(iter);
1669 if (PyErr_Occurred())
1670 /* Iterator completed, via error */
1671 return -1;
1672 }
1673 return 0;
1674 }
1675
1676 static PyObject *
dict_copy(register PyDictObject * mp)1677 dict_copy(register PyDictObject *mp)
1678 {
1679 return PyDict_Copy((PyObject*)mp);
1680 }
1681
1682 PyObject *
PyDict_Copy(PyObject * o)1683 PyDict_Copy(PyObject *o)
1684 {
1685 PyObject *copy;
1686
1687 if (o == NULL || !PyDict_Check(o)) {
1688 PyErr_BadInternalCall();
1689 return NULL;
1690 }
1691 copy = PyDict_New();
1692 if (copy == NULL)
1693 return NULL;
1694 if (PyDict_Merge(copy, o, 1) == 0)
1695 return copy;
1696 Py_DECREF(copy);
1697 return NULL;
1698 }
1699
1700 Py_ssize_t
PyDict_Size(PyObject * mp)1701 PyDict_Size(PyObject *mp)
1702 {
1703 if (mp == NULL || !PyDict_Check(mp)) {
1704 PyErr_BadInternalCall();
1705 return -1;
1706 }
1707 return ((PyDictObject *)mp)->ma_used;
1708 }
1709
1710 PyObject *
PyDict_Keys(PyObject * mp)1711 PyDict_Keys(PyObject *mp)
1712 {
1713 if (mp == NULL || !PyDict_Check(mp)) {
1714 PyErr_BadInternalCall();
1715 return NULL;
1716 }
1717 return dict_keys((PyDictObject *)mp);
1718 }
1719
1720 PyObject *
PyDict_Values(PyObject * mp)1721 PyDict_Values(PyObject *mp)
1722 {
1723 if (mp == NULL || !PyDict_Check(mp)) {
1724 PyErr_BadInternalCall();
1725 return NULL;
1726 }
1727 return dict_values((PyDictObject *)mp);
1728 }
1729
1730 PyObject *
PyDict_Items(PyObject * mp)1731 PyDict_Items(PyObject *mp)
1732 {
1733 if (mp == NULL || !PyDict_Check(mp)) {
1734 PyErr_BadInternalCall();
1735 return NULL;
1736 }
1737 return dict_items((PyDictObject *)mp);
1738 }
1739
1740 /* Subroutine which returns the smallest key in a for which b's value
1741 is different or absent. The value is returned too, through the
1742 pval argument. Both are NULL if no key in a is found for which b's status
1743 differs. The refcounts on (and only on) non-NULL *pval and function return
1744 values must be decremented by the caller (characterize() increments them
1745 to ensure that mutating comparison and PyDict_GetItem calls can't delete
1746 them before the caller is done looking at them). */
1747
1748 static PyObject *
characterize(PyDictObject * a,PyDictObject * b,PyObject ** pval)1749 characterize(PyDictObject *a, PyDictObject *b, PyObject **pval)
1750 {
1751 PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
1752 PyObject *aval = NULL; /* a[akey] */
1753 Py_ssize_t i;
1754 int cmp;
1755
1756 for (i = 0; i <= a->ma_mask; i++) {
1757 PyObject *thiskey, *thisaval, *thisbval;
1758 if (a->ma_table[i].me_value == NULL)
1759 continue;
1760 thiskey = a->ma_table[i].me_key;
1761 Py_INCREF(thiskey); /* keep alive across compares */
1762 if (akey != NULL) {
1763 cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
1764 if (cmp < 0) {
1765 Py_DECREF(thiskey);
1766 goto Fail;
1767 }
1768 if (cmp > 0 ||
1769 i > a->ma_mask ||
1770 a->ma_table[i].me_value == NULL)
1771 {
1772 /* Not the *smallest* a key; or maybe it is
1773 * but the compare shrunk the dict so we can't
1774 * find its associated value anymore; or
1775 * maybe it is but the compare deleted the
1776 * a[thiskey] entry.
1777 */
1778 Py_DECREF(thiskey);
1779 continue;
1780 }
1781 }
1782
1783 /* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
1784 thisaval = a->ma_table[i].me_value;
1785 assert(thisaval);
1786 Py_INCREF(thisaval); /* keep alive */
1787 thisbval = PyDict_GetItem((PyObject *)b, thiskey);
1788 if (thisbval == NULL)
1789 cmp = 0;
1790 else {
1791 /* both dicts have thiskey: same values? */
1792 cmp = PyObject_RichCompareBool(
1793 thisaval, thisbval, Py_EQ);
1794 if (cmp < 0) {
1795 Py_DECREF(thiskey);
1796 Py_DECREF(thisaval);
1797 goto Fail;
1798 }
1799 }
1800 if (cmp == 0) {
1801 /* New winner. */
1802 Py_XDECREF(akey);
1803 Py_XDECREF(aval);
1804 akey = thiskey;
1805 aval = thisaval;
1806 }
1807 else {
1808 Py_DECREF(thiskey);
1809 Py_DECREF(thisaval);
1810 }
1811 }
1812 *pval = aval;
1813 return akey;
1814
1815 Fail:
1816 Py_XDECREF(akey);
1817 Py_XDECREF(aval);
1818 *pval = NULL;
1819 return NULL;
1820 }
1821
1822 static int
dict_compare(PyDictObject * a,PyDictObject * b)1823 dict_compare(PyDictObject *a, PyDictObject *b)
1824 {
1825 PyObject *adiff, *bdiff, *aval, *bval;
1826 int res;
1827
1828 /* Compare lengths first */
1829 if (a->ma_used < b->ma_used)
1830 return -1; /* a is shorter */
1831 else if (a->ma_used > b->ma_used)
1832 return 1; /* b is shorter */
1833
1834 /* Same length -- check all keys */
1835 bdiff = bval = NULL;
1836 adiff = characterize(a, b, &aval);
1837 if (adiff == NULL) {
1838 assert(!aval);
1839 /* Either an error, or a is a subset with the same length so
1840 * must be equal.
1841 */
1842 res = PyErr_Occurred() ? -1 : 0;
1843 goto Finished;
1844 }
1845 bdiff = characterize(b, a, &bval);
1846 if (bdiff == NULL && PyErr_Occurred()) {
1847 assert(!bval);
1848 res = -1;
1849 goto Finished;
1850 }
1851 res = 0;
1852 if (bdiff) {
1853 /* bdiff == NULL "should be" impossible now, but perhaps
1854 * the last comparison done by the characterize() on a had
1855 * the side effect of making the dicts equal!
1856 */
1857 res = PyObject_Compare(adiff, bdiff);
1858 }
1859 if (res == 0 && bval != NULL)
1860 res = PyObject_Compare(aval, bval);
1861
1862 Finished:
1863 Py_XDECREF(adiff);
1864 Py_XDECREF(bdiff);
1865 Py_XDECREF(aval);
1866 Py_XDECREF(bval);
1867 return res;
1868 }
1869
1870 /* Return 1 if dicts equal, 0 if not, -1 if error.
1871 * Gets out as soon as any difference is detected.
1872 * Uses only Py_EQ comparison.
1873 */
1874 static int
dict_equal(PyDictObject * a,PyDictObject * b)1875 dict_equal(PyDictObject *a, PyDictObject *b)
1876 {
1877 Py_ssize_t i;
1878
1879 if (a->ma_used != b->ma_used)
1880 /* can't be equal if # of entries differ */
1881 return 0;
1882
1883 /* Same # of entries -- check all of 'em. Exit early on any diff. */
1884 for (i = 0; i <= a->ma_mask; i++) {
1885 PyObject *aval = a->ma_table[i].me_value;
1886 if (aval != NULL) {
1887 int cmp;
1888 PyObject *bval;
1889 PyObject *key = a->ma_table[i].me_key;
1890 /* temporarily bump aval's refcount to ensure it stays
1891 alive until we're done with it */
1892 Py_INCREF(aval);
1893 /* ditto for key */
1894 Py_INCREF(key);
1895 bval = PyDict_GetItem((PyObject *)b, key);
1896 Py_DECREF(key);
1897 if (bval == NULL) {
1898 Py_DECREF(aval);
1899 return 0;
1900 }
1901 cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
1902 Py_DECREF(aval);
1903 if (cmp <= 0) /* error or not equal */
1904 return cmp;
1905 }
1906 }
1907 return 1;
1908 }
1909
1910 static PyObject *
dict_richcompare(PyObject * v,PyObject * w,int op)1911 dict_richcompare(PyObject *v, PyObject *w, int op)
1912 {
1913 int cmp;
1914 PyObject *res;
1915
1916 if (!PyDict_Check(v) || !PyDict_Check(w)) {
1917 res = Py_NotImplemented;
1918 }
1919 else if (op == Py_EQ || op == Py_NE) {
1920 cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
1921 if (cmp < 0)
1922 return NULL;
1923 res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
1924 }
1925 else {
1926 /* Py3K warning if comparison isn't == or != */
1927 if (PyErr_WarnPy3k("dict inequality comparisons not supported "
1928 "in 3.x", 1) < 0) {
1929 return NULL;
1930 }
1931 res = Py_NotImplemented;
1932 }
1933 Py_INCREF(res);
1934 return res;
1935 }
1936
1937 static PyObject *
dict_contains(register PyDictObject * mp,PyObject * key)1938 dict_contains(register PyDictObject *mp, PyObject *key)
1939 {
1940 long hash;
1941 PyDictEntry *ep;
1942
1943 if (!PyString_CheckExact(key) ||
1944 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1945 hash = PyObject_Hash(key);
1946 if (hash == -1)
1947 return NULL;
1948 }
1949 ep = (mp->ma_lookup)(mp, key, hash);
1950 if (ep == NULL)
1951 return NULL;
1952 return PyBool_FromLong(ep->me_value != NULL);
1953 }
1954
1955 static PyObject *
dict_has_key(register PyDictObject * mp,PyObject * key)1956 dict_has_key(register PyDictObject *mp, PyObject *key)
1957 {
1958 if (PyErr_WarnPy3k("dict.has_key() not supported in 3.x; "
1959 "use the in operator", 1) < 0)
1960 return NULL;
1961 return dict_contains(mp, key);
1962 }
1963
1964 static PyObject *
dict_get(register PyDictObject * mp,PyObject * args)1965 dict_get(register PyDictObject *mp, PyObject *args)
1966 {
1967 PyObject *key;
1968 PyObject *failobj = Py_None;
1969 PyObject *val = NULL;
1970 long hash;
1971 PyDictEntry *ep;
1972
1973 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
1974 return NULL;
1975
1976 if (!PyString_CheckExact(key) ||
1977 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1978 hash = PyObject_Hash(key);
1979 if (hash == -1)
1980 return NULL;
1981 }
1982 ep = (mp->ma_lookup)(mp, key, hash);
1983 if (ep == NULL)
1984 return NULL;
1985 val = ep->me_value;
1986 if (val == NULL)
1987 val = failobj;
1988 Py_INCREF(val);
1989 return val;
1990 }
1991
1992
1993 static PyObject *
dict_setdefault(register PyDictObject * mp,PyObject * args)1994 dict_setdefault(register PyDictObject *mp, PyObject *args)
1995 {
1996 PyObject *key;
1997 PyObject *failobj = Py_None;
1998 PyObject *val = NULL;
1999 long hash;
2000 PyDictEntry *ep;
2001
2002 if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
2003 return NULL;
2004
2005 if (!PyString_CheckExact(key) ||
2006 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
2007 hash = PyObject_Hash(key);
2008 if (hash == -1)
2009 return NULL;
2010 }
2011 ep = (mp->ma_lookup)(mp, key, hash);
2012 if (ep == NULL)
2013 return NULL;
2014 val = ep->me_value;
2015 if (val == NULL) {
2016 if (dict_set_item_by_hash_or_entry((PyObject*)mp, key, hash, ep,
2017 failobj) == 0)
2018 val = failobj;
2019 }
2020 Py_XINCREF(val);
2021 return val;
2022 }
2023
2024
2025 static PyObject *
dict_clear(register PyDictObject * mp)2026 dict_clear(register PyDictObject *mp)
2027 {
2028 PyDict_Clear((PyObject *)mp);
2029 Py_RETURN_NONE;
2030 }
2031
2032 static PyObject *
dict_pop(PyDictObject * mp,PyObject * args)2033 dict_pop(PyDictObject *mp, PyObject *args)
2034 {
2035 long hash;
2036 PyDictEntry *ep;
2037 PyObject *old_value, *old_key;
2038 PyObject *key, *deflt = NULL;
2039
2040 if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
2041 return NULL;
2042 if (mp->ma_used == 0) {
2043 if (deflt) {
2044 Py_INCREF(deflt);
2045 return deflt;
2046 }
2047 set_key_error(key);
2048 return NULL;
2049 }
2050 if (!PyString_CheckExact(key) ||
2051 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
2052 hash = PyObject_Hash(key);
2053 if (hash == -1)
2054 return NULL;
2055 }
2056 ep = (mp->ma_lookup)(mp, key, hash);
2057 if (ep == NULL)
2058 return NULL;
2059 if (ep->me_value == NULL) {
2060 if (deflt) {
2061 Py_INCREF(deflt);
2062 return deflt;
2063 }
2064 set_key_error(key);
2065 return NULL;
2066 }
2067 old_key = ep->me_key;
2068 Py_INCREF(dummy);
2069 ep->me_key = dummy;
2070 old_value = ep->me_value;
2071 ep->me_value = NULL;
2072 mp->ma_used--;
2073 Py_DECREF(old_key);
2074 return old_value;
2075 }
2076
2077 static PyObject *
dict_popitem(PyDictObject * mp)2078 dict_popitem(PyDictObject *mp)
2079 {
2080 Py_ssize_t i = 0;
2081 PyDictEntry *ep;
2082 PyObject *res;
2083
2084 /* Allocate the result tuple before checking the size. Believe it
2085 * or not, this allocation could trigger a garbage collection which
2086 * could empty the dict, so if we checked the size first and that
2087 * happened, the result would be an infinite loop (searching for an
2088 * entry that no longer exists). Note that the usual popitem()
2089 * idiom is "while d: k, v = d.popitem()". so needing to throw the
2090 * tuple away if the dict *is* empty isn't a significant
2091 * inefficiency -- possible, but unlikely in practice.
2092 */
2093 res = PyTuple_New(2);
2094 if (res == NULL)
2095 return NULL;
2096 if (mp->ma_used == 0) {
2097 Py_DECREF(res);
2098 PyErr_SetString(PyExc_KeyError,
2099 "popitem(): dictionary is empty");
2100 return NULL;
2101 }
2102 /* Set ep to "the first" dict entry with a value. We abuse the hash
2103 * field of slot 0 to hold a search finger:
2104 * If slot 0 has a value, use slot 0.
2105 * Else slot 0 is being used to hold a search finger,
2106 * and we use its hash value as the first index to look.
2107 */
2108 ep = &mp->ma_table[0];
2109 if (ep->me_value == NULL) {
2110 i = ep->me_hash;
2111 /* The hash field may be a real hash value, or it may be a
2112 * legit search finger, or it may be a once-legit search
2113 * finger that's out of bounds now because it wrapped around
2114 * or the table shrunk -- simply make sure it's in bounds now.
2115 */
2116 if (i > mp->ma_mask || i < 1)
2117 i = 1; /* skip slot 0 */
2118 while ((ep = &mp->ma_table[i])->me_value == NULL) {
2119 i++;
2120 if (i > mp->ma_mask)
2121 i = 1;
2122 }
2123 }
2124 PyTuple_SET_ITEM(res, 0, ep->me_key);
2125 PyTuple_SET_ITEM(res, 1, ep->me_value);
2126 Py_INCREF(dummy);
2127 ep->me_key = dummy;
2128 ep->me_value = NULL;
2129 mp->ma_used--;
2130 assert(mp->ma_table[0].me_value == NULL);
2131 mp->ma_table[0].me_hash = i + 1; /* next place to start */
2132 return res;
2133 }
2134
2135 static int
dict_traverse(PyObject * op,visitproc visit,void * arg)2136 dict_traverse(PyObject *op, visitproc visit, void *arg)
2137 {
2138 Py_ssize_t i = 0;
2139 PyObject *pk;
2140 PyObject *pv;
2141
2142 while (PyDict_Next(op, &i, &pk, &pv)) {
2143 Py_VISIT(pk);
2144 Py_VISIT(pv);
2145 }
2146 return 0;
2147 }
2148
2149 static int
dict_tp_clear(PyObject * op)2150 dict_tp_clear(PyObject *op)
2151 {
2152 PyDict_Clear(op);
2153 return 0;
2154 }
2155
2156
2157 extern PyTypeObject PyDictIterKey_Type; /* Forward */
2158 extern PyTypeObject PyDictIterValue_Type; /* Forward */
2159 extern PyTypeObject PyDictIterItem_Type; /* Forward */
2160 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
2161
2162 static PyObject *
dict_iterkeys(PyDictObject * dict)2163 dict_iterkeys(PyDictObject *dict)
2164 {
2165 return dictiter_new(dict, &PyDictIterKey_Type);
2166 }
2167
2168 static PyObject *
dict_itervalues(PyDictObject * dict)2169 dict_itervalues(PyDictObject *dict)
2170 {
2171 return dictiter_new(dict, &PyDictIterValue_Type);
2172 }
2173
2174 static PyObject *
dict_iteritems(PyDictObject * dict)2175 dict_iteritems(PyDictObject *dict)
2176 {
2177 return dictiter_new(dict, &PyDictIterItem_Type);
2178 }
2179
2180 static PyObject *
dict_sizeof(PyDictObject * mp)2181 dict_sizeof(PyDictObject *mp)
2182 {
2183 Py_ssize_t res;
2184
2185 res = _PyObject_SIZE(Py_TYPE(mp));
2186 if (mp->ma_table != mp->ma_smalltable)
2187 res = res + (mp->ma_mask + 1) * sizeof(PyDictEntry);
2188 return PyInt_FromSsize_t(res);
2189 }
2190
2191 PyDoc_STRVAR(has_key__doc__,
2192 "D.has_key(k) -> True if D has a key k, else False");
2193
2194 PyDoc_STRVAR(contains__doc__,
2195 "D.__contains__(k) -> True if D has a key k, else False");
2196
2197 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
2198
2199 PyDoc_STRVAR(sizeof__doc__,
2200 "D.__sizeof__() -> size of D in memory, in bytes");
2201
2202 PyDoc_STRVAR(get__doc__,
2203 "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.");
2204
2205 PyDoc_STRVAR(setdefault_doc__,
2206 "D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D");
2207
2208 PyDoc_STRVAR(pop__doc__,
2209 "D.pop(k[,d]) -> v, remove specified key and return the corresponding value.\n\
2210 If key is not found, d is returned if given, otherwise KeyError is raised");
2211
2212 PyDoc_STRVAR(popitem__doc__,
2213 "D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
2214 2-tuple; but raise KeyError if D is empty.");
2215
2216 PyDoc_STRVAR(keys__doc__,
2217 "D.keys() -> list of D's keys");
2218
2219 PyDoc_STRVAR(items__doc__,
2220 "D.items() -> list of D's (key, value) pairs, as 2-tuples");
2221
2222 PyDoc_STRVAR(values__doc__,
2223 "D.values() -> list of D's values");
2224
2225 PyDoc_STRVAR(update__doc__,
2226 "D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n"
2227 "If E present and has a .keys() method, does: for k in E: D[k] = E[k]\n\
2228 If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v\n\
2229 In either case, this is followed by: for k in F: D[k] = F[k]");
2230
2231 PyDoc_STRVAR(fromkeys__doc__,
2232 "dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.\n\
2233 v defaults to None.");
2234
2235 PyDoc_STRVAR(clear__doc__,
2236 "D.clear() -> None. Remove all items from D.");
2237
2238 PyDoc_STRVAR(copy__doc__,
2239 "D.copy() -> a shallow copy of D");
2240
2241 PyDoc_STRVAR(iterkeys__doc__,
2242 "D.iterkeys() -> an iterator over the keys of D");
2243
2244 PyDoc_STRVAR(itervalues__doc__,
2245 "D.itervalues() -> an iterator over the values of D");
2246
2247 PyDoc_STRVAR(iteritems__doc__,
2248 "D.iteritems() -> an iterator over the (key, value) items of D");
2249
2250 /* Forward */
2251 static PyObject *dictkeys_new(PyObject *);
2252 static PyObject *dictitems_new(PyObject *);
2253 static PyObject *dictvalues_new(PyObject *);
2254
2255 PyDoc_STRVAR(viewkeys__doc__,
2256 "D.viewkeys() -> a set-like object providing a view on D's keys");
2257 PyDoc_STRVAR(viewitems__doc__,
2258 "D.viewitems() -> a set-like object providing a view on D's items");
2259 PyDoc_STRVAR(viewvalues__doc__,
2260 "D.viewvalues() -> an object providing a view on D's values");
2261
2262 static PyMethodDef mapp_methods[] = {
2263 {"__contains__",(PyCFunction)dict_contains, METH_O | METH_COEXIST,
2264 contains__doc__},
2265 {"__getitem__", (PyCFunction)dict_subscript, METH_O | METH_COEXIST,
2266 getitem__doc__},
2267 {"__sizeof__", (PyCFunction)dict_sizeof, METH_NOARGS,
2268 sizeof__doc__},
2269 {"has_key", (PyCFunction)dict_has_key, METH_O,
2270 has_key__doc__},
2271 {"get", (PyCFunction)dict_get, METH_VARARGS,
2272 get__doc__},
2273 {"setdefault", (PyCFunction)dict_setdefault, METH_VARARGS,
2274 setdefault_doc__},
2275 {"pop", (PyCFunction)dict_pop, METH_VARARGS,
2276 pop__doc__},
2277 {"popitem", (PyCFunction)dict_popitem, METH_NOARGS,
2278 popitem__doc__},
2279 {"keys", (PyCFunction)dict_keys, METH_NOARGS,
2280 keys__doc__},
2281 {"items", (PyCFunction)dict_items, METH_NOARGS,
2282 items__doc__},
2283 {"values", (PyCFunction)dict_values, METH_NOARGS,
2284 values__doc__},
2285 {"viewkeys", (PyCFunction)dictkeys_new, METH_NOARGS,
2286 viewkeys__doc__},
2287 {"viewitems", (PyCFunction)dictitems_new, METH_NOARGS,
2288 viewitems__doc__},
2289 {"viewvalues", (PyCFunction)dictvalues_new, METH_NOARGS,
2290 viewvalues__doc__},
2291 {"update", (PyCFunction)dict_update, METH_VARARGS | METH_KEYWORDS,
2292 update__doc__},
2293 {"fromkeys", (PyCFunction)dict_fromkeys, METH_VARARGS | METH_CLASS,
2294 fromkeys__doc__},
2295 {"clear", (PyCFunction)dict_clear, METH_NOARGS,
2296 clear__doc__},
2297 {"copy", (PyCFunction)dict_copy, METH_NOARGS,
2298 copy__doc__},
2299 {"iterkeys", (PyCFunction)dict_iterkeys, METH_NOARGS,
2300 iterkeys__doc__},
2301 {"itervalues", (PyCFunction)dict_itervalues, METH_NOARGS,
2302 itervalues__doc__},
2303 {"iteritems", (PyCFunction)dict_iteritems, METH_NOARGS,
2304 iteritems__doc__},
2305 {NULL, NULL} /* sentinel */
2306 };
2307
2308 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
2309 int
PyDict_Contains(PyObject * op,PyObject * key)2310 PyDict_Contains(PyObject *op, PyObject *key)
2311 {
2312 long hash;
2313 PyDictObject *mp = (PyDictObject *)op;
2314 PyDictEntry *ep;
2315
2316 if (!PyString_CheckExact(key) ||
2317 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
2318 hash = PyObject_Hash(key);
2319 if (hash == -1)
2320 return -1;
2321 }
2322 ep = (mp->ma_lookup)(mp, key, hash);
2323 return ep == NULL ? -1 : (ep->me_value != NULL);
2324 }
2325
2326 /* Internal version of PyDict_Contains used when the hash value is already known */
2327 int
_PyDict_Contains(PyObject * op,PyObject * key,long hash)2328 _PyDict_Contains(PyObject *op, PyObject *key, long hash)
2329 {
2330 PyDictObject *mp = (PyDictObject *)op;
2331 PyDictEntry *ep;
2332
2333 ep = (mp->ma_lookup)(mp, key, hash);
2334 return ep == NULL ? -1 : (ep->me_value != NULL);
2335 }
2336
2337 /* Hack to implement "key in dict" */
2338 static PySequenceMethods dict_as_sequence = {
2339 0, /* sq_length */
2340 0, /* sq_concat */
2341 0, /* sq_repeat */
2342 0, /* sq_item */
2343 0, /* sq_slice */
2344 0, /* sq_ass_item */
2345 0, /* sq_ass_slice */
2346 PyDict_Contains, /* sq_contains */
2347 0, /* sq_inplace_concat */
2348 0, /* sq_inplace_repeat */
2349 };
2350
2351 static PyObject *
dict_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2352 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2353 {
2354 PyObject *self;
2355
2356 assert(type != NULL && type->tp_alloc != NULL);
2357 self = type->tp_alloc(type, 0);
2358 if (self != NULL) {
2359 PyDictObject *d = (PyDictObject *)self;
2360 /* It's guaranteed that tp->alloc zeroed out the struct. */
2361 assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
2362 INIT_NONZERO_DICT_SLOTS(d);
2363 d->ma_lookup = lookdict_string;
2364 /* The object has been implicitly tracked by tp_alloc */
2365 if (type == &PyDict_Type)
2366 _PyObject_GC_UNTRACK(d);
2367 #ifdef SHOW_CONVERSION_COUNTS
2368 ++created;
2369 #endif
2370 #ifdef SHOW_TRACK_COUNT
2371 if (_PyObject_GC_IS_TRACKED(d))
2372 count_tracked++;
2373 else
2374 count_untracked++;
2375 #endif
2376 }
2377 return self;
2378 }
2379
2380 static int
dict_init(PyObject * self,PyObject * args,PyObject * kwds)2381 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
2382 {
2383 return dict_update_common(self, args, kwds, "dict");
2384 }
2385
2386 static PyObject *
dict_iter(PyDictObject * dict)2387 dict_iter(PyDictObject *dict)
2388 {
2389 return dictiter_new(dict, &PyDictIterKey_Type);
2390 }
2391
2392 PyDoc_STRVAR(dictionary_doc,
2393 "dict() -> new empty dictionary\n"
2394 "dict(mapping) -> new dictionary initialized from a mapping object's\n"
2395 " (key, value) pairs\n"
2396 "dict(iterable) -> new dictionary initialized as if via:\n"
2397 " d = {}\n"
2398 " for k, v in iterable:\n"
2399 " d[k] = v\n"
2400 "dict(**kwargs) -> new dictionary initialized with the name=value pairs\n"
2401 " in the keyword argument list. For example: dict(one=1, two=2)");
2402
2403 PyTypeObject PyDict_Type = {
2404 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2405 "dict",
2406 sizeof(PyDictObject),
2407 0,
2408 (destructor)dict_dealloc, /* tp_dealloc */
2409 (printfunc)dict_print, /* tp_print */
2410 0, /* tp_getattr */
2411 0, /* tp_setattr */
2412 (cmpfunc)dict_compare, /* tp_compare */
2413 (reprfunc)dict_repr, /* tp_repr */
2414 0, /* tp_as_number */
2415 &dict_as_sequence, /* tp_as_sequence */
2416 &dict_as_mapping, /* tp_as_mapping */
2417 (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
2418 0, /* tp_call */
2419 0, /* tp_str */
2420 PyObject_GenericGetAttr, /* tp_getattro */
2421 0, /* tp_setattro */
2422 0, /* tp_as_buffer */
2423 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2424 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DICT_SUBCLASS, /* tp_flags */
2425 dictionary_doc, /* tp_doc */
2426 dict_traverse, /* tp_traverse */
2427 dict_tp_clear, /* tp_clear */
2428 dict_richcompare, /* tp_richcompare */
2429 0, /* tp_weaklistoffset */
2430 (getiterfunc)dict_iter, /* tp_iter */
2431 0, /* tp_iternext */
2432 mapp_methods, /* tp_methods */
2433 0, /* tp_members */
2434 0, /* tp_getset */
2435 0, /* tp_base */
2436 0, /* tp_dict */
2437 0, /* tp_descr_get */
2438 0, /* tp_descr_set */
2439 0, /* tp_dictoffset */
2440 dict_init, /* tp_init */
2441 PyType_GenericAlloc, /* tp_alloc */
2442 dict_new, /* tp_new */
2443 PyObject_GC_Del, /* tp_free */
2444 };
2445
2446 /* For backward compatibility with old dictionary interface */
2447
2448 PyObject *
PyDict_GetItemString(PyObject * v,const char * key)2449 PyDict_GetItemString(PyObject *v, const char *key)
2450 {
2451 PyObject *kv, *rv;
2452 kv = PyString_FromString(key);
2453 if (kv == NULL)
2454 return NULL;
2455 rv = PyDict_GetItem(v, kv);
2456 Py_DECREF(kv);
2457 return rv;
2458 }
2459
2460 int
PyDict_SetItemString(PyObject * v,const char * key,PyObject * item)2461 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
2462 {
2463 PyObject *kv;
2464 int err;
2465 kv = PyString_FromString(key);
2466 if (kv == NULL)
2467 return -1;
2468 PyString_InternInPlace(&kv); /* XXX Should we really? */
2469 err = PyDict_SetItem(v, kv, item);
2470 Py_DECREF(kv);
2471 return err;
2472 }
2473
2474 int
PyDict_DelItemString(PyObject * v,const char * key)2475 PyDict_DelItemString(PyObject *v, const char *key)
2476 {
2477 PyObject *kv;
2478 int err;
2479 kv = PyString_FromString(key);
2480 if (kv == NULL)
2481 return -1;
2482 err = PyDict_DelItem(v, kv);
2483 Py_DECREF(kv);
2484 return err;
2485 }
2486
2487 /* Dictionary iterator types */
2488
2489 typedef struct {
2490 PyObject_HEAD
2491 PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
2492 Py_ssize_t di_used;
2493 Py_ssize_t di_pos;
2494 PyObject* di_result; /* reusable result tuple for iteritems */
2495 Py_ssize_t len;
2496 } dictiterobject;
2497
2498 static PyObject *
dictiter_new(PyDictObject * dict,PyTypeObject * itertype)2499 dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
2500 {
2501 dictiterobject *di;
2502 di = PyObject_GC_New(dictiterobject, itertype);
2503 if (di == NULL)
2504 return NULL;
2505 Py_INCREF(dict);
2506 di->di_dict = dict;
2507 di->di_used = dict->ma_used;
2508 di->di_pos = 0;
2509 di->len = dict->ma_used;
2510 if (itertype == &PyDictIterItem_Type) {
2511 di->di_result = PyTuple_Pack(2, Py_None, Py_None);
2512 if (di->di_result == NULL) {
2513 Py_DECREF(di);
2514 return NULL;
2515 }
2516 }
2517 else
2518 di->di_result = NULL;
2519 _PyObject_GC_TRACK(di);
2520 return (PyObject *)di;
2521 }
2522
2523 static void
dictiter_dealloc(dictiterobject * di)2524 dictiter_dealloc(dictiterobject *di)
2525 {
2526 Py_XDECREF(di->di_dict);
2527 Py_XDECREF(di->di_result);
2528 PyObject_GC_Del(di);
2529 }
2530
2531 static int
dictiter_traverse(dictiterobject * di,visitproc visit,void * arg)2532 dictiter_traverse(dictiterobject *di, visitproc visit, void *arg)
2533 {
2534 Py_VISIT(di->di_dict);
2535 Py_VISIT(di->di_result);
2536 return 0;
2537 }
2538
2539 static PyObject *
dictiter_len(dictiterobject * di)2540 dictiter_len(dictiterobject *di)
2541 {
2542 Py_ssize_t len = 0;
2543 if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
2544 len = di->len;
2545 return PyInt_FromSize_t(len);
2546 }
2547
2548 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
2549
2550 static PyMethodDef dictiter_methods[] = {
2551 {"__length_hint__", (PyCFunction)dictiter_len, METH_NOARGS, length_hint_doc},
2552 {NULL, NULL} /* sentinel */
2553 };
2554
dictiter_iternextkey(dictiterobject * di)2555 static PyObject *dictiter_iternextkey(dictiterobject *di)
2556 {
2557 PyObject *key;
2558 register Py_ssize_t i, mask;
2559 register PyDictEntry *ep;
2560 PyDictObject *d = di->di_dict;
2561
2562 if (d == NULL)
2563 return NULL;
2564 assert (PyDict_Check(d));
2565
2566 if (di->di_used != d->ma_used) {
2567 PyErr_SetString(PyExc_RuntimeError,
2568 "dictionary changed size during iteration");
2569 di->di_used = -1; /* Make this state sticky */
2570 return NULL;
2571 }
2572
2573 i = di->di_pos;
2574 if (i < 0)
2575 goto fail;
2576 ep = d->ma_table;
2577 mask = d->ma_mask;
2578 while (i <= mask && ep[i].me_value == NULL)
2579 i++;
2580 di->di_pos = i+1;
2581 if (i > mask)
2582 goto fail;
2583 di->len--;
2584 key = ep[i].me_key;
2585 Py_INCREF(key);
2586 return key;
2587
2588 fail:
2589 di->di_dict = NULL;
2590 Py_DECREF(d);
2591 return NULL;
2592 }
2593
2594 PyTypeObject PyDictIterKey_Type = {
2595 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2596 "dictionary-keyiterator", /* tp_name */
2597 sizeof(dictiterobject), /* tp_basicsize */
2598 0, /* tp_itemsize */
2599 /* methods */
2600 (destructor)dictiter_dealloc, /* tp_dealloc */
2601 0, /* tp_print */
2602 0, /* tp_getattr */
2603 0, /* tp_setattr */
2604 0, /* tp_compare */
2605 0, /* tp_repr */
2606 0, /* tp_as_number */
2607 0, /* tp_as_sequence */
2608 0, /* tp_as_mapping */
2609 0, /* tp_hash */
2610 0, /* tp_call */
2611 0, /* tp_str */
2612 PyObject_GenericGetAttr, /* tp_getattro */
2613 0, /* tp_setattro */
2614 0, /* tp_as_buffer */
2615 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2616 0, /* tp_doc */
2617 (traverseproc)dictiter_traverse, /* tp_traverse */
2618 0, /* tp_clear */
2619 0, /* tp_richcompare */
2620 0, /* tp_weaklistoffset */
2621 PyObject_SelfIter, /* tp_iter */
2622 (iternextfunc)dictiter_iternextkey, /* tp_iternext */
2623 dictiter_methods, /* tp_methods */
2624 0,
2625 };
2626
dictiter_iternextvalue(dictiterobject * di)2627 static PyObject *dictiter_iternextvalue(dictiterobject *di)
2628 {
2629 PyObject *value;
2630 register Py_ssize_t i, mask;
2631 register PyDictEntry *ep;
2632 PyDictObject *d = di->di_dict;
2633
2634 if (d == NULL)
2635 return NULL;
2636 assert (PyDict_Check(d));
2637
2638 if (di->di_used != d->ma_used) {
2639 PyErr_SetString(PyExc_RuntimeError,
2640 "dictionary changed size during iteration");
2641 di->di_used = -1; /* Make this state sticky */
2642 return NULL;
2643 }
2644
2645 i = di->di_pos;
2646 mask = d->ma_mask;
2647 if (i < 0 || i > mask)
2648 goto fail;
2649 ep = d->ma_table;
2650 while ((value=ep[i].me_value) == NULL) {
2651 i++;
2652 if (i > mask)
2653 goto fail;
2654 }
2655 di->di_pos = i+1;
2656 di->len--;
2657 Py_INCREF(value);
2658 return value;
2659
2660 fail:
2661 di->di_dict = NULL;
2662 Py_DECREF(d);
2663 return NULL;
2664 }
2665
2666 PyTypeObject PyDictIterValue_Type = {
2667 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2668 "dictionary-valueiterator", /* tp_name */
2669 sizeof(dictiterobject), /* tp_basicsize */
2670 0, /* tp_itemsize */
2671 /* methods */
2672 (destructor)dictiter_dealloc, /* tp_dealloc */
2673 0, /* tp_print */
2674 0, /* tp_getattr */
2675 0, /* tp_setattr */
2676 0, /* tp_compare */
2677 0, /* tp_repr */
2678 0, /* tp_as_number */
2679 0, /* tp_as_sequence */
2680 0, /* tp_as_mapping */
2681 0, /* tp_hash */
2682 0, /* tp_call */
2683 0, /* tp_str */
2684 PyObject_GenericGetAttr, /* tp_getattro */
2685 0, /* tp_setattro */
2686 0, /* tp_as_buffer */
2687 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2688 0, /* tp_doc */
2689 (traverseproc)dictiter_traverse, /* tp_traverse */
2690 0, /* tp_clear */
2691 0, /* tp_richcompare */
2692 0, /* tp_weaklistoffset */
2693 PyObject_SelfIter, /* tp_iter */
2694 (iternextfunc)dictiter_iternextvalue, /* tp_iternext */
2695 dictiter_methods, /* tp_methods */
2696 0,
2697 };
2698
dictiter_iternextitem(dictiterobject * di)2699 static PyObject *dictiter_iternextitem(dictiterobject *di)
2700 {
2701 PyObject *key, *value, *result = di->di_result;
2702 register Py_ssize_t i, mask;
2703 register PyDictEntry *ep;
2704 PyDictObject *d = di->di_dict;
2705
2706 if (d == NULL)
2707 return NULL;
2708 assert (PyDict_Check(d));
2709
2710 if (di->di_used != d->ma_used) {
2711 PyErr_SetString(PyExc_RuntimeError,
2712 "dictionary changed size during iteration");
2713 di->di_used = -1; /* Make this state sticky */
2714 return NULL;
2715 }
2716
2717 i = di->di_pos;
2718 if (i < 0)
2719 goto fail;
2720 ep = d->ma_table;
2721 mask = d->ma_mask;
2722 while (i <= mask && ep[i].me_value == NULL)
2723 i++;
2724 di->di_pos = i+1;
2725 if (i > mask)
2726 goto fail;
2727
2728 if (result->ob_refcnt == 1) {
2729 Py_INCREF(result);
2730 Py_DECREF(PyTuple_GET_ITEM(result, 0));
2731 Py_DECREF(PyTuple_GET_ITEM(result, 1));
2732 } else {
2733 result = PyTuple_New(2);
2734 if (result == NULL)
2735 return NULL;
2736 }
2737 di->len--;
2738 key = ep[i].me_key;
2739 value = ep[i].me_value;
2740 Py_INCREF(key);
2741 Py_INCREF(value);
2742 PyTuple_SET_ITEM(result, 0, key);
2743 PyTuple_SET_ITEM(result, 1, value);
2744 return result;
2745
2746 fail:
2747 di->di_dict = NULL;
2748 Py_DECREF(d);
2749 return NULL;
2750 }
2751
2752 PyTypeObject PyDictIterItem_Type = {
2753 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2754 "dictionary-itemiterator", /* tp_name */
2755 sizeof(dictiterobject), /* tp_basicsize */
2756 0, /* tp_itemsize */
2757 /* methods */
2758 (destructor)dictiter_dealloc, /* tp_dealloc */
2759 0, /* tp_print */
2760 0, /* tp_getattr */
2761 0, /* tp_setattr */
2762 0, /* tp_compare */
2763 0, /* tp_repr */
2764 0, /* tp_as_number */
2765 0, /* tp_as_sequence */
2766 0, /* tp_as_mapping */
2767 0, /* tp_hash */
2768 0, /* tp_call */
2769 0, /* tp_str */
2770 PyObject_GenericGetAttr, /* tp_getattro */
2771 0, /* tp_setattro */
2772 0, /* tp_as_buffer */
2773 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2774 0, /* tp_doc */
2775 (traverseproc)dictiter_traverse, /* tp_traverse */
2776 0, /* tp_clear */
2777 0, /* tp_richcompare */
2778 0, /* tp_weaklistoffset */
2779 PyObject_SelfIter, /* tp_iter */
2780 (iternextfunc)dictiter_iternextitem, /* tp_iternext */
2781 dictiter_methods, /* tp_methods */
2782 0,
2783 };
2784
2785 /***********************************************/
2786 /* View objects for keys(), items(), values(). */
2787 /***********************************************/
2788
2789 /* The instance lay-out is the same for all three; but the type differs. */
2790
2791 typedef struct {
2792 PyObject_HEAD
2793 PyDictObject *dv_dict;
2794 } dictviewobject;
2795
2796
2797 static void
dictview_dealloc(dictviewobject * dv)2798 dictview_dealloc(dictviewobject *dv)
2799 {
2800 Py_XDECREF(dv->dv_dict);
2801 PyObject_GC_Del(dv);
2802 }
2803
2804 static int
dictview_traverse(dictviewobject * dv,visitproc visit,void * arg)2805 dictview_traverse(dictviewobject *dv, visitproc visit, void *arg)
2806 {
2807 Py_VISIT(dv->dv_dict);
2808 return 0;
2809 }
2810
2811 static Py_ssize_t
dictview_len(dictviewobject * dv)2812 dictview_len(dictviewobject *dv)
2813 {
2814 Py_ssize_t len = 0;
2815 if (dv->dv_dict != NULL)
2816 len = dv->dv_dict->ma_used;
2817 return len;
2818 }
2819
2820 static PyObject *
dictview_new(PyObject * dict,PyTypeObject * type)2821 dictview_new(PyObject *dict, PyTypeObject *type)
2822 {
2823 dictviewobject *dv;
2824 if (dict == NULL) {
2825 PyErr_BadInternalCall();
2826 return NULL;
2827 }
2828 if (!PyDict_Check(dict)) {
2829 /* XXX Get rid of this restriction later */
2830 PyErr_Format(PyExc_TypeError,
2831 "%s() requires a dict argument, not '%s'",
2832 type->tp_name, dict->ob_type->tp_name);
2833 return NULL;
2834 }
2835 dv = PyObject_GC_New(dictviewobject, type);
2836 if (dv == NULL)
2837 return NULL;
2838 Py_INCREF(dict);
2839 dv->dv_dict = (PyDictObject *)dict;
2840 _PyObject_GC_TRACK(dv);
2841 return (PyObject *)dv;
2842 }
2843
2844 /* TODO(guido): The views objects are not complete:
2845
2846 * support more set operations
2847 * support arbitrary mappings?
2848 - either these should be static or exported in dictobject.h
2849 - if public then they should probably be in builtins
2850 */
2851
2852 /* Return 1 if self is a subset of other, iterating over self;
2853 0 if not; -1 if an error occurred. */
2854 static int
all_contained_in(PyObject * self,PyObject * other)2855 all_contained_in(PyObject *self, PyObject *other)
2856 {
2857 PyObject *iter = PyObject_GetIter(self);
2858 int ok = 1;
2859
2860 if (iter == NULL)
2861 return -1;
2862 for (;;) {
2863 PyObject *next = PyIter_Next(iter);
2864 if (next == NULL) {
2865 if (PyErr_Occurred())
2866 ok = -1;
2867 break;
2868 }
2869 ok = PySequence_Contains(other, next);
2870 Py_DECREF(next);
2871 if (ok <= 0)
2872 break;
2873 }
2874 Py_DECREF(iter);
2875 return ok;
2876 }
2877
2878 static PyObject *
dictview_richcompare(PyObject * self,PyObject * other,int op)2879 dictview_richcompare(PyObject *self, PyObject *other, int op)
2880 {
2881 Py_ssize_t len_self, len_other;
2882 int ok;
2883 PyObject *result;
2884
2885 assert(self != NULL);
2886 assert(PyDictViewSet_Check(self));
2887 assert(other != NULL);
2888
2889 if (!PyAnySet_Check(other) && !PyDictViewSet_Check(other)) {
2890 Py_INCREF(Py_NotImplemented);
2891 return Py_NotImplemented;
2892 }
2893
2894 len_self = PyObject_Size(self);
2895 if (len_self < 0)
2896 return NULL;
2897 len_other = PyObject_Size(other);
2898 if (len_other < 0)
2899 return NULL;
2900
2901 ok = 0;
2902 switch(op) {
2903
2904 case Py_NE:
2905 case Py_EQ:
2906 if (len_self == len_other)
2907 ok = all_contained_in(self, other);
2908 if (op == Py_NE && ok >= 0)
2909 ok = !ok;
2910 break;
2911
2912 case Py_LT:
2913 if (len_self < len_other)
2914 ok = all_contained_in(self, other);
2915 break;
2916
2917 case Py_LE:
2918 if (len_self <= len_other)
2919 ok = all_contained_in(self, other);
2920 break;
2921
2922 case Py_GT:
2923 if (len_self > len_other)
2924 ok = all_contained_in(other, self);
2925 break;
2926
2927 case Py_GE:
2928 if (len_self >= len_other)
2929 ok = all_contained_in(other, self);
2930 break;
2931
2932 }
2933 if (ok < 0)
2934 return NULL;
2935 result = ok ? Py_True : Py_False;
2936 Py_INCREF(result);
2937 return result;
2938 }
2939
2940 static PyObject *
dictview_repr(dictviewobject * dv)2941 dictview_repr(dictviewobject *dv)
2942 {
2943 PyObject *seq;
2944 PyObject *seq_str;
2945 PyObject *result;
2946
2947 seq = PySequence_List((PyObject *)dv);
2948 if (seq == NULL)
2949 return NULL;
2950
2951 seq_str = PyObject_Repr(seq);
2952 if (seq_str == NULL) {
2953 Py_DECREF(seq);
2954 return NULL;
2955 }
2956 result = PyString_FromFormat("%s(%s)", Py_TYPE(dv)->tp_name,
2957 PyString_AS_STRING(seq_str));
2958 Py_DECREF(seq_str);
2959 Py_DECREF(seq);
2960 return result;
2961 }
2962
2963 /*** dict_keys ***/
2964
2965 static PyObject *
dictkeys_iter(dictviewobject * dv)2966 dictkeys_iter(dictviewobject *dv)
2967 {
2968 if (dv->dv_dict == NULL) {
2969 Py_RETURN_NONE;
2970 }
2971 return dictiter_new(dv->dv_dict, &PyDictIterKey_Type);
2972 }
2973
2974 static int
dictkeys_contains(dictviewobject * dv,PyObject * obj)2975 dictkeys_contains(dictviewobject *dv, PyObject *obj)
2976 {
2977 if (dv->dv_dict == NULL)
2978 return 0;
2979 return PyDict_Contains((PyObject *)dv->dv_dict, obj);
2980 }
2981
2982 static PySequenceMethods dictkeys_as_sequence = {
2983 (lenfunc)dictview_len, /* sq_length */
2984 0, /* sq_concat */
2985 0, /* sq_repeat */
2986 0, /* sq_item */
2987 0, /* sq_slice */
2988 0, /* sq_ass_item */
2989 0, /* sq_ass_slice */
2990 (objobjproc)dictkeys_contains, /* sq_contains */
2991 };
2992
2993 static PyObject*
dictviews_sub(PyObject * self,PyObject * other)2994 dictviews_sub(PyObject* self, PyObject *other)
2995 {
2996 PyObject *result = PySet_New(self);
2997 PyObject *tmp;
2998 if (result == NULL)
2999 return NULL;
3000
3001
3002 tmp = PyObject_CallMethod(result, "difference_update", "(O)", other);
3003 if (tmp == NULL) {
3004 Py_DECREF(result);
3005 return NULL;
3006 }
3007
3008 Py_DECREF(tmp);
3009 return result;
3010 }
3011
3012 static PyObject*
dictviews_and(PyObject * self,PyObject * other)3013 dictviews_and(PyObject* self, PyObject *other)
3014 {
3015 PyObject *result = PySet_New(self);
3016 PyObject *tmp;
3017 if (result == NULL)
3018 return NULL;
3019
3020 tmp = PyObject_CallMethod(result, "intersection_update", "(O)", other);
3021 if (tmp == NULL) {
3022 Py_DECREF(result);
3023 return NULL;
3024 }
3025
3026 Py_DECREF(tmp);
3027 return result;
3028 }
3029
3030 static PyObject*
dictviews_or(PyObject * self,PyObject * other)3031 dictviews_or(PyObject* self, PyObject *other)
3032 {
3033 PyObject *result = PySet_New(self);
3034 PyObject *tmp;
3035 if (result == NULL)
3036 return NULL;
3037
3038 tmp = PyObject_CallMethod(result, "update", "(O)", other);
3039 if (tmp == NULL) {
3040 Py_DECREF(result);
3041 return NULL;
3042 }
3043
3044 Py_DECREF(tmp);
3045 return result;
3046 }
3047
3048 static PyObject*
dictviews_xor(PyObject * self,PyObject * other)3049 dictviews_xor(PyObject* self, PyObject *other)
3050 {
3051 PyObject *result = PySet_New(self);
3052 PyObject *tmp;
3053 if (result == NULL)
3054 return NULL;
3055
3056 tmp = PyObject_CallMethod(result, "symmetric_difference_update", "(O)", other);
3057 if (tmp == NULL) {
3058 Py_DECREF(result);
3059 return NULL;
3060 }
3061
3062 Py_DECREF(tmp);
3063 return result;
3064 }
3065
3066 static PyNumberMethods dictviews_as_number = {
3067 0, /*nb_add*/
3068 (binaryfunc)dictviews_sub, /*nb_subtract*/
3069 0, /*nb_multiply*/
3070 0, /*nb_divide*/
3071 0, /*nb_remainder*/
3072 0, /*nb_divmod*/
3073 0, /*nb_power*/
3074 0, /*nb_negative*/
3075 0, /*nb_positive*/
3076 0, /*nb_absolute*/
3077 0, /*nb_nonzero*/
3078 0, /*nb_invert*/
3079 0, /*nb_lshift*/
3080 0, /*nb_rshift*/
3081 (binaryfunc)dictviews_and, /*nb_and*/
3082 (binaryfunc)dictviews_xor, /*nb_xor*/
3083 (binaryfunc)dictviews_or, /*nb_or*/
3084 };
3085
3086 static PyMethodDef dictkeys_methods[] = {
3087 {NULL, NULL} /* sentinel */
3088 };
3089
3090 PyTypeObject PyDictKeys_Type = {
3091 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3092 "dict_keys", /* tp_name */
3093 sizeof(dictviewobject), /* tp_basicsize */
3094 0, /* tp_itemsize */
3095 /* methods */
3096 (destructor)dictview_dealloc, /* tp_dealloc */
3097 0, /* tp_print */
3098 0, /* tp_getattr */
3099 0, /* tp_setattr */
3100 0, /* tp_reserved */
3101 (reprfunc)dictview_repr, /* tp_repr */
3102 &dictviews_as_number, /* tp_as_number */
3103 &dictkeys_as_sequence, /* tp_as_sequence */
3104 0, /* tp_as_mapping */
3105 0, /* tp_hash */
3106 0, /* tp_call */
3107 0, /* tp_str */
3108 PyObject_GenericGetAttr, /* tp_getattro */
3109 0, /* tp_setattro */
3110 0, /* tp_as_buffer */
3111 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3112 Py_TPFLAGS_CHECKTYPES, /* tp_flags */
3113 0, /* tp_doc */
3114 (traverseproc)dictview_traverse, /* tp_traverse */
3115 0, /* tp_clear */
3116 dictview_richcompare, /* tp_richcompare */
3117 0, /* tp_weaklistoffset */
3118 (getiterfunc)dictkeys_iter, /* tp_iter */
3119 0, /* tp_iternext */
3120 dictkeys_methods, /* tp_methods */
3121 0,
3122 };
3123
3124 static PyObject *
dictkeys_new(PyObject * dict)3125 dictkeys_new(PyObject *dict)
3126 {
3127 return dictview_new(dict, &PyDictKeys_Type);
3128 }
3129
3130 /*** dict_items ***/
3131
3132 static PyObject *
dictitems_iter(dictviewobject * dv)3133 dictitems_iter(dictviewobject *dv)
3134 {
3135 if (dv->dv_dict == NULL) {
3136 Py_RETURN_NONE;
3137 }
3138 return dictiter_new(dv->dv_dict, &PyDictIterItem_Type);
3139 }
3140
3141 static int
dictitems_contains(dictviewobject * dv,PyObject * obj)3142 dictitems_contains(dictviewobject *dv, PyObject *obj)
3143 {
3144 PyObject *key, *value, *found;
3145 if (dv->dv_dict == NULL)
3146 return 0;
3147 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 2)
3148 return 0;
3149 key = PyTuple_GET_ITEM(obj, 0);
3150 value = PyTuple_GET_ITEM(obj, 1);
3151 found = PyDict_GetItem((PyObject *)dv->dv_dict, key);
3152 if (found == NULL) {
3153 if (PyErr_Occurred())
3154 return -1;
3155 return 0;
3156 }
3157 return PyObject_RichCompareBool(value, found, Py_EQ);
3158 }
3159
3160 static PySequenceMethods dictitems_as_sequence = {
3161 (lenfunc)dictview_len, /* sq_length */
3162 0, /* sq_concat */
3163 0, /* sq_repeat */
3164 0, /* sq_item */
3165 0, /* sq_slice */
3166 0, /* sq_ass_item */
3167 0, /* sq_ass_slice */
3168 (objobjproc)dictitems_contains, /* sq_contains */
3169 };
3170
3171 static PyMethodDef dictitems_methods[] = {
3172 {NULL, NULL} /* sentinel */
3173 };
3174
3175 PyTypeObject PyDictItems_Type = {
3176 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3177 "dict_items", /* tp_name */
3178 sizeof(dictviewobject), /* tp_basicsize */
3179 0, /* tp_itemsize */
3180 /* methods */
3181 (destructor)dictview_dealloc, /* tp_dealloc */
3182 0, /* tp_print */
3183 0, /* tp_getattr */
3184 0, /* tp_setattr */
3185 0, /* tp_reserved */
3186 (reprfunc)dictview_repr, /* tp_repr */
3187 &dictviews_as_number, /* tp_as_number */
3188 &dictitems_as_sequence, /* tp_as_sequence */
3189 0, /* tp_as_mapping */
3190 0, /* tp_hash */
3191 0, /* tp_call */
3192 0, /* tp_str */
3193 PyObject_GenericGetAttr, /* tp_getattro */
3194 0, /* tp_setattro */
3195 0, /* tp_as_buffer */
3196 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3197 Py_TPFLAGS_CHECKTYPES, /* tp_flags */
3198 0, /* tp_doc */
3199 (traverseproc)dictview_traverse, /* tp_traverse */
3200 0, /* tp_clear */
3201 dictview_richcompare, /* tp_richcompare */
3202 0, /* tp_weaklistoffset */
3203 (getiterfunc)dictitems_iter, /* tp_iter */
3204 0, /* tp_iternext */
3205 dictitems_methods, /* tp_methods */
3206 0,
3207 };
3208
3209 static PyObject *
dictitems_new(PyObject * dict)3210 dictitems_new(PyObject *dict)
3211 {
3212 return dictview_new(dict, &PyDictItems_Type);
3213 }
3214
3215 /*** dict_values ***/
3216
3217 static PyObject *
dictvalues_iter(dictviewobject * dv)3218 dictvalues_iter(dictviewobject *dv)
3219 {
3220 if (dv->dv_dict == NULL) {
3221 Py_RETURN_NONE;
3222 }
3223 return dictiter_new(dv->dv_dict, &PyDictIterValue_Type);
3224 }
3225
3226 static PySequenceMethods dictvalues_as_sequence = {
3227 (lenfunc)dictview_len, /* sq_length */
3228 0, /* sq_concat */
3229 0, /* sq_repeat */
3230 0, /* sq_item */
3231 0, /* sq_slice */
3232 0, /* sq_ass_item */
3233 0, /* sq_ass_slice */
3234 (objobjproc)0, /* sq_contains */
3235 };
3236
3237 static PyMethodDef dictvalues_methods[] = {
3238 {NULL, NULL} /* sentinel */
3239 };
3240
3241 PyTypeObject PyDictValues_Type = {
3242 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3243 "dict_values", /* tp_name */
3244 sizeof(dictviewobject), /* tp_basicsize */
3245 0, /* tp_itemsize */
3246 /* methods */
3247 (destructor)dictview_dealloc, /* tp_dealloc */
3248 0, /* tp_print */
3249 0, /* tp_getattr */
3250 0, /* tp_setattr */
3251 0, /* tp_reserved */
3252 (reprfunc)dictview_repr, /* tp_repr */
3253 0, /* tp_as_number */
3254 &dictvalues_as_sequence, /* tp_as_sequence */
3255 0, /* tp_as_mapping */
3256 0, /* tp_hash */
3257 0, /* tp_call */
3258 0, /* tp_str */
3259 PyObject_GenericGetAttr, /* tp_getattro */
3260 0, /* tp_setattro */
3261 0, /* tp_as_buffer */
3262 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3263 0, /* tp_doc */
3264 (traverseproc)dictview_traverse, /* tp_traverse */
3265 0, /* tp_clear */
3266 0, /* tp_richcompare */
3267 0, /* tp_weaklistoffset */
3268 (getiterfunc)dictvalues_iter, /* tp_iter */
3269 0, /* tp_iternext */
3270 dictvalues_methods, /* tp_methods */
3271 0,
3272 };
3273
3274 static PyObject *
dictvalues_new(PyObject * dict)3275 dictvalues_new(PyObject *dict)
3276 {
3277 return dictview_new(dict, &PyDictValues_Type);
3278 }
3279