1
2 /* Dictionary object implementation using a hash table */
3
4 /* The distribution includes a separate file, Objects/dictnotes.txt,
5 describing explorations into dictionary design and optimization.
6 It covers typical dictionary use patterns, the parameters for
7 tuning dictionaries, and several ideas for possible optimizations.
8 */
9
10 #include "Python.h"
11
12
13 /* Set a key error with the specified argument, wrapping it in a
14 * tuple automatically so that tuple keys are not unpacked as the
15 * exception arguments. */
16 static void
set_key_error(PyObject * arg)17 set_key_error(PyObject *arg)
18 {
19 PyObject *tup;
20 tup = PyTuple_Pack(1, arg);
21 if (!tup)
22 return; /* caller will expect error to be set anyway */
23 PyErr_SetObject(PyExc_KeyError, tup);
24 Py_DECREF(tup);
25 }
26
27 /* Define this out if you don't want conversion statistics on exit. */
28 #undef SHOW_CONVERSION_COUNTS
29
30 /* See large comment block below. This must be >= 1. */
31 #define PERTURB_SHIFT 5
32
33 /*
34 Major subtleties ahead: Most hash schemes depend on having a "good" hash
35 function, in the sense of simulating randomness. Python doesn't: its most
36 important hash functions (for strings and ints) are very regular in common
37 cases:
38
39 >>> map(hash, (0, 1, 2, 3))
40 [0, 1, 2, 3]
41 >>> map(hash, ("namea", "nameb", "namec", "named"))
42 [-1658398457, -1658398460, -1658398459, -1658398462]
43 >>>
44
45 This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
46 the low-order i bits as the initial table index is extremely fast, and there
47 are no collisions at all for dicts indexed by a contiguous range of ints.
48 The same is approximately true when keys are "consecutive" strings. So this
49 gives better-than-random behavior in common cases, and that's very desirable.
50
51 OTOH, when collisions occur, the tendency to fill contiguous slices of the
52 hash table makes a good collision resolution strategy crucial. Taking only
53 the last i bits of the hash code is also vulnerable: for example, consider
54 [i << 16 for i in range(20000)] as a set of keys. Since ints are their own
55 hash codes, and this fits in a dict of size 2**15, the last 15 bits of every
56 hash code are all 0: they *all* map to the same table index.
57
58 But catering to unusual cases should not slow the usual ones, so we just take
59 the last i bits anyway. It's up to collision resolution to do the rest. If
60 we *usually* find the key we're looking for on the first try (and, it turns
61 out, we usually do -- the table load factor is kept under 2/3, so the odds
62 are solidly in our favor), then it makes best sense to keep the initial index
63 computation dirt cheap.
64
65 The first half of collision resolution is to visit table indices via this
66 recurrence:
67
68 j = ((5*j) + 1) mod 2**i
69
70 For any initial j in range(2**i), repeating that 2**i times generates each
71 int in range(2**i) exactly once (see any text on random-number generation for
72 proof). By itself, this doesn't help much: like linear probing (setting
73 j += 1, or j -= 1, on each loop trip), it scans the table entries in a fixed
74 order. This would be bad, except that's not the only thing we do, and it's
75 actually *good* in the common cases where hash keys are consecutive. In an
76 example that's really too small to make this entirely clear, for a table of
77 size 2**3 the order of indices is:
78
79 0 -> 1 -> 6 -> 7 -> 4 -> 5 -> 2 -> 3 -> 0 [and here it's repeating]
80
81 If two things come in at index 5, the first place we look after is index 2,
82 not 6, so if another comes in at index 6 the collision at 5 didn't hurt it.
83 Linear probing is deadly in this case because there the fixed probe order
84 is the *same* as the order consecutive keys are likely to arrive. But it's
85 extremely unlikely hash codes will follow a 5*j+1 recurrence by accident,
86 and certain that consecutive hash codes do not.
87
88 The other half of the strategy is to get the other bits of the hash code
89 into play. This is done by initializing a (unsigned) vrbl "perturb" to the
90 full hash code, and changing the recurrence to:
91
92 j = (5*j) + 1 + perturb;
93 perturb >>= PERTURB_SHIFT;
94 use j % 2**i as the next table index;
95
96 Now the probe sequence depends (eventually) on every bit in the hash code,
97 and the pseudo-scrambling property of recurring on 5*j+1 is more valuable,
98 because it quickly magnifies small differences in the bits that didn't affect
99 the initial index. Note that because perturb is unsigned, if the recurrence
100 is executed often enough perturb eventually becomes and remains 0. At that
101 point (very rarely reached) the recurrence is on (just) 5*j+1 again, and
102 that's certain to find an empty slot eventually (since it generates every int
103 in range(2**i), and we make sure there's always at least one empty slot).
104
105 Selecting a good value for PERTURB_SHIFT is a balancing act. You want it
106 small so that the high bits of the hash code continue to affect the probe
107 sequence across iterations; but you want it large so that in really bad cases
108 the high-order hash bits have an effect on early iterations. 5 was "the
109 best" in minimizing total collisions across experiments Tim Peters ran (on
110 both normal and pathological cases), but 4 and 6 weren't significantly worse.
111
112 Historical: Reimer Behrends contributed the idea of using a polynomial-based
113 approach, using repeated multiplication by x in GF(2**n) where an irreducible
114 polynomial for each table size was chosen such that x was a primitive root.
115 Christian Tismer later extended that to use division by x instead, as an
116 efficient way to get the high bits of the hash code into play. This scheme
117 also gave excellent collision statistics, but was more expensive: two
118 if-tests were required inside the loop; computing "the next" index took about
119 the same number of operations but without as much potential parallelism
120 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
121 above, and then shifting perturb can be done while the table index is being
122 masked); and the PyDictObject struct required a member to hold the table's
123 polynomial. In Tim's experiments the current scheme ran faster, produced
124 equally good collision statistics, needed less code & used less memory.
125
126 Theoretical Python 2.5 headache: hash codes are only C "long", but
127 sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
128 dict is genuinely huge, then only the slots directly reachable via indexing
129 by a C long can be the first slot in a probe sequence. The probe sequence
130 will still eventually reach every slot in the table, but the collision rate
131 on initial probes may be much higher than this scheme was designed for.
132 Getting a hash code as fat as Py_ssize_t is the only real cure. But in
133 practice, this probably won't make a lick of difference for many years (at
134 which point everyone will have terabytes of RAM on 64-bit boxes).
135 */
136
137 /* Object used as dummy key to fill deleted entries */
138 static PyObject *dummy = NULL; /* Initialized by first call to newPyDictObject() */
139
140 #ifdef Py_REF_DEBUG
141 PyObject *
_PyDict_Dummy(void)142 _PyDict_Dummy(void)
143 {
144 return dummy;
145 }
146 #endif
147
148 /* forward declarations */
149 static PyDictEntry *
150 lookdict_string(PyDictObject *mp, PyObject *key, long hash);
151
152 #ifdef SHOW_CONVERSION_COUNTS
153 static long created = 0L;
154 static long converted = 0L;
155
156 static void
show_counts(void)157 show_counts(void)
158 {
159 fprintf(stderr, "created %ld string dicts\n", created);
160 fprintf(stderr, "converted %ld to normal dicts\n", converted);
161 fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
162 }
163 #endif
164
165 /* Debug statistic to compare allocations with reuse through the free list */
166 #undef SHOW_ALLOC_COUNT
167 #ifdef SHOW_ALLOC_COUNT
168 static size_t count_alloc = 0;
169 static size_t count_reuse = 0;
170
171 static void
show_alloc(void)172 show_alloc(void)
173 {
174 fprintf(stderr, "Dict allocations: %" PY_FORMAT_SIZE_T "d\n",
175 count_alloc);
176 fprintf(stderr, "Dict reuse through freelist: %" PY_FORMAT_SIZE_T
177 "d\n", count_reuse);
178 fprintf(stderr, "%.2f%% reuse rate\n\n",
179 (100.0*count_reuse/(count_alloc+count_reuse)));
180 }
181 #endif
182
183 /* Debug statistic to count GC tracking of dicts */
184 #ifdef SHOW_TRACK_COUNT
185 static Py_ssize_t count_untracked = 0;
186 static Py_ssize_t count_tracked = 0;
187
188 static void
show_track(void)189 show_track(void)
190 {
191 fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n",
192 count_tracked + count_untracked);
193 fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T
194 "d\n", count_tracked);
195 fprintf(stderr, "%.2f%% dict tracking rate\n\n",
196 (100.0*count_tracked/(count_untracked+count_tracked)));
197 }
198 #endif
199
200
201 /* Initialization macros.
202 There are two ways to create a dict: PyDict_New() is the main C API
203 function, and the tp_new slot maps to dict_new(). In the latter case we
204 can save a little time over what PyDict_New does because it's guaranteed
205 that the PyDictObject struct is already zeroed out.
206 Everyone except dict_new() should use EMPTY_TO_MINSIZE (unless they have
207 an excellent reason not to).
208 */
209
210 #define INIT_NONZERO_DICT_SLOTS(mp) do { \
211 (mp)->ma_table = (mp)->ma_smalltable; \
212 (mp)->ma_mask = PyDict_MINSIZE - 1; \
213 } while(0)
214
215 #define EMPTY_TO_MINSIZE(mp) do { \
216 memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); \
217 (mp)->ma_used = (mp)->ma_fill = 0; \
218 INIT_NONZERO_DICT_SLOTS(mp); \
219 } while(0)
220
221 /* Dictionary reuse scheme to save calls to malloc, free, and memset */
222 #ifndef PyDict_MAXFREELIST
223 #define PyDict_MAXFREELIST 80
224 #endif
225 static PyDictObject *free_list[PyDict_MAXFREELIST];
226 static int numfree = 0;
227
228 void
PyDict_Fini(void)229 PyDict_Fini(void)
230 {
231 PyDictObject *op;
232
233 while (numfree) {
234 op = free_list[--numfree];
235 assert(PyDict_CheckExact(op));
236 PyObject_GC_Del(op);
237 }
238 }
239
240 PyObject *
PyDict_New(void)241 PyDict_New(void)
242 {
243 register PyDictObject *mp;
244 if (dummy == NULL) { /* Auto-initialize dummy */
245 dummy = PyString_FromString("<dummy key>");
246 if (dummy == NULL)
247 return NULL;
248 #ifdef SHOW_CONVERSION_COUNTS
249 Py_AtExit(show_counts);
250 #endif
251 #ifdef SHOW_ALLOC_COUNT
252 Py_AtExit(show_alloc);
253 #endif
254 #ifdef SHOW_TRACK_COUNT
255 Py_AtExit(show_track);
256 #endif
257 }
258 if (numfree) {
259 mp = free_list[--numfree];
260 assert (mp != NULL);
261 assert (Py_TYPE(mp) == &PyDict_Type);
262 _Py_NewReference((PyObject *)mp);
263 if (mp->ma_fill) {
264 EMPTY_TO_MINSIZE(mp);
265 } else {
266 /* At least set ma_table and ma_mask; these are wrong
267 if an empty but presized dict is added to freelist */
268 INIT_NONZERO_DICT_SLOTS(mp);
269 }
270 assert (mp->ma_used == 0);
271 assert (mp->ma_table == mp->ma_smalltable);
272 assert (mp->ma_mask == PyDict_MINSIZE - 1);
273 #ifdef SHOW_ALLOC_COUNT
274 count_reuse++;
275 #endif
276 } else {
277 mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
278 if (mp == NULL)
279 return NULL;
280 EMPTY_TO_MINSIZE(mp);
281 #ifdef SHOW_ALLOC_COUNT
282 count_alloc++;
283 #endif
284 }
285 mp->ma_lookup = lookdict_string;
286 #ifdef SHOW_TRACK_COUNT
287 count_untracked++;
288 #endif
289 #ifdef SHOW_CONVERSION_COUNTS
290 ++created;
291 #endif
292 return (PyObject *)mp;
293 }
294
295 /*
296 The basic lookup function used by all operations.
297 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
298 Open addressing is preferred over chaining since the link overhead for
299 chaining would be substantial (100% with typical malloc overhead).
300
301 The initial probe index is computed as hash mod the table size. Subsequent
302 probe indices are computed as explained earlier.
303
304 All arithmetic on hash should ignore overflow.
305
306 (The details in this version are due to Tim Peters, building on many past
307 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
308 Christian Tismer).
309
310 lookdict() is general-purpose, and may return NULL if (and only if) a
311 comparison raises an exception (this was new in Python 2.5).
312 lookdict_string() below is specialized to string keys, comparison of which can
313 never raise an exception; that function can never return NULL. For both, when
314 the key isn't found a PyDictEntry* is returned for which the me_value field is
315 NULL; this is the slot in the dict at which the key would have been found, and
316 the caller can (if it wishes) add the <key, value> pair to the returned
317 PyDictEntry*.
318 */
319 static PyDictEntry *
lookdict(PyDictObject * mp,PyObject * key,register long hash)320 lookdict(PyDictObject *mp, PyObject *key, register long hash)
321 {
322 register size_t i;
323 register size_t perturb;
324 register PyDictEntry *freeslot;
325 register size_t mask = (size_t)mp->ma_mask;
326 PyDictEntry *ep0 = mp->ma_table;
327 register PyDictEntry *ep;
328 register int cmp;
329 PyObject *startkey;
330
331 i = (size_t)hash & mask;
332 ep = &ep0[i];
333 if (ep->me_key == NULL || ep->me_key == key)
334 return ep;
335
336 if (ep->me_key == dummy)
337 freeslot = ep;
338 else {
339 if (ep->me_hash == hash) {
340 startkey = ep->me_key;
341 Py_INCREF(startkey);
342 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
343 Py_DECREF(startkey);
344 if (cmp < 0)
345 return NULL;
346 if (ep0 == mp->ma_table && ep->me_key == startkey) {
347 if (cmp > 0)
348 return ep;
349 }
350 else {
351 /* The compare did major nasty stuff to the
352 * dict: start over.
353 * XXX A clever adversary could prevent this
354 * XXX from terminating.
355 */
356 return lookdict(mp, key, hash);
357 }
358 }
359 freeslot = NULL;
360 }
361
362 /* In the loop, me_key == dummy is by far (factor of 100s) the
363 least likely outcome, so test for that last. */
364 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
365 i = (i << 2) + i + perturb + 1;
366 ep = &ep0[i & mask];
367 if (ep->me_key == NULL)
368 return freeslot == NULL ? ep : freeslot;
369 if (ep->me_key == key)
370 return ep;
371 if (ep->me_hash == hash && ep->me_key != dummy) {
372 startkey = ep->me_key;
373 Py_INCREF(startkey);
374 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
375 Py_DECREF(startkey);
376 if (cmp < 0)
377 return NULL;
378 if (ep0 == mp->ma_table && ep->me_key == startkey) {
379 if (cmp > 0)
380 return ep;
381 }
382 else {
383 /* The compare did major nasty stuff to the
384 * dict: start over.
385 * XXX A clever adversary could prevent this
386 * XXX from terminating.
387 */
388 return lookdict(mp, key, hash);
389 }
390 }
391 else if (ep->me_key == dummy && freeslot == NULL)
392 freeslot = ep;
393 }
394 assert(0); /* NOT REACHED */
395 return 0;
396 }
397
398 /*
399 * Hacked up version of lookdict which can assume keys are always strings;
400 * this assumption allows testing for errors during PyObject_RichCompareBool()
401 * to be dropped; string-string comparisons never raise exceptions. This also
402 * means we don't need to go through PyObject_RichCompareBool(); we can always
403 * use _PyString_Eq() directly.
404 *
405 * This is valuable because dicts with only string keys are very common.
406 */
407 static PyDictEntry *
lookdict_string(PyDictObject * mp,PyObject * key,register long hash)408 lookdict_string(PyDictObject *mp, PyObject *key, register long hash)
409 {
410 register size_t i;
411 register size_t perturb;
412 register PyDictEntry *freeslot;
413 register size_t mask = (size_t)mp->ma_mask;
414 PyDictEntry *ep0 = mp->ma_table;
415 register PyDictEntry *ep;
416
417 /* Make sure this function doesn't have to handle non-string keys,
418 including subclasses of str; e.g., one reason to subclass
419 strings is to override __eq__, and for speed we don't cater to
420 that here. */
421 if (!PyString_CheckExact(key)) {
422 #ifdef SHOW_CONVERSION_COUNTS
423 ++converted;
424 #endif
425 mp->ma_lookup = lookdict;
426 return lookdict(mp, key, hash);
427 }
428 i = hash & mask;
429 ep = &ep0[i];
430 if (ep->me_key == NULL || ep->me_key == key)
431 return ep;
432 if (ep->me_key == dummy)
433 freeslot = ep;
434 else {
435 if (ep->me_hash == hash && _PyString_Eq(ep->me_key, key))
436 return ep;
437 freeslot = NULL;
438 }
439
440 /* In the loop, me_key == dummy is by far (factor of 100s) the
441 least likely outcome, so test for that last. */
442 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
443 i = (i << 2) + i + perturb + 1;
444 ep = &ep0[i & mask];
445 if (ep->me_key == NULL)
446 return freeslot == NULL ? ep : freeslot;
447 if (ep->me_key == key
448 || (ep->me_hash == hash
449 && ep->me_key != dummy
450 && _PyString_Eq(ep->me_key, key)))
451 return ep;
452 if (ep->me_key == dummy && freeslot == NULL)
453 freeslot = ep;
454 }
455 assert(0); /* NOT REACHED */
456 return 0;
457 }
458
459 #ifdef SHOW_TRACK_COUNT
460 #define INCREASE_TRACK_COUNT \
461 (count_tracked++, count_untracked--);
462 #define DECREASE_TRACK_COUNT \
463 (count_tracked--, count_untracked++);
464 #else
465 #define INCREASE_TRACK_COUNT
466 #define DECREASE_TRACK_COUNT
467 #endif
468
469 #define MAINTAIN_TRACKING(mp, key, value) \
470 do { \
471 if (!_PyObject_GC_IS_TRACKED(mp)) { \
472 if (_PyObject_GC_MAY_BE_TRACKED(key) || \
473 _PyObject_GC_MAY_BE_TRACKED(value)) { \
474 _PyObject_GC_TRACK(mp); \
475 INCREASE_TRACK_COUNT \
476 } \
477 } \
478 } while(0)
479
480 void
_PyDict_MaybeUntrack(PyObject * op)481 _PyDict_MaybeUntrack(PyObject *op)
482 {
483 PyDictObject *mp;
484 PyObject *value;
485 Py_ssize_t mask, i;
486 PyDictEntry *ep;
487
488 if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
489 return;
490
491 mp = (PyDictObject *) op;
492 ep = mp->ma_table;
493 mask = mp->ma_mask;
494 for (i = 0; i <= mask; i++) {
495 if ((value = ep[i].me_value) == NULL)
496 continue;
497 if (_PyObject_GC_MAY_BE_TRACKED(value) ||
498 _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key))
499 return;
500 }
501 DECREASE_TRACK_COUNT
502 _PyObject_GC_UNTRACK(op);
503 }
504
505 /*
506 Internal routine to insert a new item into the table when you have entry object.
507 Used by insertdict.
508 */
509 static int
insertdict_by_entry(register PyDictObject * mp,PyObject * key,long hash,PyDictEntry * ep,PyObject * value)510 insertdict_by_entry(register PyDictObject *mp, PyObject *key, long hash,
511 PyDictEntry *ep, PyObject *value)
512 {
513 PyObject *old_value;
514
515 MAINTAIN_TRACKING(mp, key, value);
516 if (ep->me_value != NULL) {
517 old_value = ep->me_value;
518 ep->me_value = value;
519 Py_DECREF(old_value); /* which **CAN** re-enter */
520 Py_DECREF(key);
521 }
522 else {
523 if (ep->me_key == NULL)
524 mp->ma_fill++;
525 else {
526 assert(ep->me_key == dummy);
527 Py_DECREF(dummy);
528 }
529 ep->me_key = key;
530 ep->me_hash = (Py_ssize_t)hash;
531 ep->me_value = value;
532 mp->ma_used++;
533 }
534 return 0;
535 }
536
537
538 /*
539 Internal routine to insert a new item into the table.
540 Used both by the internal resize routine and by the public insert routine.
541 Eats a reference to key and one to value.
542 Returns -1 if an error occurred, or 0 on success.
543 */
544 static int
insertdict(register PyDictObject * mp,PyObject * key,long hash,PyObject * value)545 insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
546 {
547 register PyDictEntry *ep;
548
549 assert(mp->ma_lookup != NULL);
550 ep = mp->ma_lookup(mp, key, hash);
551 if (ep == NULL) {
552 Py_DECREF(key);
553 Py_DECREF(value);
554 return -1;
555 }
556 return insertdict_by_entry(mp, key, hash, ep, value);
557 }
558
559 /*
560 Internal routine used by dictresize() to insert an item which is
561 known to be absent from the dict. This routine also assumes that
562 the dict contains no deleted entries. Besides the performance benefit,
563 using insertdict() in dictresize() is dangerous (SF bug #1456209).
564 Note that no refcounts are changed by this routine; if needed, the caller
565 is responsible for incref'ing `key` and `value`.
566 */
567 static void
insertdict_clean(register PyDictObject * mp,PyObject * key,long hash,PyObject * value)568 insertdict_clean(register PyDictObject *mp, PyObject *key, long hash,
569 PyObject *value)
570 {
571 register size_t i;
572 register size_t perturb;
573 register size_t mask = (size_t)mp->ma_mask;
574 PyDictEntry *ep0 = mp->ma_table;
575 register PyDictEntry *ep;
576
577 MAINTAIN_TRACKING(mp, key, value);
578 i = hash & mask;
579 ep = &ep0[i];
580 for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
581 i = (i << 2) + i + perturb + 1;
582 ep = &ep0[i & mask];
583 }
584 assert(ep->me_value == NULL);
585 mp->ma_fill++;
586 ep->me_key = key;
587 ep->me_hash = (Py_ssize_t)hash;
588 ep->me_value = value;
589 mp->ma_used++;
590 }
591
592 /*
593 Restructure the table by allocating a new table and reinserting all
594 items again. When entries have been deleted, the new table may
595 actually be smaller than the old one.
596 */
597 static int
dictresize(PyDictObject * mp,Py_ssize_t minused)598 dictresize(PyDictObject *mp, Py_ssize_t minused)
599 {
600 Py_ssize_t newsize;
601 PyDictEntry *oldtable, *newtable, *ep;
602 Py_ssize_t i;
603 int is_oldtable_malloced;
604 PyDictEntry small_copy[PyDict_MINSIZE];
605
606 assert(minused >= 0);
607
608 /* Find the smallest table size > minused. */
609 for (newsize = PyDict_MINSIZE;
610 newsize <= minused && newsize > 0;
611 newsize <<= 1)
612 ;
613 if (newsize <= 0) {
614 PyErr_NoMemory();
615 return -1;
616 }
617
618 /* Get space for a new table. */
619 oldtable = mp->ma_table;
620 assert(oldtable != NULL);
621 is_oldtable_malloced = oldtable != mp->ma_smalltable;
622
623 if (newsize == PyDict_MINSIZE) {
624 /* A large table is shrinking, or we can't get any smaller. */
625 newtable = mp->ma_smalltable;
626 if (newtable == oldtable) {
627 if (mp->ma_fill == mp->ma_used) {
628 /* No dummies, so no point doing anything. */
629 return 0;
630 }
631 /* We're not going to resize it, but rebuild the
632 table anyway to purge old dummy entries.
633 Subtle: This is *necessary* if fill==size,
634 as lookdict needs at least one virgin slot to
635 terminate failing searches. If fill < size, it's
636 merely desirable, as dummies slow searches. */
637 assert(mp->ma_fill > mp->ma_used);
638 memcpy(small_copy, oldtable, sizeof(small_copy));
639 oldtable = small_copy;
640 }
641 }
642 else {
643 newtable = PyMem_NEW(PyDictEntry, newsize);
644 if (newtable == NULL) {
645 PyErr_NoMemory();
646 return -1;
647 }
648 }
649
650 /* Make the dict empty, using the new table. */
651 assert(newtable != oldtable);
652 mp->ma_table = newtable;
653 mp->ma_mask = newsize - 1;
654 memset(newtable, 0, sizeof(PyDictEntry) * newsize);
655 mp->ma_used = 0;
656 i = mp->ma_fill;
657 mp->ma_fill = 0;
658
659 /* Copy the data over; this is refcount-neutral for active entries;
660 dummy entries aren't copied over, of course */
661 for (ep = oldtable; i > 0; ep++) {
662 if (ep->me_value != NULL) { /* active entry */
663 --i;
664 insertdict_clean(mp, ep->me_key, (long)ep->me_hash,
665 ep->me_value);
666 }
667 else if (ep->me_key != NULL) { /* dummy entry */
668 --i;
669 assert(ep->me_key == dummy);
670 Py_DECREF(ep->me_key);
671 }
672 /* else key == value == NULL: nothing to do */
673 }
674
675 if (is_oldtable_malloced)
676 PyMem_DEL(oldtable);
677 return 0;
678 }
679
680 /* Create a new dictionary pre-sized to hold an estimated number of elements.
681 Underestimates are okay because the dictionary will resize as necessary.
682 Overestimates just mean the dictionary will be more sparse than usual.
683 */
684
685 PyObject *
_PyDict_NewPresized(Py_ssize_t minused)686 _PyDict_NewPresized(Py_ssize_t minused)
687 {
688 PyObject *op = PyDict_New();
689
690 if (minused>5 && op != NULL && dictresize((PyDictObject *)op, minused) == -1) {
691 Py_DECREF(op);
692 return NULL;
693 }
694 return op;
695 }
696
697 /* Note that, for historical reasons, PyDict_GetItem() suppresses all errors
698 * that may occur (originally dicts supported only string keys, and exceptions
699 * weren't possible). So, while the original intent was that a NULL return
700 * meant the key wasn't present, in reality it can mean that, or that an error
701 * (suppressed) occurred while computing the key's hash, or that some error
702 * (suppressed) occurred when comparing keys in the dict's internal probe
703 * sequence. A nasty example of the latter is when a Python-coded comparison
704 * function hits a stack-depth error, which can cause this to return NULL
705 * even if the key is present.
706 */
707 PyObject *
PyDict_GetItem(PyObject * op,PyObject * key)708 PyDict_GetItem(PyObject *op, PyObject *key)
709 {
710 long hash;
711 PyDictObject *mp = (PyDictObject *)op;
712 PyDictEntry *ep;
713 PyThreadState *tstate;
714 if (!PyDict_Check(op))
715 return NULL;
716 if (!PyString_CheckExact(key) ||
717 (hash = ((PyStringObject *) key)->ob_shash) == -1)
718 {
719 hash = PyObject_Hash(key);
720 if (hash == -1) {
721 PyErr_Clear();
722 return NULL;
723 }
724 }
725
726 /* We can arrive here with a NULL tstate during initialization: try
727 running "python -Wi" for an example related to string interning.
728 Let's just hope that no exception occurs then... This must be
729 _PyThreadState_Current and not PyThreadState_GET() because in debug
730 mode, the latter complains if tstate is NULL. */
731 tstate = _PyThreadState_Current;
732 if (tstate != NULL && tstate->curexc_type != NULL) {
733 /* preserve the existing exception */
734 PyObject *err_type, *err_value, *err_tb;
735 PyErr_Fetch(&err_type, &err_value, &err_tb);
736 ep = (mp->ma_lookup)(mp, key, hash);
737 /* ignore errors */
738 PyErr_Restore(err_type, err_value, err_tb);
739 if (ep == NULL)
740 return NULL;
741 }
742 else {
743 ep = (mp->ma_lookup)(mp, key, hash);
744 if (ep == NULL) {
745 PyErr_Clear();
746 return NULL;
747 }
748 }
749 return ep->me_value;
750 }
751
752 static int
dict_set_item_by_hash_or_entry(register PyObject * op,PyObject * key,long hash,PyDictEntry * ep,PyObject * value)753 dict_set_item_by_hash_or_entry(register PyObject *op, PyObject *key,
754 long hash, PyDictEntry *ep, PyObject *value)
755 {
756 register PyDictObject *mp;
757 register Py_ssize_t n_used;
758
759 mp = (PyDictObject *)op;
760 assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */
761 n_used = mp->ma_used;
762 Py_INCREF(value);
763 Py_INCREF(key);
764 if (ep == NULL) {
765 if (insertdict(mp, key, hash, value) != 0)
766 return -1;
767 }
768 else {
769 if (insertdict_by_entry(mp, key, hash, ep, value) != 0)
770 return -1;
771 }
772 /* If we added a key, we can safely resize. Otherwise just return!
773 * If fill >= 2/3 size, adjust size. Normally, this doubles or
774 * quaduples the size, but it's also possible for the dict to shrink
775 * (if ma_fill is much larger than ma_used, meaning a lot of dict
776 * keys have been * deleted).
777 *
778 * Quadrupling the size improves average dictionary sparseness
779 * (reducing collisions) at the cost of some memory and iteration
780 * speed (which loops over every possible entry). It also halves
781 * the number of expensive resize operations in a growing dictionary.
782 *
783 * Very large dictionaries (over 50K items) use doubling instead.
784 * This may help applications with severe memory constraints.
785 */
786 if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
787 return 0;
788 return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
789 }
790
791 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
792 * dictionary if it's merely replacing the value for an existing key.
793 * This means that it's safe to loop over a dictionary with PyDict_Next()
794 * and occasionally replace a value -- but you can't insert new keys or
795 * remove them.
796 */
797 int
PyDict_SetItem(register PyObject * op,PyObject * key,PyObject * value)798 PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
799 {
800 register long hash;
801
802 if (!PyDict_Check(op)) {
803 PyErr_BadInternalCall();
804 return -1;
805 }
806 assert(key);
807 assert(value);
808 if (PyString_CheckExact(key)) {
809 hash = ((PyStringObject *)key)->ob_shash;
810 if (hash == -1)
811 hash = PyObject_Hash(key);
812 }
813 else {
814 hash = PyObject_Hash(key);
815 if (hash == -1)
816 return -1;
817 }
818 return dict_set_item_by_hash_or_entry(op, key, hash, NULL, value);
819 }
820
821 int
PyDict_DelItem(PyObject * op,PyObject * key)822 PyDict_DelItem(PyObject *op, PyObject *key)
823 {
824 register PyDictObject *mp;
825 register long hash;
826 register PyDictEntry *ep;
827 PyObject *old_value, *old_key;
828
829 if (!PyDict_Check(op)) {
830 PyErr_BadInternalCall();
831 return -1;
832 }
833 assert(key);
834 if (!PyString_CheckExact(key) ||
835 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
836 hash = PyObject_Hash(key);
837 if (hash == -1)
838 return -1;
839 }
840 mp = (PyDictObject *)op;
841 ep = (mp->ma_lookup)(mp, key, hash);
842 if (ep == NULL)
843 return -1;
844 if (ep->me_value == NULL) {
845 set_key_error(key);
846 return -1;
847 }
848 old_key = ep->me_key;
849 Py_INCREF(dummy);
850 ep->me_key = dummy;
851 old_value = ep->me_value;
852 ep->me_value = NULL;
853 mp->ma_used--;
854 Py_DECREF(old_value);
855 Py_DECREF(old_key);
856 return 0;
857 }
858
859 void
PyDict_Clear(PyObject * op)860 PyDict_Clear(PyObject *op)
861 {
862 PyDictObject *mp;
863 PyDictEntry *ep, *table;
864 int table_is_malloced;
865 Py_ssize_t fill;
866 PyDictEntry small_copy[PyDict_MINSIZE];
867 #ifdef Py_DEBUG
868 Py_ssize_t i, n;
869 #endif
870
871 if (!PyDict_Check(op))
872 return;
873 mp = (PyDictObject *)op;
874 #ifdef Py_DEBUG
875 n = mp->ma_mask + 1;
876 i = 0;
877 #endif
878
879 table = mp->ma_table;
880 assert(table != NULL);
881 table_is_malloced = table != mp->ma_smalltable;
882
883 /* This is delicate. During the process of clearing the dict,
884 * decrefs can cause the dict to mutate. To avoid fatal confusion
885 * (voice of experience), we have to make the dict empty before
886 * clearing the slots, and never refer to anything via mp->xxx while
887 * clearing.
888 */
889 fill = mp->ma_fill;
890 if (table_is_malloced)
891 EMPTY_TO_MINSIZE(mp);
892
893 else if (fill > 0) {
894 /* It's a small table with something that needs to be cleared.
895 * Afraid the only safe way is to copy the dict entries into
896 * another small table first.
897 */
898 memcpy(small_copy, table, sizeof(small_copy));
899 table = small_copy;
900 EMPTY_TO_MINSIZE(mp);
901 }
902 /* else it's a small table that's already empty */
903
904 /* Now we can finally clear things. If C had refcounts, we could
905 * assert that the refcount on table is 1 now, i.e. that this function
906 * has unique access to it, so decref side-effects can't alter it.
907 */
908 for (ep = table; fill > 0; ++ep) {
909 #ifdef Py_DEBUG
910 assert(i < n);
911 ++i;
912 #endif
913 if (ep->me_key) {
914 --fill;
915 Py_DECREF(ep->me_key);
916 Py_XDECREF(ep->me_value);
917 }
918 #ifdef Py_DEBUG
919 else
920 assert(ep->me_value == NULL);
921 #endif
922 }
923
924 if (table_is_malloced)
925 PyMem_DEL(table);
926 }
927
928 /*
929 * Iterate over a dict. Use like so:
930 *
931 * Py_ssize_t i;
932 * PyObject *key, *value;
933 * i = 0; # important! i should not otherwise be changed by you
934 * while (PyDict_Next(yourdict, &i, &key, &value)) {
935 * Refer to borrowed references in key and value.
936 * }
937 *
938 * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
939 * mutates the dict. One exception: it is safe if the loop merely changes
940 * the values associated with the keys (but doesn't insert new keys or
941 * delete keys), via PyDict_SetItem().
942 */
943 int
PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue)944 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
945 {
946 register Py_ssize_t i;
947 register Py_ssize_t mask;
948 register PyDictEntry *ep;
949
950 if (!PyDict_Check(op))
951 return 0;
952 i = *ppos;
953 if (i < 0)
954 return 0;
955 ep = ((PyDictObject *)op)->ma_table;
956 mask = ((PyDictObject *)op)->ma_mask;
957 while (i <= mask && ep[i].me_value == NULL)
958 i++;
959 *ppos = i+1;
960 if (i > mask)
961 return 0;
962 if (pkey)
963 *pkey = ep[i].me_key;
964 if (pvalue)
965 *pvalue = ep[i].me_value;
966 return 1;
967 }
968
969 /* Internal version of PyDict_Next that returns a hash value in addition to the key and value.*/
970 int
_PyDict_Next(PyObject * op,Py_ssize_t * ppos,PyObject ** pkey,PyObject ** pvalue,long * phash)971 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash)
972 {
973 register Py_ssize_t i;
974 register Py_ssize_t mask;
975 register PyDictEntry *ep;
976
977 if (!PyDict_Check(op))
978 return 0;
979 i = *ppos;
980 if (i < 0)
981 return 0;
982 ep = ((PyDictObject *)op)->ma_table;
983 mask = ((PyDictObject *)op)->ma_mask;
984 while (i <= mask && ep[i].me_value == NULL)
985 i++;
986 *ppos = i+1;
987 if (i > mask)
988 return 0;
989 *phash = (long)(ep[i].me_hash);
990 if (pkey)
991 *pkey = ep[i].me_key;
992 if (pvalue)
993 *pvalue = ep[i].me_value;
994 return 1;
995 }
996
997 /* Methods */
998
999 static void
dict_dealloc(register PyDictObject * mp)1000 dict_dealloc(register PyDictObject *mp)
1001 {
1002 register PyDictEntry *ep;
1003 Py_ssize_t fill = mp->ma_fill;
1004 PyObject_GC_UnTrack(mp);
1005 Py_TRASHCAN_SAFE_BEGIN(mp)
1006 for (ep = mp->ma_table; fill > 0; ep++) {
1007 if (ep->me_key) {
1008 --fill;
1009 Py_DECREF(ep->me_key);
1010 Py_XDECREF(ep->me_value);
1011 }
1012 }
1013 if (mp->ma_table != mp->ma_smalltable)
1014 PyMem_DEL(mp->ma_table);
1015 if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
1016 free_list[numfree++] = mp;
1017 else
1018 Py_TYPE(mp)->tp_free((PyObject *)mp);
1019 Py_TRASHCAN_SAFE_END(mp)
1020 }
1021
1022 static int
dict_print(register PyDictObject * mp,register FILE * fp,register int flags)1023 dict_print(register PyDictObject *mp, register FILE *fp, register int flags)
1024 {
1025 register Py_ssize_t i;
1026 register Py_ssize_t any;
1027 int status;
1028
1029 status = Py_ReprEnter((PyObject*)mp);
1030 if (status != 0) {
1031 if (status < 0)
1032 return status;
1033 Py_BEGIN_ALLOW_THREADS
1034 fprintf(fp, "{...}");
1035 Py_END_ALLOW_THREADS
1036 return 0;
1037 }
1038
1039 Py_BEGIN_ALLOW_THREADS
1040 fprintf(fp, "{");
1041 Py_END_ALLOW_THREADS
1042 any = 0;
1043 for (i = 0; i <= mp->ma_mask; i++) {
1044 PyDictEntry *ep = mp->ma_table + i;
1045 PyObject *pvalue = ep->me_value;
1046 if (pvalue != NULL) {
1047 /* Prevent PyObject_Repr from deleting value during
1048 key format */
1049 Py_INCREF(pvalue);
1050 if (any++ > 0) {
1051 Py_BEGIN_ALLOW_THREADS
1052 fprintf(fp, ", ");
1053 Py_END_ALLOW_THREADS
1054 }
1055 if (PyObject_Print((PyObject *)ep->me_key, fp, 0)!=0) {
1056 Py_DECREF(pvalue);
1057 Py_ReprLeave((PyObject*)mp);
1058 return -1;
1059 }
1060 Py_BEGIN_ALLOW_THREADS
1061 fprintf(fp, ": ");
1062 Py_END_ALLOW_THREADS
1063 if (PyObject_Print(pvalue, fp, 0) != 0) {
1064 Py_DECREF(pvalue);
1065 Py_ReprLeave((PyObject*)mp);
1066 return -1;
1067 }
1068 Py_DECREF(pvalue);
1069 }
1070 }
1071 Py_BEGIN_ALLOW_THREADS
1072 fprintf(fp, "}");
1073 Py_END_ALLOW_THREADS
1074 Py_ReprLeave((PyObject*)mp);
1075 return 0;
1076 }
1077
1078 static PyObject *
dict_repr(PyDictObject * mp)1079 dict_repr(PyDictObject *mp)
1080 {
1081 Py_ssize_t i;
1082 PyObject *s, *temp, *colon = NULL;
1083 PyObject *pieces = NULL, *result = NULL;
1084 PyObject *key, *value;
1085
1086 i = Py_ReprEnter((PyObject *)mp);
1087 if (i != 0) {
1088 return i > 0 ? PyString_FromString("{...}") : NULL;
1089 }
1090
1091 if (mp->ma_used == 0) {
1092 result = PyString_FromString("{}");
1093 goto Done;
1094 }
1095
1096 pieces = PyList_New(0);
1097 if (pieces == NULL)
1098 goto Done;
1099
1100 colon = PyString_FromString(": ");
1101 if (colon == NULL)
1102 goto Done;
1103
1104 /* Do repr() on each key+value pair, and insert ": " between them.
1105 Note that repr may mutate the dict. */
1106 i = 0;
1107 while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
1108 int status;
1109 /* Prevent repr from deleting value during key format. */
1110 Py_INCREF(value);
1111 s = PyObject_Repr(key);
1112 PyString_Concat(&s, colon);
1113 PyString_ConcatAndDel(&s, PyObject_Repr(value));
1114 Py_DECREF(value);
1115 if (s == NULL)
1116 goto Done;
1117 status = PyList_Append(pieces, s);
1118 Py_DECREF(s); /* append created a new ref */
1119 if (status < 0)
1120 goto Done;
1121 }
1122
1123 /* Add "{}" decorations to the first and last items. */
1124 assert(PyList_GET_SIZE(pieces) > 0);
1125 s = PyString_FromString("{");
1126 if (s == NULL)
1127 goto Done;
1128 temp = PyList_GET_ITEM(pieces, 0);
1129 PyString_ConcatAndDel(&s, temp);
1130 PyList_SET_ITEM(pieces, 0, s);
1131 if (s == NULL)
1132 goto Done;
1133
1134 s = PyString_FromString("}");
1135 if (s == NULL)
1136 goto Done;
1137 temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
1138 PyString_ConcatAndDel(&temp, s);
1139 PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
1140 if (temp == NULL)
1141 goto Done;
1142
1143 /* Paste them all together with ", " between. */
1144 s = PyString_FromString(", ");
1145 if (s == NULL)
1146 goto Done;
1147 result = _PyString_Join(s, pieces);
1148 Py_DECREF(s);
1149
1150 Done:
1151 Py_XDECREF(pieces);
1152 Py_XDECREF(colon);
1153 Py_ReprLeave((PyObject *)mp);
1154 return result;
1155 }
1156
1157 static Py_ssize_t
dict_length(PyDictObject * mp)1158 dict_length(PyDictObject *mp)
1159 {
1160 return mp->ma_used;
1161 }
1162
1163 static PyObject *
dict_subscript(PyDictObject * mp,register PyObject * key)1164 dict_subscript(PyDictObject *mp, register PyObject *key)
1165 {
1166 PyObject *v;
1167 long hash;
1168 PyDictEntry *ep;
1169 assert(mp->ma_table != NULL);
1170 if (!PyString_CheckExact(key) ||
1171 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1172 hash = PyObject_Hash(key);
1173 if (hash == -1)
1174 return NULL;
1175 }
1176 ep = (mp->ma_lookup)(mp, key, hash);
1177 if (ep == NULL)
1178 return NULL;
1179 v = ep->me_value;
1180 if (v == NULL) {
1181 if (!PyDict_CheckExact(mp)) {
1182 /* Look up __missing__ method if we're a subclass. */
1183 PyObject *missing, *res;
1184 static PyObject *missing_str = NULL;
1185 missing = _PyObject_LookupSpecial((PyObject *)mp,
1186 "__missing__",
1187 &missing_str);
1188 if (missing != NULL) {
1189 res = PyObject_CallFunctionObjArgs(missing,
1190 key, NULL);
1191 Py_DECREF(missing);
1192 return res;
1193 }
1194 else if (PyErr_Occurred())
1195 return NULL;
1196 }
1197 set_key_error(key);
1198 return NULL;
1199 }
1200 else
1201 Py_INCREF(v);
1202 return v;
1203 }
1204
1205 static int
dict_ass_sub(PyDictObject * mp,PyObject * v,PyObject * w)1206 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
1207 {
1208 if (w == NULL)
1209 return PyDict_DelItem((PyObject *)mp, v);
1210 else
1211 return PyDict_SetItem((PyObject *)mp, v, w);
1212 }
1213
1214 static PyMappingMethods dict_as_mapping = {
1215 (lenfunc)dict_length, /*mp_length*/
1216 (binaryfunc)dict_subscript, /*mp_subscript*/
1217 (objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
1218 };
1219
1220 static PyObject *
dict_keys(register PyDictObject * mp)1221 dict_keys(register PyDictObject *mp)
1222 {
1223 register PyObject *v;
1224 register Py_ssize_t i, j;
1225 PyDictEntry *ep;
1226 Py_ssize_t mask, n;
1227
1228 again:
1229 n = mp->ma_used;
1230 v = PyList_New(n);
1231 if (v == NULL)
1232 return NULL;
1233 if (n != mp->ma_used) {
1234 /* Durnit. The allocations caused the dict to resize.
1235 * Just start over, this shouldn't normally happen.
1236 */
1237 Py_DECREF(v);
1238 goto again;
1239 }
1240 ep = mp->ma_table;
1241 mask = mp->ma_mask;
1242 for (i = 0, j = 0; i <= mask; i++) {
1243 if (ep[i].me_value != NULL) {
1244 PyObject *key = ep[i].me_key;
1245 Py_INCREF(key);
1246 PyList_SET_ITEM(v, j, key);
1247 j++;
1248 }
1249 }
1250 assert(j == n);
1251 return v;
1252 }
1253
1254 static PyObject *
dict_values(register PyDictObject * mp)1255 dict_values(register PyDictObject *mp)
1256 {
1257 register PyObject *v;
1258 register Py_ssize_t i, j;
1259 PyDictEntry *ep;
1260 Py_ssize_t mask, n;
1261
1262 again:
1263 n = mp->ma_used;
1264 v = PyList_New(n);
1265 if (v == NULL)
1266 return NULL;
1267 if (n != mp->ma_used) {
1268 /* Durnit. The allocations caused the dict to resize.
1269 * Just start over, this shouldn't normally happen.
1270 */
1271 Py_DECREF(v);
1272 goto again;
1273 }
1274 ep = mp->ma_table;
1275 mask = mp->ma_mask;
1276 for (i = 0, j = 0; i <= mask; i++) {
1277 if (ep[i].me_value != NULL) {
1278 PyObject *value = ep[i].me_value;
1279 Py_INCREF(value);
1280 PyList_SET_ITEM(v, j, value);
1281 j++;
1282 }
1283 }
1284 assert(j == n);
1285 return v;
1286 }
1287
1288 static PyObject *
dict_items(register PyDictObject * mp)1289 dict_items(register PyDictObject *mp)
1290 {
1291 register PyObject *v;
1292 register Py_ssize_t i, j, n;
1293 Py_ssize_t mask;
1294 PyObject *item, *key, *value;
1295 PyDictEntry *ep;
1296
1297 /* Preallocate the list of tuples, to avoid allocations during
1298 * the loop over the items, which could trigger GC, which
1299 * could resize the dict. :-(
1300 */
1301 again:
1302 n = mp->ma_used;
1303 v = PyList_New(n);
1304 if (v == NULL)
1305 return NULL;
1306 for (i = 0; i < n; i++) {
1307 item = PyTuple_New(2);
1308 if (item == NULL) {
1309 Py_DECREF(v);
1310 return NULL;
1311 }
1312 PyList_SET_ITEM(v, i, item);
1313 }
1314 if (n != mp->ma_used) {
1315 /* Durnit. The allocations caused the dict to resize.
1316 * Just start over, this shouldn't normally happen.
1317 */
1318 Py_DECREF(v);
1319 goto again;
1320 }
1321 /* Nothing we do below makes any function calls. */
1322 ep = mp->ma_table;
1323 mask = mp->ma_mask;
1324 for (i = 0, j = 0; i <= mask; i++) {
1325 if ((value=ep[i].me_value) != NULL) {
1326 key = ep[i].me_key;
1327 item = PyList_GET_ITEM(v, j);
1328 Py_INCREF(key);
1329 PyTuple_SET_ITEM(item, 0, key);
1330 Py_INCREF(value);
1331 PyTuple_SET_ITEM(item, 1, value);
1332 j++;
1333 }
1334 }
1335 assert(j == n);
1336 return v;
1337 }
1338
1339 static PyObject *
dict_fromkeys(PyObject * cls,PyObject * args)1340 dict_fromkeys(PyObject *cls, PyObject *args)
1341 {
1342 PyObject *seq;
1343 PyObject *value = Py_None;
1344 PyObject *it; /* iter(seq) */
1345 PyObject *key;
1346 PyObject *d;
1347 int status;
1348
1349 if (!PyArg_UnpackTuple(args, "fromkeys", 1, 2, &seq, &value))
1350 return NULL;
1351
1352 d = PyObject_CallObject(cls, NULL);
1353 if (d == NULL)
1354 return NULL;
1355
1356 if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
1357 if (PyDict_CheckExact(seq)) {
1358 PyDictObject *mp = (PyDictObject *)d;
1359 PyObject *oldvalue;
1360 Py_ssize_t pos = 0;
1361 PyObject *key;
1362 long hash;
1363
1364 if (dictresize(mp, Py_SIZE(seq))) {
1365 Py_DECREF(d);
1366 return NULL;
1367 }
1368
1369 while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) {
1370 Py_INCREF(key);
1371 Py_INCREF(value);
1372 if (insertdict(mp, key, hash, value)) {
1373 Py_DECREF(d);
1374 return NULL;
1375 }
1376 }
1377 return d;
1378 }
1379 if (PyAnySet_CheckExact(seq)) {
1380 PyDictObject *mp = (PyDictObject *)d;
1381 Py_ssize_t pos = 0;
1382 PyObject *key;
1383 long hash;
1384
1385 if (dictresize(mp, PySet_GET_SIZE(seq))) {
1386 Py_DECREF(d);
1387 return NULL;
1388 }
1389
1390 while (_PySet_NextEntry(seq, &pos, &key, &hash)) {
1391 Py_INCREF(key);
1392 Py_INCREF(value);
1393 if (insertdict(mp, key, hash, value)) {
1394 Py_DECREF(d);
1395 return NULL;
1396 }
1397 }
1398 return d;
1399 }
1400 }
1401
1402 it = PyObject_GetIter(seq);
1403 if (it == NULL){
1404 Py_DECREF(d);
1405 return NULL;
1406 }
1407
1408 if (PyDict_CheckExact(d)) {
1409 while ((key = PyIter_Next(it)) != NULL) {
1410 status = PyDict_SetItem(d, key, value);
1411 Py_DECREF(key);
1412 if (status < 0)
1413 goto Fail;
1414 }
1415 } else {
1416 while ((key = PyIter_Next(it)) != NULL) {
1417 status = PyObject_SetItem(d, key, value);
1418 Py_DECREF(key);
1419 if (status < 0)
1420 goto Fail;
1421 }
1422 }
1423
1424 if (PyErr_Occurred())
1425 goto Fail;
1426 Py_DECREF(it);
1427 return d;
1428
1429 Fail:
1430 Py_DECREF(it);
1431 Py_DECREF(d);
1432 return NULL;
1433 }
1434
1435 static int
dict_update_common(PyObject * self,PyObject * args,PyObject * kwds,char * methname)1436 dict_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname)
1437 {
1438 PyObject *arg = NULL;
1439 int result = 0;
1440
1441 if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg))
1442 result = -1;
1443
1444 else if (arg != NULL) {
1445 if (PyObject_HasAttrString(arg, "keys"))
1446 result = PyDict_Merge(self, arg, 1);
1447 else
1448 result = PyDict_MergeFromSeq2(self, arg, 1);
1449 }
1450 if (result == 0 && kwds != NULL)
1451 result = PyDict_Merge(self, kwds, 1);
1452 return result;
1453 }
1454
1455 static PyObject *
dict_update(PyObject * self,PyObject * args,PyObject * kwds)1456 dict_update(PyObject *self, PyObject *args, PyObject *kwds)
1457 {
1458 if (dict_update_common(self, args, kwds, "update") != -1)
1459 Py_RETURN_NONE;
1460 return NULL;
1461 }
1462
1463 /* Update unconditionally replaces existing items.
1464 Merge has a 3rd argument 'override'; if set, it acts like Update,
1465 otherwise it leaves existing items unchanged.
1466
1467 PyDict_{Update,Merge} update/merge from a mapping object.
1468
1469 PyDict_MergeFromSeq2 updates/merges from any iterable object
1470 producing iterable objects of length 2.
1471 */
1472
1473 int
PyDict_MergeFromSeq2(PyObject * d,PyObject * seq2,int override)1474 PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
1475 {
1476 PyObject *it; /* iter(seq2) */
1477 Py_ssize_t i; /* index into seq2 of current element */
1478 PyObject *item; /* seq2[i] */
1479 PyObject *fast; /* item as a 2-tuple or 2-list */
1480
1481 assert(d != NULL);
1482 assert(PyDict_Check(d));
1483 assert(seq2 != NULL);
1484
1485 it = PyObject_GetIter(seq2);
1486 if (it == NULL)
1487 return -1;
1488
1489 for (i = 0; ; ++i) {
1490 PyObject *key, *value;
1491 Py_ssize_t n;
1492
1493 fast = NULL;
1494 item = PyIter_Next(it);
1495 if (item == NULL) {
1496 if (PyErr_Occurred())
1497 goto Fail;
1498 break;
1499 }
1500
1501 /* Convert item to sequence, and verify length 2. */
1502 fast = PySequence_Fast(item, "");
1503 if (fast == NULL) {
1504 if (PyErr_ExceptionMatches(PyExc_TypeError))
1505 PyErr_Format(PyExc_TypeError,
1506 "cannot convert dictionary update "
1507 "sequence element #%zd to a sequence",
1508 i);
1509 goto Fail;
1510 }
1511 n = PySequence_Fast_GET_SIZE(fast);
1512 if (n != 2) {
1513 PyErr_Format(PyExc_ValueError,
1514 "dictionary update sequence element #%zd "
1515 "has length %zd; 2 is required",
1516 i, n);
1517 goto Fail;
1518 }
1519
1520 /* Update/merge with this (key, value) pair. */
1521 key = PySequence_Fast_GET_ITEM(fast, 0);
1522 value = PySequence_Fast_GET_ITEM(fast, 1);
1523 if (override || PyDict_GetItem(d, key) == NULL) {
1524 int status = PyDict_SetItem(d, key, value);
1525 if (status < 0)
1526 goto Fail;
1527 }
1528 Py_DECREF(fast);
1529 Py_DECREF(item);
1530 }
1531
1532 i = 0;
1533 goto Return;
1534 Fail:
1535 Py_XDECREF(item);
1536 Py_XDECREF(fast);
1537 i = -1;
1538 Return:
1539 Py_DECREF(it);
1540 return Py_SAFE_DOWNCAST(i, Py_ssize_t, int);
1541 }
1542
1543 int
PyDict_Update(PyObject * a,PyObject * b)1544 PyDict_Update(PyObject *a, PyObject *b)
1545 {
1546 return PyDict_Merge(a, b, 1);
1547 }
1548
1549 int
PyDict_Merge(PyObject * a,PyObject * b,int override)1550 PyDict_Merge(PyObject *a, PyObject *b, int override)
1551 {
1552 register PyDictObject *mp, *other;
1553 register Py_ssize_t i;
1554 PyDictEntry *entry;
1555
1556 /* We accept for the argument either a concrete dictionary object,
1557 * or an abstract "mapping" object. For the former, we can do
1558 * things quite efficiently. For the latter, we only require that
1559 * PyMapping_Keys() and PyObject_GetItem() be supported.
1560 */
1561 if (a == NULL || !PyDict_Check(a) || b == NULL) {
1562 PyErr_BadInternalCall();
1563 return -1;
1564 }
1565 mp = (PyDictObject*)a;
1566 if (PyDict_Check(b)) {
1567 other = (PyDictObject*)b;
1568 if (other == mp || other->ma_used == 0)
1569 /* a.update(a) or a.update({}); nothing to do */
1570 return 0;
1571 if (mp->ma_used == 0)
1572 /* Since the target dict is empty, PyDict_GetItem()
1573 * always returns NULL. Setting override to 1
1574 * skips the unnecessary test.
1575 */
1576 override = 1;
1577 /* Do one big resize at the start, rather than
1578 * incrementally resizing as we insert new items. Expect
1579 * that there will be no (or few) overlapping keys.
1580 */
1581 if ((mp->ma_fill + other->ma_used)*3 >= (mp->ma_mask+1)*2) {
1582 if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
1583 return -1;
1584 }
1585 for (i = 0; i <= other->ma_mask; i++) {
1586 entry = &other->ma_table[i];
1587 if (entry->me_value != NULL &&
1588 (override ||
1589 PyDict_GetItem(a, entry->me_key) == NULL)) {
1590 Py_INCREF(entry->me_key);
1591 Py_INCREF(entry->me_value);
1592 if (insertdict(mp, entry->me_key,
1593 (long)entry->me_hash,
1594 entry->me_value) != 0)
1595 return -1;
1596 }
1597 }
1598 }
1599 else {
1600 /* Do it the generic, slower way */
1601 PyObject *keys = PyMapping_Keys(b);
1602 PyObject *iter;
1603 PyObject *key, *value;
1604 int status;
1605
1606 if (keys == NULL)
1607 /* Docstring says this is equivalent to E.keys() so
1608 * if E doesn't have a .keys() method we want
1609 * AttributeError to percolate up. Might as well
1610 * do the same for any other error.
1611 */
1612 return -1;
1613
1614 iter = PyObject_GetIter(keys);
1615 Py_DECREF(keys);
1616 if (iter == NULL)
1617 return -1;
1618
1619 for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) {
1620 if (!override && PyDict_GetItem(a, key) != NULL) {
1621 Py_DECREF(key);
1622 continue;
1623 }
1624 value = PyObject_GetItem(b, key);
1625 if (value == NULL) {
1626 Py_DECREF(iter);
1627 Py_DECREF(key);
1628 return -1;
1629 }
1630 status = PyDict_SetItem(a, key, value);
1631 Py_DECREF(key);
1632 Py_DECREF(value);
1633 if (status < 0) {
1634 Py_DECREF(iter);
1635 return -1;
1636 }
1637 }
1638 Py_DECREF(iter);
1639 if (PyErr_Occurred())
1640 /* Iterator completed, via error */
1641 return -1;
1642 }
1643 return 0;
1644 }
1645
1646 static PyObject *
dict_copy(register PyDictObject * mp)1647 dict_copy(register PyDictObject *mp)
1648 {
1649 return PyDict_Copy((PyObject*)mp);
1650 }
1651
1652 PyObject *
PyDict_Copy(PyObject * o)1653 PyDict_Copy(PyObject *o)
1654 {
1655 PyObject *copy;
1656
1657 if (o == NULL || !PyDict_Check(o)) {
1658 PyErr_BadInternalCall();
1659 return NULL;
1660 }
1661 copy = PyDict_New();
1662 if (copy == NULL)
1663 return NULL;
1664 if (PyDict_Merge(copy, o, 1) == 0)
1665 return copy;
1666 Py_DECREF(copy);
1667 return NULL;
1668 }
1669
1670 Py_ssize_t
PyDict_Size(PyObject * mp)1671 PyDict_Size(PyObject *mp)
1672 {
1673 if (mp == NULL || !PyDict_Check(mp)) {
1674 PyErr_BadInternalCall();
1675 return -1;
1676 }
1677 return ((PyDictObject *)mp)->ma_used;
1678 }
1679
1680 PyObject *
PyDict_Keys(PyObject * mp)1681 PyDict_Keys(PyObject *mp)
1682 {
1683 if (mp == NULL || !PyDict_Check(mp)) {
1684 PyErr_BadInternalCall();
1685 return NULL;
1686 }
1687 return dict_keys((PyDictObject *)mp);
1688 }
1689
1690 PyObject *
PyDict_Values(PyObject * mp)1691 PyDict_Values(PyObject *mp)
1692 {
1693 if (mp == NULL || !PyDict_Check(mp)) {
1694 PyErr_BadInternalCall();
1695 return NULL;
1696 }
1697 return dict_values((PyDictObject *)mp);
1698 }
1699
1700 PyObject *
PyDict_Items(PyObject * mp)1701 PyDict_Items(PyObject *mp)
1702 {
1703 if (mp == NULL || !PyDict_Check(mp)) {
1704 PyErr_BadInternalCall();
1705 return NULL;
1706 }
1707 return dict_items((PyDictObject *)mp);
1708 }
1709
1710 /* Subroutine which returns the smallest key in a for which b's value
1711 is different or absent. The value is returned too, through the
1712 pval argument. Both are NULL if no key in a is found for which b's status
1713 differs. The refcounts on (and only on) non-NULL *pval and function return
1714 values must be decremented by the caller (characterize() increments them
1715 to ensure that mutating comparison and PyDict_GetItem calls can't delete
1716 them before the caller is done looking at them). */
1717
1718 static PyObject *
characterize(PyDictObject * a,PyDictObject * b,PyObject ** pval)1719 characterize(PyDictObject *a, PyDictObject *b, PyObject **pval)
1720 {
1721 PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
1722 PyObject *aval = NULL; /* a[akey] */
1723 Py_ssize_t i;
1724 int cmp;
1725
1726 for (i = 0; i <= a->ma_mask; i++) {
1727 PyObject *thiskey, *thisaval, *thisbval;
1728 if (a->ma_table[i].me_value == NULL)
1729 continue;
1730 thiskey = a->ma_table[i].me_key;
1731 Py_INCREF(thiskey); /* keep alive across compares */
1732 if (akey != NULL) {
1733 cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
1734 if (cmp < 0) {
1735 Py_DECREF(thiskey);
1736 goto Fail;
1737 }
1738 if (cmp > 0 ||
1739 i > a->ma_mask ||
1740 a->ma_table[i].me_value == NULL)
1741 {
1742 /* Not the *smallest* a key; or maybe it is
1743 * but the compare shrunk the dict so we can't
1744 * find its associated value anymore; or
1745 * maybe it is but the compare deleted the
1746 * a[thiskey] entry.
1747 */
1748 Py_DECREF(thiskey);
1749 continue;
1750 }
1751 }
1752
1753 /* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
1754 thisaval = a->ma_table[i].me_value;
1755 assert(thisaval);
1756 Py_INCREF(thisaval); /* keep alive */
1757 thisbval = PyDict_GetItem((PyObject *)b, thiskey);
1758 if (thisbval == NULL)
1759 cmp = 0;
1760 else {
1761 /* both dicts have thiskey: same values? */
1762 cmp = PyObject_RichCompareBool(
1763 thisaval, thisbval, Py_EQ);
1764 if (cmp < 0) {
1765 Py_DECREF(thiskey);
1766 Py_DECREF(thisaval);
1767 goto Fail;
1768 }
1769 }
1770 if (cmp == 0) {
1771 /* New winner. */
1772 Py_XDECREF(akey);
1773 Py_XDECREF(aval);
1774 akey = thiskey;
1775 aval = thisaval;
1776 }
1777 else {
1778 Py_DECREF(thiskey);
1779 Py_DECREF(thisaval);
1780 }
1781 }
1782 *pval = aval;
1783 return akey;
1784
1785 Fail:
1786 Py_XDECREF(akey);
1787 Py_XDECREF(aval);
1788 *pval = NULL;
1789 return NULL;
1790 }
1791
1792 static int
dict_compare(PyDictObject * a,PyDictObject * b)1793 dict_compare(PyDictObject *a, PyDictObject *b)
1794 {
1795 PyObject *adiff, *bdiff, *aval, *bval;
1796 int res;
1797
1798 /* Compare lengths first */
1799 if (a->ma_used < b->ma_used)
1800 return -1; /* a is shorter */
1801 else if (a->ma_used > b->ma_used)
1802 return 1; /* b is shorter */
1803
1804 /* Same length -- check all keys */
1805 bdiff = bval = NULL;
1806 adiff = characterize(a, b, &aval);
1807 if (adiff == NULL) {
1808 assert(!aval);
1809 /* Either an error, or a is a subset with the same length so
1810 * must be equal.
1811 */
1812 res = PyErr_Occurred() ? -1 : 0;
1813 goto Finished;
1814 }
1815 bdiff = characterize(b, a, &bval);
1816 if (bdiff == NULL && PyErr_Occurred()) {
1817 assert(!bval);
1818 res = -1;
1819 goto Finished;
1820 }
1821 res = 0;
1822 if (bdiff) {
1823 /* bdiff == NULL "should be" impossible now, but perhaps
1824 * the last comparison done by the characterize() on a had
1825 * the side effect of making the dicts equal!
1826 */
1827 res = PyObject_Compare(adiff, bdiff);
1828 }
1829 if (res == 0 && bval != NULL)
1830 res = PyObject_Compare(aval, bval);
1831
1832 Finished:
1833 Py_XDECREF(adiff);
1834 Py_XDECREF(bdiff);
1835 Py_XDECREF(aval);
1836 Py_XDECREF(bval);
1837 return res;
1838 }
1839
1840 /* Return 1 if dicts equal, 0 if not, -1 if error.
1841 * Gets out as soon as any difference is detected.
1842 * Uses only Py_EQ comparison.
1843 */
1844 static int
dict_equal(PyDictObject * a,PyDictObject * b)1845 dict_equal(PyDictObject *a, PyDictObject *b)
1846 {
1847 Py_ssize_t i;
1848
1849 if (a->ma_used != b->ma_used)
1850 /* can't be equal if # of entries differ */
1851 return 0;
1852
1853 /* Same # of entries -- check all of 'em. Exit early on any diff. */
1854 for (i = 0; i <= a->ma_mask; i++) {
1855 PyObject *aval = a->ma_table[i].me_value;
1856 if (aval != NULL) {
1857 int cmp;
1858 PyObject *bval;
1859 PyObject *key = a->ma_table[i].me_key;
1860 /* temporarily bump aval's refcount to ensure it stays
1861 alive until we're done with it */
1862 Py_INCREF(aval);
1863 /* ditto for key */
1864 Py_INCREF(key);
1865 bval = PyDict_GetItem((PyObject *)b, key);
1866 Py_DECREF(key);
1867 if (bval == NULL) {
1868 Py_DECREF(aval);
1869 return 0;
1870 }
1871 cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
1872 Py_DECREF(aval);
1873 if (cmp <= 0) /* error or not equal */
1874 return cmp;
1875 }
1876 }
1877 return 1;
1878 }
1879
1880 static PyObject *
dict_richcompare(PyObject * v,PyObject * w,int op)1881 dict_richcompare(PyObject *v, PyObject *w, int op)
1882 {
1883 int cmp;
1884 PyObject *res;
1885
1886 if (!PyDict_Check(v) || !PyDict_Check(w)) {
1887 res = Py_NotImplemented;
1888 }
1889 else if (op == Py_EQ || op == Py_NE) {
1890 cmp = dict_equal((PyDictObject *)v, (PyDictObject *)w);
1891 if (cmp < 0)
1892 return NULL;
1893 res = (cmp == (op == Py_EQ)) ? Py_True : Py_False;
1894 }
1895 else {
1896 /* Py3K warning if comparison isn't == or != */
1897 if (PyErr_WarnPy3k("dict inequality comparisons not supported "
1898 "in 3.x", 1) < 0) {
1899 return NULL;
1900 }
1901 res = Py_NotImplemented;
1902 }
1903 Py_INCREF(res);
1904 return res;
1905 }
1906
1907 static PyObject *
dict_contains(register PyDictObject * mp,PyObject * key)1908 dict_contains(register PyDictObject *mp, PyObject *key)
1909 {
1910 long hash;
1911 PyDictEntry *ep;
1912
1913 if (!PyString_CheckExact(key) ||
1914 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1915 hash = PyObject_Hash(key);
1916 if (hash == -1)
1917 return NULL;
1918 }
1919 ep = (mp->ma_lookup)(mp, key, hash);
1920 if (ep == NULL)
1921 return NULL;
1922 return PyBool_FromLong(ep->me_value != NULL);
1923 }
1924
1925 static PyObject *
dict_has_key(register PyDictObject * mp,PyObject * key)1926 dict_has_key(register PyDictObject *mp, PyObject *key)
1927 {
1928 if (PyErr_WarnPy3k("dict.has_key() not supported in 3.x; "
1929 "use the in operator", 1) < 0)
1930 return NULL;
1931 return dict_contains(mp, key);
1932 }
1933
1934 static PyObject *
dict_get(register PyDictObject * mp,PyObject * args)1935 dict_get(register PyDictObject *mp, PyObject *args)
1936 {
1937 PyObject *key;
1938 PyObject *failobj = Py_None;
1939 PyObject *val = NULL;
1940 long hash;
1941 PyDictEntry *ep;
1942
1943 if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
1944 return NULL;
1945
1946 if (!PyString_CheckExact(key) ||
1947 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1948 hash = PyObject_Hash(key);
1949 if (hash == -1)
1950 return NULL;
1951 }
1952 ep = (mp->ma_lookup)(mp, key, hash);
1953 if (ep == NULL)
1954 return NULL;
1955 val = ep->me_value;
1956 if (val == NULL)
1957 val = failobj;
1958 Py_INCREF(val);
1959 return val;
1960 }
1961
1962
1963 static PyObject *
dict_setdefault(register PyDictObject * mp,PyObject * args)1964 dict_setdefault(register PyDictObject *mp, PyObject *args)
1965 {
1966 PyObject *key;
1967 PyObject *failobj = Py_None;
1968 PyObject *val = NULL;
1969 long hash;
1970 PyDictEntry *ep;
1971
1972 if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
1973 return NULL;
1974
1975 if (!PyString_CheckExact(key) ||
1976 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
1977 hash = PyObject_Hash(key);
1978 if (hash == -1)
1979 return NULL;
1980 }
1981 ep = (mp->ma_lookup)(mp, key, hash);
1982 if (ep == NULL)
1983 return NULL;
1984 val = ep->me_value;
1985 if (val == NULL) {
1986 if (dict_set_item_by_hash_or_entry((PyObject*)mp, key, hash, ep,
1987 failobj) == 0)
1988 val = failobj;
1989 }
1990 Py_XINCREF(val);
1991 return val;
1992 }
1993
1994
1995 static PyObject *
dict_clear(register PyDictObject * mp)1996 dict_clear(register PyDictObject *mp)
1997 {
1998 PyDict_Clear((PyObject *)mp);
1999 Py_RETURN_NONE;
2000 }
2001
2002 static PyObject *
dict_pop(PyDictObject * mp,PyObject * args)2003 dict_pop(PyDictObject *mp, PyObject *args)
2004 {
2005 long hash;
2006 PyDictEntry *ep;
2007 PyObject *old_value, *old_key;
2008 PyObject *key, *deflt = NULL;
2009
2010 if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
2011 return NULL;
2012 if (mp->ma_used == 0) {
2013 if (deflt) {
2014 Py_INCREF(deflt);
2015 return deflt;
2016 }
2017 set_key_error(key);
2018 return NULL;
2019 }
2020 if (!PyString_CheckExact(key) ||
2021 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
2022 hash = PyObject_Hash(key);
2023 if (hash == -1)
2024 return NULL;
2025 }
2026 ep = (mp->ma_lookup)(mp, key, hash);
2027 if (ep == NULL)
2028 return NULL;
2029 if (ep->me_value == NULL) {
2030 if (deflt) {
2031 Py_INCREF(deflt);
2032 return deflt;
2033 }
2034 set_key_error(key);
2035 return NULL;
2036 }
2037 old_key = ep->me_key;
2038 Py_INCREF(dummy);
2039 ep->me_key = dummy;
2040 old_value = ep->me_value;
2041 ep->me_value = NULL;
2042 mp->ma_used--;
2043 Py_DECREF(old_key);
2044 return old_value;
2045 }
2046
2047 static PyObject *
dict_popitem(PyDictObject * mp)2048 dict_popitem(PyDictObject *mp)
2049 {
2050 Py_ssize_t i = 0;
2051 PyDictEntry *ep;
2052 PyObject *res;
2053
2054 /* Allocate the result tuple before checking the size. Believe it
2055 * or not, this allocation could trigger a garbage collection which
2056 * could empty the dict, so if we checked the size first and that
2057 * happened, the result would be an infinite loop (searching for an
2058 * entry that no longer exists). Note that the usual popitem()
2059 * idiom is "while d: k, v = d.popitem()". so needing to throw the
2060 * tuple away if the dict *is* empty isn't a significant
2061 * inefficiency -- possible, but unlikely in practice.
2062 */
2063 res = PyTuple_New(2);
2064 if (res == NULL)
2065 return NULL;
2066 if (mp->ma_used == 0) {
2067 Py_DECREF(res);
2068 PyErr_SetString(PyExc_KeyError,
2069 "popitem(): dictionary is empty");
2070 return NULL;
2071 }
2072 /* Set ep to "the first" dict entry with a value. We abuse the hash
2073 * field of slot 0 to hold a search finger:
2074 * If slot 0 has a value, use slot 0.
2075 * Else slot 0 is being used to hold a search finger,
2076 * and we use its hash value as the first index to look.
2077 */
2078 ep = &mp->ma_table[0];
2079 if (ep->me_value == NULL) {
2080 i = ep->me_hash;
2081 /* The hash field may be a real hash value, or it may be a
2082 * legit search finger, or it may be a once-legit search
2083 * finger that's out of bounds now because it wrapped around
2084 * or the table shrunk -- simply make sure it's in bounds now.
2085 */
2086 if (i > mp->ma_mask || i < 1)
2087 i = 1; /* skip slot 0 */
2088 while ((ep = &mp->ma_table[i])->me_value == NULL) {
2089 i++;
2090 if (i > mp->ma_mask)
2091 i = 1;
2092 }
2093 }
2094 PyTuple_SET_ITEM(res, 0, ep->me_key);
2095 PyTuple_SET_ITEM(res, 1, ep->me_value);
2096 Py_INCREF(dummy);
2097 ep->me_key = dummy;
2098 ep->me_value = NULL;
2099 mp->ma_used--;
2100 assert(mp->ma_table[0].me_value == NULL);
2101 mp->ma_table[0].me_hash = i + 1; /* next place to start */
2102 return res;
2103 }
2104
2105 static int
dict_traverse(PyObject * op,visitproc visit,void * arg)2106 dict_traverse(PyObject *op, visitproc visit, void *arg)
2107 {
2108 Py_ssize_t i = 0;
2109 PyObject *pk;
2110 PyObject *pv;
2111
2112 while (PyDict_Next(op, &i, &pk, &pv)) {
2113 Py_VISIT(pk);
2114 Py_VISIT(pv);
2115 }
2116 return 0;
2117 }
2118
2119 static int
dict_tp_clear(PyObject * op)2120 dict_tp_clear(PyObject *op)
2121 {
2122 PyDict_Clear(op);
2123 return 0;
2124 }
2125
2126
2127 extern PyTypeObject PyDictIterKey_Type; /* Forward */
2128 extern PyTypeObject PyDictIterValue_Type; /* Forward */
2129 extern PyTypeObject PyDictIterItem_Type; /* Forward */
2130 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
2131
2132 static PyObject *
dict_iterkeys(PyDictObject * dict)2133 dict_iterkeys(PyDictObject *dict)
2134 {
2135 return dictiter_new(dict, &PyDictIterKey_Type);
2136 }
2137
2138 static PyObject *
dict_itervalues(PyDictObject * dict)2139 dict_itervalues(PyDictObject *dict)
2140 {
2141 return dictiter_new(dict, &PyDictIterValue_Type);
2142 }
2143
2144 static PyObject *
dict_iteritems(PyDictObject * dict)2145 dict_iteritems(PyDictObject *dict)
2146 {
2147 return dictiter_new(dict, &PyDictIterItem_Type);
2148 }
2149
2150 static PyObject *
dict_sizeof(PyDictObject * mp)2151 dict_sizeof(PyDictObject *mp)
2152 {
2153 Py_ssize_t res;
2154
2155 res = sizeof(PyDictObject);
2156 if (mp->ma_table != mp->ma_smalltable)
2157 res = res + (mp->ma_mask + 1) * sizeof(PyDictEntry);
2158 return PyInt_FromSsize_t(res);
2159 }
2160
2161 PyDoc_STRVAR(has_key__doc__,
2162 "D.has_key(k) -> True if D has a key k, else False");
2163
2164 PyDoc_STRVAR(contains__doc__,
2165 "D.__contains__(k) -> True if D has a key k, else False");
2166
2167 PyDoc_STRVAR(getitem__doc__, "x.__getitem__(y) <==> x[y]");
2168
2169 PyDoc_STRVAR(sizeof__doc__,
2170 "D.__sizeof__() -> size of D in memory, in bytes");
2171
2172 PyDoc_STRVAR(get__doc__,
2173 "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.");
2174
2175 PyDoc_STRVAR(setdefault_doc__,
2176 "D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D");
2177
2178 PyDoc_STRVAR(pop__doc__,
2179 "D.pop(k[,d]) -> v, remove specified key and return the corresponding value.\n\
2180 If key is not found, d is returned if given, otherwise KeyError is raised");
2181
2182 PyDoc_STRVAR(popitem__doc__,
2183 "D.popitem() -> (k, v), remove and return some (key, value) pair as a\n\
2184 2-tuple; but raise KeyError if D is empty.");
2185
2186 PyDoc_STRVAR(keys__doc__,
2187 "D.keys() -> list of D's keys");
2188
2189 PyDoc_STRVAR(items__doc__,
2190 "D.items() -> list of D's (key, value) pairs, as 2-tuples");
2191
2192 PyDoc_STRVAR(values__doc__,
2193 "D.values() -> list of D's values");
2194
2195 PyDoc_STRVAR(update__doc__,
2196 "D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n"
2197 "If E present and has a .keys() method, does: for k in E: D[k] = E[k]\n\
2198 If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v\n\
2199 In either case, this is followed by: for k in F: D[k] = F[k]");
2200
2201 PyDoc_STRVAR(fromkeys__doc__,
2202 "dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.\n\
2203 v defaults to None.");
2204
2205 PyDoc_STRVAR(clear__doc__,
2206 "D.clear() -> None. Remove all items from D.");
2207
2208 PyDoc_STRVAR(copy__doc__,
2209 "D.copy() -> a shallow copy of D");
2210
2211 PyDoc_STRVAR(iterkeys__doc__,
2212 "D.iterkeys() -> an iterator over the keys of D");
2213
2214 PyDoc_STRVAR(itervalues__doc__,
2215 "D.itervalues() -> an iterator over the values of D");
2216
2217 PyDoc_STRVAR(iteritems__doc__,
2218 "D.iteritems() -> an iterator over the (key, value) items of D");
2219
2220 /* Forward */
2221 static PyObject *dictkeys_new(PyObject *);
2222 static PyObject *dictitems_new(PyObject *);
2223 static PyObject *dictvalues_new(PyObject *);
2224
2225 PyDoc_STRVAR(viewkeys__doc__,
2226 "D.viewkeys() -> a set-like object providing a view on D's keys");
2227 PyDoc_STRVAR(viewitems__doc__,
2228 "D.viewitems() -> a set-like object providing a view on D's items");
2229 PyDoc_STRVAR(viewvalues__doc__,
2230 "D.viewvalues() -> an object providing a view on D's values");
2231
2232 static PyMethodDef mapp_methods[] = {
2233 {"__contains__",(PyCFunction)dict_contains, METH_O | METH_COEXIST,
2234 contains__doc__},
2235 {"__getitem__", (PyCFunction)dict_subscript, METH_O | METH_COEXIST,
2236 getitem__doc__},
2237 {"__sizeof__", (PyCFunction)dict_sizeof, METH_NOARGS,
2238 sizeof__doc__},
2239 {"has_key", (PyCFunction)dict_has_key, METH_O,
2240 has_key__doc__},
2241 {"get", (PyCFunction)dict_get, METH_VARARGS,
2242 get__doc__},
2243 {"setdefault", (PyCFunction)dict_setdefault, METH_VARARGS,
2244 setdefault_doc__},
2245 {"pop", (PyCFunction)dict_pop, METH_VARARGS,
2246 pop__doc__},
2247 {"popitem", (PyCFunction)dict_popitem, METH_NOARGS,
2248 popitem__doc__},
2249 {"keys", (PyCFunction)dict_keys, METH_NOARGS,
2250 keys__doc__},
2251 {"items", (PyCFunction)dict_items, METH_NOARGS,
2252 items__doc__},
2253 {"values", (PyCFunction)dict_values, METH_NOARGS,
2254 values__doc__},
2255 {"viewkeys", (PyCFunction)dictkeys_new, METH_NOARGS,
2256 viewkeys__doc__},
2257 {"viewitems", (PyCFunction)dictitems_new, METH_NOARGS,
2258 viewitems__doc__},
2259 {"viewvalues", (PyCFunction)dictvalues_new, METH_NOARGS,
2260 viewvalues__doc__},
2261 {"update", (PyCFunction)dict_update, METH_VARARGS | METH_KEYWORDS,
2262 update__doc__},
2263 {"fromkeys", (PyCFunction)dict_fromkeys, METH_VARARGS | METH_CLASS,
2264 fromkeys__doc__},
2265 {"clear", (PyCFunction)dict_clear, METH_NOARGS,
2266 clear__doc__},
2267 {"copy", (PyCFunction)dict_copy, METH_NOARGS,
2268 copy__doc__},
2269 {"iterkeys", (PyCFunction)dict_iterkeys, METH_NOARGS,
2270 iterkeys__doc__},
2271 {"itervalues", (PyCFunction)dict_itervalues, METH_NOARGS,
2272 itervalues__doc__},
2273 {"iteritems", (PyCFunction)dict_iteritems, METH_NOARGS,
2274 iteritems__doc__},
2275 {NULL, NULL} /* sentinel */
2276 };
2277
2278 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
2279 int
PyDict_Contains(PyObject * op,PyObject * key)2280 PyDict_Contains(PyObject *op, PyObject *key)
2281 {
2282 long hash;
2283 PyDictObject *mp = (PyDictObject *)op;
2284 PyDictEntry *ep;
2285
2286 if (!PyString_CheckExact(key) ||
2287 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
2288 hash = PyObject_Hash(key);
2289 if (hash == -1)
2290 return -1;
2291 }
2292 ep = (mp->ma_lookup)(mp, key, hash);
2293 return ep == NULL ? -1 : (ep->me_value != NULL);
2294 }
2295
2296 /* Internal version of PyDict_Contains used when the hash value is already known */
2297 int
_PyDict_Contains(PyObject * op,PyObject * key,long hash)2298 _PyDict_Contains(PyObject *op, PyObject *key, long hash)
2299 {
2300 PyDictObject *mp = (PyDictObject *)op;
2301 PyDictEntry *ep;
2302
2303 ep = (mp->ma_lookup)(mp, key, hash);
2304 return ep == NULL ? -1 : (ep->me_value != NULL);
2305 }
2306
2307 /* Hack to implement "key in dict" */
2308 static PySequenceMethods dict_as_sequence = {
2309 0, /* sq_length */
2310 0, /* sq_concat */
2311 0, /* sq_repeat */
2312 0, /* sq_item */
2313 0, /* sq_slice */
2314 0, /* sq_ass_item */
2315 0, /* sq_ass_slice */
2316 PyDict_Contains, /* sq_contains */
2317 0, /* sq_inplace_concat */
2318 0, /* sq_inplace_repeat */
2319 };
2320
2321 static PyObject *
dict_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2322 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2323 {
2324 PyObject *self;
2325
2326 assert(type != NULL && type->tp_alloc != NULL);
2327 self = type->tp_alloc(type, 0);
2328 if (self != NULL) {
2329 PyDictObject *d = (PyDictObject *)self;
2330 /* It's guaranteed that tp->alloc zeroed out the struct. */
2331 assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
2332 INIT_NONZERO_DICT_SLOTS(d);
2333 d->ma_lookup = lookdict_string;
2334 /* The object has been implicitly tracked by tp_alloc */
2335 if (type == &PyDict_Type)
2336 _PyObject_GC_UNTRACK(d);
2337 #ifdef SHOW_CONVERSION_COUNTS
2338 ++created;
2339 #endif
2340 #ifdef SHOW_TRACK_COUNT
2341 if (_PyObject_GC_IS_TRACKED(d))
2342 count_tracked++;
2343 else
2344 count_untracked++;
2345 #endif
2346 }
2347 return self;
2348 }
2349
2350 static int
dict_init(PyObject * self,PyObject * args,PyObject * kwds)2351 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
2352 {
2353 return dict_update_common(self, args, kwds, "dict");
2354 }
2355
2356 static PyObject *
dict_iter(PyDictObject * dict)2357 dict_iter(PyDictObject *dict)
2358 {
2359 return dictiter_new(dict, &PyDictIterKey_Type);
2360 }
2361
2362 PyDoc_STRVAR(dictionary_doc,
2363 "dict() -> new empty dictionary\n"
2364 "dict(mapping) -> new dictionary initialized from a mapping object's\n"
2365 " (key, value) pairs\n"
2366 "dict(iterable) -> new dictionary initialized as if via:\n"
2367 " d = {}\n"
2368 " for k, v in iterable:\n"
2369 " d[k] = v\n"
2370 "dict(**kwargs) -> new dictionary initialized with the name=value pairs\n"
2371 " in the keyword argument list. For example: dict(one=1, two=2)");
2372
2373 PyTypeObject PyDict_Type = {
2374 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2375 "dict",
2376 sizeof(PyDictObject),
2377 0,
2378 (destructor)dict_dealloc, /* tp_dealloc */
2379 (printfunc)dict_print, /* tp_print */
2380 0, /* tp_getattr */
2381 0, /* tp_setattr */
2382 (cmpfunc)dict_compare, /* tp_compare */
2383 (reprfunc)dict_repr, /* tp_repr */
2384 0, /* tp_as_number */
2385 &dict_as_sequence, /* tp_as_sequence */
2386 &dict_as_mapping, /* tp_as_mapping */
2387 (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
2388 0, /* tp_call */
2389 0, /* tp_str */
2390 PyObject_GenericGetAttr, /* tp_getattro */
2391 0, /* tp_setattro */
2392 0, /* tp_as_buffer */
2393 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
2394 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_DICT_SUBCLASS, /* tp_flags */
2395 dictionary_doc, /* tp_doc */
2396 dict_traverse, /* tp_traverse */
2397 dict_tp_clear, /* tp_clear */
2398 dict_richcompare, /* tp_richcompare */
2399 0, /* tp_weaklistoffset */
2400 (getiterfunc)dict_iter, /* tp_iter */
2401 0, /* tp_iternext */
2402 mapp_methods, /* tp_methods */
2403 0, /* tp_members */
2404 0, /* tp_getset */
2405 0, /* tp_base */
2406 0, /* tp_dict */
2407 0, /* tp_descr_get */
2408 0, /* tp_descr_set */
2409 0, /* tp_dictoffset */
2410 dict_init, /* tp_init */
2411 PyType_GenericAlloc, /* tp_alloc */
2412 dict_new, /* tp_new */
2413 PyObject_GC_Del, /* tp_free */
2414 };
2415
2416 /* For backward compatibility with old dictionary interface */
2417
2418 PyObject *
PyDict_GetItemString(PyObject * v,const char * key)2419 PyDict_GetItemString(PyObject *v, const char *key)
2420 {
2421 PyObject *kv, *rv;
2422 kv = PyString_FromString(key);
2423 if (kv == NULL)
2424 return NULL;
2425 rv = PyDict_GetItem(v, kv);
2426 Py_DECREF(kv);
2427 return rv;
2428 }
2429
2430 int
PyDict_SetItemString(PyObject * v,const char * key,PyObject * item)2431 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
2432 {
2433 PyObject *kv;
2434 int err;
2435 kv = PyString_FromString(key);
2436 if (kv == NULL)
2437 return -1;
2438 PyString_InternInPlace(&kv); /* XXX Should we really? */
2439 err = PyDict_SetItem(v, kv, item);
2440 Py_DECREF(kv);
2441 return err;
2442 }
2443
2444 int
PyDict_DelItemString(PyObject * v,const char * key)2445 PyDict_DelItemString(PyObject *v, const char *key)
2446 {
2447 PyObject *kv;
2448 int err;
2449 kv = PyString_FromString(key);
2450 if (kv == NULL)
2451 return -1;
2452 err = PyDict_DelItem(v, kv);
2453 Py_DECREF(kv);
2454 return err;
2455 }
2456
2457 /* Dictionary iterator types */
2458
2459 typedef struct {
2460 PyObject_HEAD
2461 PyDictObject *di_dict; /* Set to NULL when iterator is exhausted */
2462 Py_ssize_t di_used;
2463 Py_ssize_t di_pos;
2464 PyObject* di_result; /* reusable result tuple for iteritems */
2465 Py_ssize_t len;
2466 } dictiterobject;
2467
2468 static PyObject *
dictiter_new(PyDictObject * dict,PyTypeObject * itertype)2469 dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
2470 {
2471 dictiterobject *di;
2472 di = PyObject_GC_New(dictiterobject, itertype);
2473 if (di == NULL)
2474 return NULL;
2475 Py_INCREF(dict);
2476 di->di_dict = dict;
2477 di->di_used = dict->ma_used;
2478 di->di_pos = 0;
2479 di->len = dict->ma_used;
2480 if (itertype == &PyDictIterItem_Type) {
2481 di->di_result = PyTuple_Pack(2, Py_None, Py_None);
2482 if (di->di_result == NULL) {
2483 Py_DECREF(di);
2484 return NULL;
2485 }
2486 }
2487 else
2488 di->di_result = NULL;
2489 _PyObject_GC_TRACK(di);
2490 return (PyObject *)di;
2491 }
2492
2493 static void
dictiter_dealloc(dictiterobject * di)2494 dictiter_dealloc(dictiterobject *di)
2495 {
2496 Py_XDECREF(di->di_dict);
2497 Py_XDECREF(di->di_result);
2498 PyObject_GC_Del(di);
2499 }
2500
2501 static int
dictiter_traverse(dictiterobject * di,visitproc visit,void * arg)2502 dictiter_traverse(dictiterobject *di, visitproc visit, void *arg)
2503 {
2504 Py_VISIT(di->di_dict);
2505 Py_VISIT(di->di_result);
2506 return 0;
2507 }
2508
2509 static PyObject *
dictiter_len(dictiterobject * di)2510 dictiter_len(dictiterobject *di)
2511 {
2512 Py_ssize_t len = 0;
2513 if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
2514 len = di->len;
2515 return PyInt_FromSize_t(len);
2516 }
2517
2518 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
2519
2520 static PyMethodDef dictiter_methods[] = {
2521 {"__length_hint__", (PyCFunction)dictiter_len, METH_NOARGS, length_hint_doc},
2522 {NULL, NULL} /* sentinel */
2523 };
2524
dictiter_iternextkey(dictiterobject * di)2525 static PyObject *dictiter_iternextkey(dictiterobject *di)
2526 {
2527 PyObject *key;
2528 register Py_ssize_t i, mask;
2529 register PyDictEntry *ep;
2530 PyDictObject *d = di->di_dict;
2531
2532 if (d == NULL)
2533 return NULL;
2534 assert (PyDict_Check(d));
2535
2536 if (di->di_used != d->ma_used) {
2537 PyErr_SetString(PyExc_RuntimeError,
2538 "dictionary changed size during iteration");
2539 di->di_used = -1; /* Make this state sticky */
2540 return NULL;
2541 }
2542
2543 i = di->di_pos;
2544 if (i < 0)
2545 goto fail;
2546 ep = d->ma_table;
2547 mask = d->ma_mask;
2548 while (i <= mask && ep[i].me_value == NULL)
2549 i++;
2550 di->di_pos = i+1;
2551 if (i > mask)
2552 goto fail;
2553 di->len--;
2554 key = ep[i].me_key;
2555 Py_INCREF(key);
2556 return key;
2557
2558 fail:
2559 Py_DECREF(d);
2560 di->di_dict = NULL;
2561 return NULL;
2562 }
2563
2564 PyTypeObject PyDictIterKey_Type = {
2565 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2566 "dictionary-keyiterator", /* tp_name */
2567 sizeof(dictiterobject), /* tp_basicsize */
2568 0, /* tp_itemsize */
2569 /* methods */
2570 (destructor)dictiter_dealloc, /* tp_dealloc */
2571 0, /* tp_print */
2572 0, /* tp_getattr */
2573 0, /* tp_setattr */
2574 0, /* tp_compare */
2575 0, /* tp_repr */
2576 0, /* tp_as_number */
2577 0, /* tp_as_sequence */
2578 0, /* tp_as_mapping */
2579 0, /* tp_hash */
2580 0, /* tp_call */
2581 0, /* tp_str */
2582 PyObject_GenericGetAttr, /* tp_getattro */
2583 0, /* tp_setattro */
2584 0, /* tp_as_buffer */
2585 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2586 0, /* tp_doc */
2587 (traverseproc)dictiter_traverse, /* tp_traverse */
2588 0, /* tp_clear */
2589 0, /* tp_richcompare */
2590 0, /* tp_weaklistoffset */
2591 PyObject_SelfIter, /* tp_iter */
2592 (iternextfunc)dictiter_iternextkey, /* tp_iternext */
2593 dictiter_methods, /* tp_methods */
2594 0,
2595 };
2596
dictiter_iternextvalue(dictiterobject * di)2597 static PyObject *dictiter_iternextvalue(dictiterobject *di)
2598 {
2599 PyObject *value;
2600 register Py_ssize_t i, mask;
2601 register PyDictEntry *ep;
2602 PyDictObject *d = di->di_dict;
2603
2604 if (d == NULL)
2605 return NULL;
2606 assert (PyDict_Check(d));
2607
2608 if (di->di_used != d->ma_used) {
2609 PyErr_SetString(PyExc_RuntimeError,
2610 "dictionary changed size during iteration");
2611 di->di_used = -1; /* Make this state sticky */
2612 return NULL;
2613 }
2614
2615 i = di->di_pos;
2616 mask = d->ma_mask;
2617 if (i < 0 || i > mask)
2618 goto fail;
2619 ep = d->ma_table;
2620 while ((value=ep[i].me_value) == NULL) {
2621 i++;
2622 if (i > mask)
2623 goto fail;
2624 }
2625 di->di_pos = i+1;
2626 di->len--;
2627 Py_INCREF(value);
2628 return value;
2629
2630 fail:
2631 Py_DECREF(d);
2632 di->di_dict = NULL;
2633 return NULL;
2634 }
2635
2636 PyTypeObject PyDictIterValue_Type = {
2637 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2638 "dictionary-valueiterator", /* tp_name */
2639 sizeof(dictiterobject), /* tp_basicsize */
2640 0, /* tp_itemsize */
2641 /* methods */
2642 (destructor)dictiter_dealloc, /* tp_dealloc */
2643 0, /* tp_print */
2644 0, /* tp_getattr */
2645 0, /* tp_setattr */
2646 0, /* tp_compare */
2647 0, /* tp_repr */
2648 0, /* tp_as_number */
2649 0, /* tp_as_sequence */
2650 0, /* tp_as_mapping */
2651 0, /* tp_hash */
2652 0, /* tp_call */
2653 0, /* tp_str */
2654 PyObject_GenericGetAttr, /* tp_getattro */
2655 0, /* tp_setattro */
2656 0, /* tp_as_buffer */
2657 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2658 0, /* tp_doc */
2659 (traverseproc)dictiter_traverse, /* tp_traverse */
2660 0, /* tp_clear */
2661 0, /* tp_richcompare */
2662 0, /* tp_weaklistoffset */
2663 PyObject_SelfIter, /* tp_iter */
2664 (iternextfunc)dictiter_iternextvalue, /* tp_iternext */
2665 dictiter_methods, /* tp_methods */
2666 0,
2667 };
2668
dictiter_iternextitem(dictiterobject * di)2669 static PyObject *dictiter_iternextitem(dictiterobject *di)
2670 {
2671 PyObject *key, *value, *result = di->di_result;
2672 register Py_ssize_t i, mask;
2673 register PyDictEntry *ep;
2674 PyDictObject *d = di->di_dict;
2675
2676 if (d == NULL)
2677 return NULL;
2678 assert (PyDict_Check(d));
2679
2680 if (di->di_used != d->ma_used) {
2681 PyErr_SetString(PyExc_RuntimeError,
2682 "dictionary changed size during iteration");
2683 di->di_used = -1; /* Make this state sticky */
2684 return NULL;
2685 }
2686
2687 i = di->di_pos;
2688 if (i < 0)
2689 goto fail;
2690 ep = d->ma_table;
2691 mask = d->ma_mask;
2692 while (i <= mask && ep[i].me_value == NULL)
2693 i++;
2694 di->di_pos = i+1;
2695 if (i > mask)
2696 goto fail;
2697
2698 if (result->ob_refcnt == 1) {
2699 Py_INCREF(result);
2700 Py_DECREF(PyTuple_GET_ITEM(result, 0));
2701 Py_DECREF(PyTuple_GET_ITEM(result, 1));
2702 } else {
2703 result = PyTuple_New(2);
2704 if (result == NULL)
2705 return NULL;
2706 }
2707 di->len--;
2708 key = ep[i].me_key;
2709 value = ep[i].me_value;
2710 Py_INCREF(key);
2711 Py_INCREF(value);
2712 PyTuple_SET_ITEM(result, 0, key);
2713 PyTuple_SET_ITEM(result, 1, value);
2714 return result;
2715
2716 fail:
2717 Py_DECREF(d);
2718 di->di_dict = NULL;
2719 return NULL;
2720 }
2721
2722 PyTypeObject PyDictIterItem_Type = {
2723 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2724 "dictionary-itemiterator", /* tp_name */
2725 sizeof(dictiterobject), /* tp_basicsize */
2726 0, /* tp_itemsize */
2727 /* methods */
2728 (destructor)dictiter_dealloc, /* tp_dealloc */
2729 0, /* tp_print */
2730 0, /* tp_getattr */
2731 0, /* tp_setattr */
2732 0, /* tp_compare */
2733 0, /* tp_repr */
2734 0, /* tp_as_number */
2735 0, /* tp_as_sequence */
2736 0, /* tp_as_mapping */
2737 0, /* tp_hash */
2738 0, /* tp_call */
2739 0, /* tp_str */
2740 PyObject_GenericGetAttr, /* tp_getattro */
2741 0, /* tp_setattro */
2742 0, /* tp_as_buffer */
2743 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
2744 0, /* tp_doc */
2745 (traverseproc)dictiter_traverse, /* tp_traverse */
2746 0, /* tp_clear */
2747 0, /* tp_richcompare */
2748 0, /* tp_weaklistoffset */
2749 PyObject_SelfIter, /* tp_iter */
2750 (iternextfunc)dictiter_iternextitem, /* tp_iternext */
2751 dictiter_methods, /* tp_methods */
2752 0,
2753 };
2754
2755 /***********************************************/
2756 /* View objects for keys(), items(), values(). */
2757 /***********************************************/
2758
2759 /* The instance lay-out is the same for all three; but the type differs. */
2760
2761 typedef struct {
2762 PyObject_HEAD
2763 PyDictObject *dv_dict;
2764 } dictviewobject;
2765
2766
2767 static void
dictview_dealloc(dictviewobject * dv)2768 dictview_dealloc(dictviewobject *dv)
2769 {
2770 Py_XDECREF(dv->dv_dict);
2771 PyObject_GC_Del(dv);
2772 }
2773
2774 static int
dictview_traverse(dictviewobject * dv,visitproc visit,void * arg)2775 dictview_traverse(dictviewobject *dv, visitproc visit, void *arg)
2776 {
2777 Py_VISIT(dv->dv_dict);
2778 return 0;
2779 }
2780
2781 static Py_ssize_t
dictview_len(dictviewobject * dv)2782 dictview_len(dictviewobject *dv)
2783 {
2784 Py_ssize_t len = 0;
2785 if (dv->dv_dict != NULL)
2786 len = dv->dv_dict->ma_used;
2787 return len;
2788 }
2789
2790 static PyObject *
dictview_new(PyObject * dict,PyTypeObject * type)2791 dictview_new(PyObject *dict, PyTypeObject *type)
2792 {
2793 dictviewobject *dv;
2794 if (dict == NULL) {
2795 PyErr_BadInternalCall();
2796 return NULL;
2797 }
2798 if (!PyDict_Check(dict)) {
2799 /* XXX Get rid of this restriction later */
2800 PyErr_Format(PyExc_TypeError,
2801 "%s() requires a dict argument, not '%s'",
2802 type->tp_name, dict->ob_type->tp_name);
2803 return NULL;
2804 }
2805 dv = PyObject_GC_New(dictviewobject, type);
2806 if (dv == NULL)
2807 return NULL;
2808 Py_INCREF(dict);
2809 dv->dv_dict = (PyDictObject *)dict;
2810 _PyObject_GC_TRACK(dv);
2811 return (PyObject *)dv;
2812 }
2813
2814 /* TODO(guido): The views objects are not complete:
2815
2816 * support more set operations
2817 * support arbitrary mappings?
2818 - either these should be static or exported in dictobject.h
2819 - if public then they should probably be in builtins
2820 */
2821
2822 /* Return 1 if self is a subset of other, iterating over self;
2823 0 if not; -1 if an error occurred. */
2824 static int
all_contained_in(PyObject * self,PyObject * other)2825 all_contained_in(PyObject *self, PyObject *other)
2826 {
2827 PyObject *iter = PyObject_GetIter(self);
2828 int ok = 1;
2829
2830 if (iter == NULL)
2831 return -1;
2832 for (;;) {
2833 PyObject *next = PyIter_Next(iter);
2834 if (next == NULL) {
2835 if (PyErr_Occurred())
2836 ok = -1;
2837 break;
2838 }
2839 ok = PySequence_Contains(other, next);
2840 Py_DECREF(next);
2841 if (ok <= 0)
2842 break;
2843 }
2844 Py_DECREF(iter);
2845 return ok;
2846 }
2847
2848 static PyObject *
dictview_richcompare(PyObject * self,PyObject * other,int op)2849 dictview_richcompare(PyObject *self, PyObject *other, int op)
2850 {
2851 Py_ssize_t len_self, len_other;
2852 int ok;
2853 PyObject *result;
2854
2855 assert(self != NULL);
2856 assert(PyDictViewSet_Check(self));
2857 assert(other != NULL);
2858
2859 if (!PyAnySet_Check(other) && !PyDictViewSet_Check(other)) {
2860 Py_INCREF(Py_NotImplemented);
2861 return Py_NotImplemented;
2862 }
2863
2864 len_self = PyObject_Size(self);
2865 if (len_self < 0)
2866 return NULL;
2867 len_other = PyObject_Size(other);
2868 if (len_other < 0)
2869 return NULL;
2870
2871 ok = 0;
2872 switch(op) {
2873
2874 case Py_NE:
2875 case Py_EQ:
2876 if (len_self == len_other)
2877 ok = all_contained_in(self, other);
2878 if (op == Py_NE && ok >= 0)
2879 ok = !ok;
2880 break;
2881
2882 case Py_LT:
2883 if (len_self < len_other)
2884 ok = all_contained_in(self, other);
2885 break;
2886
2887 case Py_LE:
2888 if (len_self <= len_other)
2889 ok = all_contained_in(self, other);
2890 break;
2891
2892 case Py_GT:
2893 if (len_self > len_other)
2894 ok = all_contained_in(other, self);
2895 break;
2896
2897 case Py_GE:
2898 if (len_self >= len_other)
2899 ok = all_contained_in(other, self);
2900 break;
2901
2902 }
2903 if (ok < 0)
2904 return NULL;
2905 result = ok ? Py_True : Py_False;
2906 Py_INCREF(result);
2907 return result;
2908 }
2909
2910 static PyObject *
dictview_repr(dictviewobject * dv)2911 dictview_repr(dictviewobject *dv)
2912 {
2913 PyObject *seq;
2914 PyObject *seq_str;
2915 PyObject *result;
2916
2917 seq = PySequence_List((PyObject *)dv);
2918 if (seq == NULL)
2919 return NULL;
2920
2921 seq_str = PyObject_Repr(seq);
2922 if (seq_str == NULL) {
2923 Py_DECREF(seq);
2924 return NULL;
2925 }
2926 result = PyString_FromFormat("%s(%s)", Py_TYPE(dv)->tp_name,
2927 PyString_AS_STRING(seq_str));
2928 Py_DECREF(seq_str);
2929 Py_DECREF(seq);
2930 return result;
2931 }
2932
2933 /*** dict_keys ***/
2934
2935 static PyObject *
dictkeys_iter(dictviewobject * dv)2936 dictkeys_iter(dictviewobject *dv)
2937 {
2938 if (dv->dv_dict == NULL) {
2939 Py_RETURN_NONE;
2940 }
2941 return dictiter_new(dv->dv_dict, &PyDictIterKey_Type);
2942 }
2943
2944 static int
dictkeys_contains(dictviewobject * dv,PyObject * obj)2945 dictkeys_contains(dictviewobject *dv, PyObject *obj)
2946 {
2947 if (dv->dv_dict == NULL)
2948 return 0;
2949 return PyDict_Contains((PyObject *)dv->dv_dict, obj);
2950 }
2951
2952 static PySequenceMethods dictkeys_as_sequence = {
2953 (lenfunc)dictview_len, /* sq_length */
2954 0, /* sq_concat */
2955 0, /* sq_repeat */
2956 0, /* sq_item */
2957 0, /* sq_slice */
2958 0, /* sq_ass_item */
2959 0, /* sq_ass_slice */
2960 (objobjproc)dictkeys_contains, /* sq_contains */
2961 };
2962
2963 static PyObject*
dictviews_sub(PyObject * self,PyObject * other)2964 dictviews_sub(PyObject* self, PyObject *other)
2965 {
2966 PyObject *result = PySet_New(self);
2967 PyObject *tmp;
2968 if (result == NULL)
2969 return NULL;
2970
2971 tmp = PyObject_CallMethod(result, "difference_update", "O", other);
2972 if (tmp == NULL) {
2973 Py_DECREF(result);
2974 return NULL;
2975 }
2976
2977 Py_DECREF(tmp);
2978 return result;
2979 }
2980
2981 static PyObject*
dictviews_and(PyObject * self,PyObject * other)2982 dictviews_and(PyObject* self, PyObject *other)
2983 {
2984 PyObject *result = PySet_New(self);
2985 PyObject *tmp;
2986 if (result == NULL)
2987 return NULL;
2988
2989 tmp = PyObject_CallMethod(result, "intersection_update", "O", other);
2990 if (tmp == NULL) {
2991 Py_DECREF(result);
2992 return NULL;
2993 }
2994
2995 Py_DECREF(tmp);
2996 return result;
2997 }
2998
2999 static PyObject*
dictviews_or(PyObject * self,PyObject * other)3000 dictviews_or(PyObject* self, PyObject *other)
3001 {
3002 PyObject *result = PySet_New(self);
3003 PyObject *tmp;
3004 if (result == NULL)
3005 return NULL;
3006
3007 tmp = PyObject_CallMethod(result, "update", "O", other);
3008 if (tmp == NULL) {
3009 Py_DECREF(result);
3010 return NULL;
3011 }
3012
3013 Py_DECREF(tmp);
3014 return result;
3015 }
3016
3017 static PyObject*
dictviews_xor(PyObject * self,PyObject * other)3018 dictviews_xor(PyObject* self, PyObject *other)
3019 {
3020 PyObject *result = PySet_New(self);
3021 PyObject *tmp;
3022 if (result == NULL)
3023 return NULL;
3024
3025 tmp = PyObject_CallMethod(result, "symmetric_difference_update", "O",
3026 other);
3027 if (tmp == NULL) {
3028 Py_DECREF(result);
3029 return NULL;
3030 }
3031
3032 Py_DECREF(tmp);
3033 return result;
3034 }
3035
3036 static PyNumberMethods dictviews_as_number = {
3037 0, /*nb_add*/
3038 (binaryfunc)dictviews_sub, /*nb_subtract*/
3039 0, /*nb_multiply*/
3040 0, /*nb_divide*/
3041 0, /*nb_remainder*/
3042 0, /*nb_divmod*/
3043 0, /*nb_power*/
3044 0, /*nb_negative*/
3045 0, /*nb_positive*/
3046 0, /*nb_absolute*/
3047 0, /*nb_nonzero*/
3048 0, /*nb_invert*/
3049 0, /*nb_lshift*/
3050 0, /*nb_rshift*/
3051 (binaryfunc)dictviews_and, /*nb_and*/
3052 (binaryfunc)dictviews_xor, /*nb_xor*/
3053 (binaryfunc)dictviews_or, /*nb_or*/
3054 };
3055
3056 static PyMethodDef dictkeys_methods[] = {
3057 {NULL, NULL} /* sentinel */
3058 };
3059
3060 PyTypeObject PyDictKeys_Type = {
3061 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3062 "dict_keys", /* tp_name */
3063 sizeof(dictviewobject), /* tp_basicsize */
3064 0, /* tp_itemsize */
3065 /* methods */
3066 (destructor)dictview_dealloc, /* tp_dealloc */
3067 0, /* tp_print */
3068 0, /* tp_getattr */
3069 0, /* tp_setattr */
3070 0, /* tp_reserved */
3071 (reprfunc)dictview_repr, /* tp_repr */
3072 &dictviews_as_number, /* tp_as_number */
3073 &dictkeys_as_sequence, /* tp_as_sequence */
3074 0, /* tp_as_mapping */
3075 0, /* tp_hash */
3076 0, /* tp_call */
3077 0, /* tp_str */
3078 PyObject_GenericGetAttr, /* tp_getattro */
3079 0, /* tp_setattro */
3080 0, /* tp_as_buffer */
3081 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3082 Py_TPFLAGS_CHECKTYPES, /* tp_flags */
3083 0, /* tp_doc */
3084 (traverseproc)dictview_traverse, /* tp_traverse */
3085 0, /* tp_clear */
3086 dictview_richcompare, /* tp_richcompare */
3087 0, /* tp_weaklistoffset */
3088 (getiterfunc)dictkeys_iter, /* tp_iter */
3089 0, /* tp_iternext */
3090 dictkeys_methods, /* tp_methods */
3091 0,
3092 };
3093
3094 static PyObject *
dictkeys_new(PyObject * dict)3095 dictkeys_new(PyObject *dict)
3096 {
3097 return dictview_new(dict, &PyDictKeys_Type);
3098 }
3099
3100 /*** dict_items ***/
3101
3102 static PyObject *
dictitems_iter(dictviewobject * dv)3103 dictitems_iter(dictviewobject *dv)
3104 {
3105 if (dv->dv_dict == NULL) {
3106 Py_RETURN_NONE;
3107 }
3108 return dictiter_new(dv->dv_dict, &PyDictIterItem_Type);
3109 }
3110
3111 static int
dictitems_contains(dictviewobject * dv,PyObject * obj)3112 dictitems_contains(dictviewobject *dv, PyObject *obj)
3113 {
3114 PyObject *key, *value, *found;
3115 if (dv->dv_dict == NULL)
3116 return 0;
3117 if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 2)
3118 return 0;
3119 key = PyTuple_GET_ITEM(obj, 0);
3120 value = PyTuple_GET_ITEM(obj, 1);
3121 found = PyDict_GetItem((PyObject *)dv->dv_dict, key);
3122 if (found == NULL) {
3123 if (PyErr_Occurred())
3124 return -1;
3125 return 0;
3126 }
3127 return PyObject_RichCompareBool(value, found, Py_EQ);
3128 }
3129
3130 static PySequenceMethods dictitems_as_sequence = {
3131 (lenfunc)dictview_len, /* sq_length */
3132 0, /* sq_concat */
3133 0, /* sq_repeat */
3134 0, /* sq_item */
3135 0, /* sq_slice */
3136 0, /* sq_ass_item */
3137 0, /* sq_ass_slice */
3138 (objobjproc)dictitems_contains, /* sq_contains */
3139 };
3140
3141 static PyMethodDef dictitems_methods[] = {
3142 {NULL, NULL} /* sentinel */
3143 };
3144
3145 PyTypeObject PyDictItems_Type = {
3146 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3147 "dict_items", /* tp_name */
3148 sizeof(dictviewobject), /* tp_basicsize */
3149 0, /* tp_itemsize */
3150 /* methods */
3151 (destructor)dictview_dealloc, /* tp_dealloc */
3152 0, /* tp_print */
3153 0, /* tp_getattr */
3154 0, /* tp_setattr */
3155 0, /* tp_reserved */
3156 (reprfunc)dictview_repr, /* tp_repr */
3157 &dictviews_as_number, /* tp_as_number */
3158 &dictitems_as_sequence, /* tp_as_sequence */
3159 0, /* tp_as_mapping */
3160 0, /* tp_hash */
3161 0, /* tp_call */
3162 0, /* tp_str */
3163 PyObject_GenericGetAttr, /* tp_getattro */
3164 0, /* tp_setattro */
3165 0, /* tp_as_buffer */
3166 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
3167 Py_TPFLAGS_CHECKTYPES, /* tp_flags */
3168 0, /* tp_doc */
3169 (traverseproc)dictview_traverse, /* tp_traverse */
3170 0, /* tp_clear */
3171 dictview_richcompare, /* tp_richcompare */
3172 0, /* tp_weaklistoffset */
3173 (getiterfunc)dictitems_iter, /* tp_iter */
3174 0, /* tp_iternext */
3175 dictitems_methods, /* tp_methods */
3176 0,
3177 };
3178
3179 static PyObject *
dictitems_new(PyObject * dict)3180 dictitems_new(PyObject *dict)
3181 {
3182 return dictview_new(dict, &PyDictItems_Type);
3183 }
3184
3185 /*** dict_values ***/
3186
3187 static PyObject *
dictvalues_iter(dictviewobject * dv)3188 dictvalues_iter(dictviewobject *dv)
3189 {
3190 if (dv->dv_dict == NULL) {
3191 Py_RETURN_NONE;
3192 }
3193 return dictiter_new(dv->dv_dict, &PyDictIterValue_Type);
3194 }
3195
3196 static PySequenceMethods dictvalues_as_sequence = {
3197 (lenfunc)dictview_len, /* sq_length */
3198 0, /* sq_concat */
3199 0, /* sq_repeat */
3200 0, /* sq_item */
3201 0, /* sq_slice */
3202 0, /* sq_ass_item */
3203 0, /* sq_ass_slice */
3204 (objobjproc)0, /* sq_contains */
3205 };
3206
3207 static PyMethodDef dictvalues_methods[] = {
3208 {NULL, NULL} /* sentinel */
3209 };
3210
3211 PyTypeObject PyDictValues_Type = {
3212 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3213 "dict_values", /* tp_name */
3214 sizeof(dictviewobject), /* tp_basicsize */
3215 0, /* tp_itemsize */
3216 /* methods */
3217 (destructor)dictview_dealloc, /* tp_dealloc */
3218 0, /* tp_print */
3219 0, /* tp_getattr */
3220 0, /* tp_setattr */
3221 0, /* tp_reserved */
3222 (reprfunc)dictview_repr, /* tp_repr */
3223 0, /* tp_as_number */
3224 &dictvalues_as_sequence, /* tp_as_sequence */
3225 0, /* tp_as_mapping */
3226 0, /* tp_hash */
3227 0, /* tp_call */
3228 0, /* tp_str */
3229 PyObject_GenericGetAttr, /* tp_getattro */
3230 0, /* tp_setattro */
3231 0, /* tp_as_buffer */
3232 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3233 0, /* tp_doc */
3234 (traverseproc)dictview_traverse, /* tp_traverse */
3235 0, /* tp_clear */
3236 0, /* tp_richcompare */
3237 0, /* tp_weaklistoffset */
3238 (getiterfunc)dictvalues_iter, /* tp_iter */
3239 0, /* tp_iternext */
3240 dictvalues_methods, /* tp_methods */
3241 0,
3242 };
3243
3244 static PyObject *
dictvalues_new(PyObject * dict)3245 dictvalues_new(PyObject *dict)
3246 {
3247 return dictview_new(dict, &PyDictValues_Type);
3248 }
3249