1 /*
2
3 Reference Cycle Garbage Collection
4 ==================================
5
6 Neil Schemenauer <nas@arctrix.com>
7
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
10
11 http://www.arctrix.com/nas/python/gc/
12
13 The following mailing list threads provide a historical perspective on
14 the design of this module. Note that a fair amount of refinement has
15 occurred since those discussions.
16
17 http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18 http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19 http://mail.python.org/pipermail/python-dev/2000-March/002497.html
20
21 For a high-level view of the collection process, read the collect
22 function.
23
24 */
25
26 #include "Python.h"
27 #include "pycore_context.h"
28 #include "pycore_initconfig.h"
29 #include "pycore_interp.h" // PyInterpreterState.gc
30 #include "pycore_object.h"
31 #include "pycore_pyerrors.h"
32 #include "pycore_pystate.h" // _PyThreadState_GET()
33 #include "pydtrace.h"
34 #include "pytime.h" // _PyTime_GetMonotonicClock()
35
36 typedef struct _gc_runtime_state GCState;
37
38 /*[clinic input]
39 module gc
40 [clinic start generated code]*/
41 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
42
43
44 #ifdef Py_DEBUG
45 # define GC_DEBUG
46 #endif
47
48 #define GC_NEXT _PyGCHead_NEXT
49 #define GC_PREV _PyGCHead_PREV
50
51 // update_refs() sets this bit for all objects in the current generation.
52 // subtract_refs() and move_unreachable() use this bit to check whether a
53 // visited object belongs to the generation currently being collected.
54 //
55 // move_unreachable() removes this flag from reachable objects.
56 // Only unreachable objects have this flag.
57 //
58 // No object in the interpreter has this flag after GC ends.
59 #define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING
60
61 // Lowest bit of _gc_next is used for UNREACHABLE flag.
62 //
63 // This flag indicates that the object is in the unreachable list built by move_unreachable().
64 //
65 // Although this flag is used only in move_unreachable(), move_unreachable()
66 // doesn't clear it there, to skip an unnecessary extra iteration.
67 // move_legacy_finalizers() removes this flag instead.
68 // Between those two calls, the unreachable list is not a normal list and
69 // most gc_list_* functions cannot be used on it.
70 #define NEXT_MASK_UNREACHABLE (1)
71
72 /* Get an object's GC head */
73 #define AS_GC(o) ((PyGC_Head *)(o)-1)
74
75 /* Get the object given the GC head */
76 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
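/* Illustrative note (a sketch, not part of the real API): for objects
 * allocated through the GC allocator (_PyObject_GC_New and friends), the
 * PyGC_Head sits immediately before the object in memory, so AS_GC() and
 * FROM_GC() are plain pointer arithmetic in opposite directions and
 * round-trip exactly:
 *
 *     PyGC_Head *g = AS_GC(op);    assert(FROM_GC(g) == op);
 *     PyObject  *p = FROM_GC(g);   assert(AS_GC(p)   == g);
 */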
77
78 static inline int
79 gc_is_collecting(PyGC_Head *g)
80 {
81 return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
82 }
83
84 static inline void
85 gc_clear_collecting(PyGC_Head *g)
86 {
87 g->_gc_prev &= ~PREV_MASK_COLLECTING;
88 }
89
90 static inline Py_ssize_t
91 gc_get_refs(PyGC_Head *g)
92 {
93 return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
94 }
95
96 static inline void
97 gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
98 {
99 g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
100 | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
101 }
102
103 static inline void
104 gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
105 {
106 g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
107 | PREV_MASK_COLLECTING
108 | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
109 }
110
111 static inline void
112 gc_decref(PyGC_Head *g)
113 {
114 _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
115 gc_get_refs(g) > 0,
116 "refcount is too small");
117 g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
118 }
119
120 /* set for debugging information */
121 #define DEBUG_STATS (1<<0) /* print collection statistics */
122 #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
123 #define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */
124 #define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */
125 #define DEBUG_LEAK DEBUG_COLLECTABLE | \
126 DEBUG_UNCOLLECTABLE | \
127 DEBUG_SAVEALL
128
129 #define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
130
131 void
132 _PyGC_InitState(GCState *gcstate)
133 {
134 gcstate->enabled = 1; /* automatic collection enabled? */
135
136 #define _GEN_HEAD(n) GEN_HEAD(gcstate, n)
137 struct gc_generation generations[NUM_GENERATIONS] = {
138 /* PyGC_Head, threshold, count */
139 {{(uintptr_t)_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)}, 700, 0},
140 {{(uintptr_t)_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)}, 10, 0},
141 {{(uintptr_t)_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)}, 10, 0},
142 };
143 for (int i = 0; i < NUM_GENERATIONS; i++) {
144 gcstate->generations[i] = generations[i];
145 };
146 gcstate->generation0 = GEN_HEAD(gcstate, 0);
147 struct gc_generation permanent_generation = {
148 {(uintptr_t)&gcstate->permanent_generation.head,
149 (uintptr_t)&gcstate->permanent_generation.head}, 0, 0
150 };
151 gcstate->permanent_generation = permanent_generation;
152 }
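/* Informal reading of the defaults above (not authoritative): the count of
 * generation 0 tracks net new GC-tracked allocations and is compared against
 * its threshold (700) in collect_generations(), while the count of each older
 * generation is bumped whenever the next younger generation is collected (see
 * collect()) and compared against its threshold (10).  So, roughly, a gen-0
 * pass runs every ~700 net allocations, every ~10th gen-0 pass escalates to
 * gen 1, and every ~10th gen-1 pass may escalate to a full collection,
 * subject to the long_lived_pending / long_lived_total check in
 * collect_generations(). */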
153
154
155 PyStatus
156 _PyGC_Init(PyThreadState *tstate)
157 {
158 GCState *gcstate = &tstate->interp->gc;
159 if (gcstate->garbage == NULL) {
160 gcstate->garbage = PyList_New(0);
161 if (gcstate->garbage == NULL) {
162 return _PyStatus_NO_MEMORY();
163 }
164 }
165 return _PyStatus_OK();
166 }
167
168
169 /*
170 _gc_prev values
171 ---------------
172
173 Between collections, _gc_prev is used as part of the doubly linked list.
174
175 Lowest two bits of _gc_prev are used for flags.
176 PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
177 or _PyObject_GC_UNTRACK() is called.
178
179 During a collection, _gc_prev is temporarily used for gc_refs, and the gc list
180 is singly linked until _gc_prev is restored.
181
182 gc_refs
183 At the start of a collection, update_refs() copies the true refcount
184 to gc_refs, for each object in the generation being collected.
185 subtract_refs() then adjusts gc_refs so that it equals the number of
186 times an object is referenced directly from outside the generation
187 being collected.
188
189 PREV_MASK_COLLECTING
190 Objects in generation being collected are marked PREV_MASK_COLLECTING in
191 update_refs().
192
193
194 _gc_next values
195 ---------------
196
197 _gc_next takes these values:
198
199 0
200 The object is not tracked
201
202 != 0
203 Pointer to the next object in the GC list.
204 Additionally, the lowest bit is temporarily used for the
205 NEXT_MASK_UNREACHABLE flag described below.
206
207 NEXT_MASK_UNREACHABLE
208 move_unreachable() then moves objects not reachable (whether directly or
209 indirectly) from outside the generation into an "unreachable" set and
210 sets this flag.
211
212 Objects that are found to be reachable have gc_refs set to 1.
213 When this flag is set on an object that turns out to be reachable, the
214 object must currently be in the "unreachable" set; the flag is then unset
215 and the object is moved back to the "reachable" set.
216
217 move_legacy_finalizers() removes this flag from the objects in the "unreachable" set.
218 */
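/* Illustrative sketch (mirrors the inline helpers above, not a new API):
 * given the layout just described, the pieces packed into the two words can
 * be pulled apart like this while a collection is running:
 *
 *     Py_ssize_t refs        = (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
 *     int        collecting  = (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
 *     int        finalized   = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0;
 *     int        unreachable = (g->_gc_next & NEXT_MASK_UNREACHABLE) != 0;
 *     PyGC_Head *next        = (PyGC_Head *)(g->_gc_next & ~NEXT_MASK_UNREACHABLE);
 *
 * Only the refcount copy and PREV_MASK_COLLECTING are collection-time state;
 * the FINALIZED bit survives between collections.
 */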
219
220 /*** list functions ***/
221
222 static inline void
223 gc_list_init(PyGC_Head *list)
224 {
225 // The list header must not have flags set,
226 // so we can assign the pointers with a simple cast.
227 list->_gc_prev = (uintptr_t)list;
228 list->_gc_next = (uintptr_t)list;
229 }
230
231 static inline int
232 gc_list_is_empty(PyGC_Head *list)
233 {
234 return (list->_gc_next == (uintptr_t)list);
235 }
236
237 /* Append `node` to `list`. */
238 static inline void
239 gc_list_append(PyGC_Head *node, PyGC_Head *list)
240 {
241 PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
242
243 // last <-> node
244 _PyGCHead_SET_PREV(node, last);
245 _PyGCHead_SET_NEXT(last, node);
246
247 // node <-> list
248 _PyGCHead_SET_NEXT(node, list);
249 list->_gc_prev = (uintptr_t)node;
250 }
251
252 /* Remove `node` from the gc list it's currently in. */
253 static inline void
254 gc_list_remove(PyGC_Head *node)
255 {
256 PyGC_Head *prev = GC_PREV(node);
257 PyGC_Head *next = GC_NEXT(node);
258
259 _PyGCHead_SET_NEXT(prev, next);
260 _PyGCHead_SET_PREV(next, prev);
261
262 node->_gc_next = 0; /* object is not currently tracked */
263 }
264
265 /* Move `node` from the gc list it's currently in (which is not explicitly
266 * named here) to the end of `list`. This is semantically the same as
267 * gc_list_remove(node) followed by gc_list_append(node, list).
268 */
269 static void
270 gc_list_move(PyGC_Head *node, PyGC_Head *list)
271 {
272 /* Unlink from current list. */
273 PyGC_Head *from_prev = GC_PREV(node);
274 PyGC_Head *from_next = GC_NEXT(node);
275 _PyGCHead_SET_NEXT(from_prev, from_next);
276 _PyGCHead_SET_PREV(from_next, from_prev);
277
278 /* Relink at end of new list. */
279 // list must not have flags. So we can skip macros.
280 PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
281 _PyGCHead_SET_PREV(node, to_prev);
282 _PyGCHead_SET_NEXT(to_prev, node);
283 list->_gc_prev = (uintptr_t)node;
284 _PyGCHead_SET_NEXT(node, list);
285 }
286
287 /* append list `from` onto list `to`; `from` becomes an empty list */
288 static void
289 gc_list_merge(PyGC_Head *from, PyGC_Head *to)
290 {
291 assert(from != to);
292 if (!gc_list_is_empty(from)) {
293 PyGC_Head *to_tail = GC_PREV(to);
294 PyGC_Head *from_head = GC_NEXT(from);
295 PyGC_Head *from_tail = GC_PREV(from);
296 assert(from_head != from);
297 assert(from_tail != from);
298
299 _PyGCHead_SET_NEXT(to_tail, from_head);
300 _PyGCHead_SET_PREV(from_head, to_tail);
301
302 _PyGCHead_SET_NEXT(from_tail, to);
303 _PyGCHead_SET_PREV(to, from_tail);
304 }
305 gc_list_init(from);
306 }
307
308 static Py_ssize_t
309 gc_list_size(PyGC_Head *list)
310 {
311 PyGC_Head *gc;
312 Py_ssize_t n = 0;
313 for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
314 n++;
315 }
316 return n;
317 }
318
319 /* Walk the list and mark all objects as non-collecting */
320 static inline void
321 gc_list_clear_collecting(PyGC_Head *collectable)
322 {
323 PyGC_Head *gc;
324 for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
325 gc_clear_collecting(gc);
326 }
327 }
328
329 /* Append objects in a GC list to a Python list.
330 * Return 0 if all OK, < 0 if error (out of memory for list)
331 */
332 static int
333 append_objects(PyObject *py_list, PyGC_Head *gc_list)
334 {
335 PyGC_Head *gc;
336 for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
337 PyObject *op = FROM_GC(gc);
338 if (op != py_list) {
339 if (PyList_Append(py_list, op)) {
340 return -1; /* exception */
341 }
342 }
343 }
344 return 0;
345 }
346
347 // Constants for validate_list's flags argument.
348 enum flagstates {collecting_clear_unreachable_clear,
349 collecting_clear_unreachable_set,
350 collecting_set_unreachable_clear,
351 collecting_set_unreachable_set};
352
353 #ifdef GC_DEBUG
354 // validate_list checks list consistency.  It also serves as documentation
355 // describing when flags are expected to be set / unset.
356 // `head` must be a doubly-linked gc list, although it's fine (expected!) if
357 // the prev and next pointers are "polluted" with flags.
358 // What's checked:
359 // - The `head` pointers are not polluted.
360 // - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
361 //   set or clear, as specified by the 'flags' argument.
362 // - The prev and next pointers are mutually consistent.
363 static void
364 validate_list(PyGC_Head *head, enum flagstates flags)
365 {
366 assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
367 assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
368 uintptr_t prev_value = 0, next_value = 0;
369 switch (flags) {
370 case collecting_clear_unreachable_clear:
371 break;
372 case collecting_set_unreachable_clear:
373 prev_value = PREV_MASK_COLLECTING;
374 break;
375 case collecting_clear_unreachable_set:
376 next_value = NEXT_MASK_UNREACHABLE;
377 break;
378 case collecting_set_unreachable_set:
379 prev_value = PREV_MASK_COLLECTING;
380 next_value = NEXT_MASK_UNREACHABLE;
381 break;
382 default:
383 assert(! "bad internal flags argument");
384 }
385 PyGC_Head *prev = head;
386 PyGC_Head *gc = GC_NEXT(head);
387 while (gc != head) {
388 PyGC_Head *trueprev = GC_PREV(gc);
389 PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
390 assert(truenext != NULL);
391 assert(trueprev == prev);
392 assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
393 assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
394 prev = gc;
395 gc = truenext;
396 }
397 assert(prev == GC_PREV(head));
398 }
399 #else
400 #define validate_list(x, y) do{}while(0)
401 #endif
402
403 /*** end of list stuff ***/
404
405
406 /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and
407 * PREV_MASK_COLLECTING bit is set for all objects in containers.
408 */
409 static void
410 update_refs(PyGC_Head *containers)
411 {
412 PyGC_Head *gc = GC_NEXT(containers);
413 for (; gc != containers; gc = GC_NEXT(gc)) {
414 gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
415 /* Python's cyclic gc should never see an incoming refcount
416 * of 0: if something decref'ed to 0, it should have been
417 * deallocated immediately at that time.
418 * Possible cause (if the assert triggers): a tp_dealloc
419 * routine left a gc-aware object tracked during its teardown
420 * phase, and did something-- or allowed something to happen --
421 * that called back into Python. gc can trigger then, and may
422 * see the still-tracked dying object. Before this assert
423 * was added, such mistakes went on to allow gc to try to
424 * delete the object again. In a debug build, that caused
425 * a mysterious segfault, when _Py_ForgetReference tried
426 * to remove the object from the doubly-linked list of all
427 * objects a second time. In a release build, an actual
428 * double deallocation occurred, which leads to corruption
429 * of the allocator's internal bookkeeping pointers. That's
430 * so serious that maybe this should be a release-build
431 * check instead of an assert?
432 */
433 _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
434 }
435 }
436
437 /* A traversal callback for subtract_refs. */
438 static int
439 visit_decref(PyObject *op, void *parent)
440 {
441 _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
442
443 if (_PyObject_IS_GC(op)) {
444 PyGC_Head *gc = AS_GC(op);
445 /* We're only interested in gc_refs for objects in the
446 * generation being collected, which can be recognized
447 * because only they have positive gc_refs.
448 */
449 if (gc_is_collecting(gc)) {
450 gc_decref(gc);
451 }
452 }
453 return 0;
454 }
455
456 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
457 * for all objects in containers, and is GC_REACHABLE for all tracked gc
458 * objects not in containers. The ones with gc_refs > 0 are directly
459 * reachable from outside containers, and so can't be collected.
460 */
461 static void
462 subtract_refs(PyGC_Head *containers)
463 {
464 traverseproc traverse;
465 PyGC_Head *gc = GC_NEXT(containers);
466 for (; gc != containers; gc = GC_NEXT(gc)) {
467 PyObject *op = FROM_GC(gc);
468 traverse = Py_TYPE(op)->tp_traverse;
469 (void) traverse(FROM_GC(gc),
470 (visitproc)visit_decref,
471 op);
472 }
473 }
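/* Illustrative sketch (a hypothetical extension type, not CPython code):
 * subtract_refs() works only because every tracked container reports its
 * references via tp_traverse; each Py_VISIT() below turns into one
 * visit_decref() call above.  With such a type, two objects that only point
 * at each other end up with gc_refs == 0 after subtract_refs(), while an
 * object also referenced from, say, a local variable keeps gc_refs >= 1.
 *
 *     typedef struct { PyObject_HEAD PyObject *partner; } pairobject;
 *
 *     static int
 *     pair_traverse(pairobject *self, visitproc visit, void *arg)
 *     {
 *         Py_VISIT(self->partner);
 *         return 0;
 *     }
 */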
474
475 /* A traversal callback for move_unreachable. */
476 static int
477 visit_reachable(PyObject *op, PyGC_Head *reachable)
478 {
479 if (!_PyObject_IS_GC(op)) {
480 return 0;
481 }
482
483 PyGC_Head *gc = AS_GC(op);
484 const Py_ssize_t gc_refs = gc_get_refs(gc);
485
486 // Ignore objects in other generations.
487 // This also skips objects "to the left" of the current position in
488 // move_unreachable's scan of the 'young' list - they've already been
489 // traversed, and no longer have the PREV_MASK_COLLECTING flag.
490 if (! gc_is_collecting(gc)) {
491 return 0;
492 }
493 // It would be a logic error elsewhere if the collecting flag were set on
494 // an untracked object.
495 assert(gc->_gc_next != 0);
496
497 if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
498 /* This had gc_refs = 0 when move_unreachable got
499 * to it, but turns out it's reachable after all.
500 * Move it back to move_unreachable's 'young' list,
501 * and move_unreachable will eventually get to it
502 * again.
503 */
504 // Manually unlink gc from unreachable list because the list functions
505 // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
506 PyGC_Head *prev = GC_PREV(gc);
507 PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
508 _PyObject_ASSERT(FROM_GC(prev),
509 prev->_gc_next & NEXT_MASK_UNREACHABLE);
510 _PyObject_ASSERT(FROM_GC(next),
511 next->_gc_next & NEXT_MASK_UNREACHABLE);
512 prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE
513 _PyGCHead_SET_PREV(next, prev);
514
515 gc_list_append(gc, reachable);
516 gc_set_refs(gc, 1);
517 }
518 else if (gc_refs == 0) {
519 /* This is in move_unreachable's 'young' list, but
520 * the traversal hasn't yet gotten to it. All
521 * we need to do is tell move_unreachable that it's
522 * reachable.
523 */
524 gc_set_refs(gc, 1);
525 }
526 /* Else there's nothing to do.
527 * If gc_refs > 0, it must be in move_unreachable's 'young'
528 * list, and move_unreachable will eventually get to it.
529 */
530 else {
531 _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
532 }
533 return 0;
534 }
535
536 /* Move the unreachable objects from young to unreachable. After this,
537 * no object in young has the PREV_MASK_COLLECTING flag and every object
538 * in unreachable has it.
539 * All objects in young after this are directly or indirectly reachable
540 * from outside the original young; and all objects in unreachable are
541 * not.
542 *
543 * This function restores the _gc_prev pointers; young and unreachable are
544 * doubly linked lists after this function.
545 * But _gc_next in the unreachable list still has the NEXT_MASK_UNREACHABLE
546 * flag, so gc_list_* functions cannot be used on unreachable until the flag is removed.
547 */
548 static void
549 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
550 {
551 // previous elem in the young list, used to restore _gc_prev.
552 PyGC_Head *prev = young;
553 PyGC_Head *gc = GC_NEXT(young);
554
555 /* Invariants: all objects "to the left" of us in young are reachable
556 * (directly or indirectly) from outside the young list as it was at entry.
557 *
558 * All other objects from the original young "to the left" of us are in
559 * unreachable now, and have NEXT_MASK_UNREACHABLE. All objects to the
560 * left of us in 'young' now have been scanned, and no objects here
561 * or to the right have been scanned yet.
562 */
563
564 while (gc != young) {
565 if (gc_get_refs(gc)) {
566 /* gc is definitely reachable from outside the
567 * original 'young'. Mark it as such, and traverse
568 * its pointers to find any other objects that may
569 * be directly reachable from it. Note that the
570 * call to tp_traverse may append objects to young,
571 * so we have to wait until it returns to determine
572 * the next object to visit.
573 */
574 PyObject *op = FROM_GC(gc);
575 traverseproc traverse = Py_TYPE(op)->tp_traverse;
576 _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
577 "refcount is too small");
578 // NOTE: visit_reachable may change gc->_gc_next when
579 // young->_gc_prev == gc. Don't do gc = GC_NEXT(gc) before!
580 (void) traverse(op,
581 (visitproc)visit_reachable,
582 (void *)young);
583 // relink gc_prev to prev element.
584 _PyGCHead_SET_PREV(gc, prev);
585 // gc is not COLLECTING state after here.
586 gc_clear_collecting(gc);
587 prev = gc;
588 }
589 else {
590 /* This *may* be unreachable. To make progress,
591 * assume it is. gc isn't directly reachable from
592 * any object we've already traversed, but may be
593 * reachable from an object we haven't gotten to yet.
594 * visit_reachable will eventually move gc back into
595 * young if that's so, and we'll see it again.
596 */
597 // Move gc to unreachable.
598 // No need to set gc->next->prev = prev because the list is singly linked here.
599 prev->_gc_next = gc->_gc_next;
600
601 // We can't use gc_list_append() here because we use
602 // NEXT_MASK_UNREACHABLE here.
603 PyGC_Head *last = GC_PREV(unreachable);
604 // NOTE: Since all objects in the unreachable set have the
605 // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
606 // But this may pollute the unreachable list head's 'next' pointer
607 // too. That's semantically senseless but expedient here - the
608 // damage is repaired when this function ends.
609 last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
610 _PyGCHead_SET_PREV(gc, last);
611 gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
612 unreachable->_gc_prev = (uintptr_t)gc;
613 }
614 gc = (PyGC_Head*)prev->_gc_next;
615 }
616 // young->_gc_prev must be the last element remaining in the list.
617 young->_gc_prev = (uintptr_t)prev;
618 // don't let the pollution of the list head's next pointer leak
619 unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
620 }
621
622 static void
623 untrack_tuples(PyGC_Head *head)
624 {
625 PyGC_Head *next, *gc = GC_NEXT(head);
626 while (gc != head) {
627 PyObject *op = FROM_GC(gc);
628 next = GC_NEXT(gc);
629 if (PyTuple_CheckExact(op)) {
630 _PyTuple_MaybeUntrack(op);
631 }
632 gc = next;
633 }
634 }
635
636 /* Try to untrack all currently tracked dictionaries */
637 static void
638 untrack_dicts(PyGC_Head *head)
639 {
640 PyGC_Head *next, *gc = GC_NEXT(head);
641 while (gc != head) {
642 PyObject *op = FROM_GC(gc);
643 next = GC_NEXT(gc);
644 if (PyDict_CheckExact(op)) {
645 _PyDict_MaybeUntrack(op);
646 }
647 gc = next;
648 }
649 }
650
651 /* Return true if object has a pre-PEP 442 finalization method. */
652 static int
653 has_legacy_finalizer(PyObject *op)
654 {
655 return Py_TYPE(op)->tp_del != NULL;
656 }
657
658 /* Move the objects in unreachable with tp_del slots into `finalizers`.
659 *
660 * This function also removes NEXT_MASK_UNREACHABLE flag
661 * from _gc_next in unreachable.
662 */
663 static void
664 move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
665 {
666 PyGC_Head *gc, *next;
667 assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
668
669 /* March over unreachable. Move objects with finalizers into
670 * `finalizers`.
671 */
672 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
673 PyObject *op = FROM_GC(gc);
674
675 _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
676 gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
677 next = (PyGC_Head*)gc->_gc_next;
678
679 if (has_legacy_finalizer(op)) {
680 gc_clear_collecting(gc);
681 gc_list_move(gc, finalizers);
682 }
683 }
684 }
685
686 static inline void
687 clear_unreachable_mask(PyGC_Head *unreachable)
688 {
689 /* Check that the list head does not have the unreachable bit set */
690 assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
691
692 PyGC_Head *gc, *next;
693 assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
694 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
695 _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
696 gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
697 next = (PyGC_Head*)gc->_gc_next;
698 }
699 validate_list(unreachable, collecting_set_unreachable_clear);
700 }
701
702 /* A traversal callback for move_legacy_finalizer_reachable. */
703 static int
704 visit_move(PyObject *op, PyGC_Head *tolist)
705 {
706 if (_PyObject_IS_GC(op)) {
707 PyGC_Head *gc = AS_GC(op);
708 if (gc_is_collecting(gc)) {
709 gc_list_move(gc, tolist);
710 gc_clear_collecting(gc);
711 }
712 }
713 return 0;
714 }
715
716 /* Move objects that are reachable from finalizers, from the unreachable set
717 * into finalizers set.
718 */
719 static void
720 move_legacy_finalizer_reachable(PyGC_Head *finalizers)
721 {
722 traverseproc traverse;
723 PyGC_Head *gc = GC_NEXT(finalizers);
724 for (; gc != finalizers; gc = GC_NEXT(gc)) {
725 /* Note that the finalizers list may grow during this. */
726 traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
727 (void) traverse(FROM_GC(gc),
728 (visitproc)visit_move,
729 (void *)finalizers);
730 }
731 }
732
733 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
734 * callback, invoke it if necessary. Note that it's possible for such
735 * weakrefs to be outside the unreachable set -- indeed, those are precisely
736 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
737 * overview & some details. Some weakrefs with callbacks may be reclaimed
738 * directly by this routine; the number reclaimed is the return value. Other
739 * weakrefs with callbacks may be moved into the `old` generation. Objects
740 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
741 * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns,
742 * no object in `unreachable` is weakly referenced anymore.
743 */
744 static int
745 handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
746 {
747 PyGC_Head *gc;
748 PyObject *op; /* generally FROM_GC(gc) */
749 PyWeakReference *wr; /* generally a cast of op */
750 PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
751 PyGC_Head *next;
752 int num_freed = 0;
753
754 gc_list_init(&wrcb_to_call);
755
756 /* Clear all weakrefs to the objects in unreachable. If such a weakref
757 * also has a callback, move it into `wrcb_to_call` if the callback
758 * needs to be invoked. Note that we cannot invoke any callbacks until
759 * all weakrefs to unreachable objects are cleared, lest the callback
760 * resurrect an unreachable object via a still-active weakref. We
761 * make another pass over wrcb_to_call, invoking callbacks, after this
762 * pass completes.
763 */
764 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
765 PyWeakReference **wrlist;
766
767 op = FROM_GC(gc);
768 next = GC_NEXT(gc);
769
770 if (PyWeakref_Check(op)) {
771 /* A weakref inside the unreachable set must be cleared. If we
772 * allow its callback to execute inside delete_garbage(), it
773 * could expose objects that have tp_clear already called on
774 * them. Or, it could resurrect unreachable objects. One way
775 * this can happen is if some container objects do not implement
776 * tp_traverse. Then, wr_object can be outside the unreachable
777 * set but can be deallocated as a result of breaking the
778 * reference cycle. If we don't clear the weakref, the callback
779 * will run and potentially cause a crash. See bpo-38006 for
780 * one example.
781 */
782 _PyWeakref_ClearRef((PyWeakReference *)op);
783 }
784
785 if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
786 continue;
787
788 /* It supports weakrefs. Does it have any? */
789 wrlist = (PyWeakReference **)
790 _PyObject_GET_WEAKREFS_LISTPTR(op);
791
792 /* `op` may have some weakrefs. March over the list, clear
793 * all the weakrefs, and move the weakrefs with callbacks
794 * that must be called into wrcb_to_call.
795 */
796 for (wr = *wrlist; wr != NULL; wr = *wrlist) {
797 PyGC_Head *wrasgc; /* AS_GC(wr) */
798
799 /* _PyWeakref_ClearRef clears the weakref but leaves
800 * the callback pointer intact. Obscure: it also
801 * changes *wrlist.
802 */
803 _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
804 _PyWeakref_ClearRef(wr);
805 _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
806 if (wr->wr_callback == NULL) {
807 /* no callback */
808 continue;
809 }
810
811 /* Headache time. `op` is going away, and is weakly referenced by
812 * `wr`, which has a callback. Should the callback be invoked? If wr
813 * is also trash, no:
814 *
815 * 1. There's no need to call it. The object and the weakref are
816 * both going away, so it's legitimate to pretend the weakref is
817 * going away first. The user has to ensure a weakref outlives its
818 * referent if they want a guarantee that the wr callback will get
819 * invoked.
820 *
821 * 2. It may be catastrophic to call it. If the callback is also in
822 * cyclic trash (CT), then although the CT is unreachable from
823 * outside the current generation, CT may be reachable from the
824 * callback. Then the callback could resurrect insane objects.
825 *
826 * Since the callback is never needed and may be unsafe in this case,
827 * wr is simply left in the unreachable set. Note that because we
828 * already called _PyWeakref_ClearRef(wr), its callback will never
829 * trigger.
830 *
831 * OTOH, if wr isn't part of CT, we should invoke the callback: the
832 * weakref outlived the trash. Note that since wr isn't CT in this
833 * case, its callback can't be CT either -- wr acted as an external
834 * root to this generation, and therefore its callback did too. So
835 * nothing in CT is reachable from the callback either, so it's hard
836 * to imagine how calling it later could create a problem for us. wr
837 * is moved to wrcb_to_call in this case.
838 */
839 if (gc_is_collecting(AS_GC(wr))) {
840 /* it should already have been cleared above */
841 assert(wr->wr_object == Py_None);
842 continue;
843 }
844
845 /* Create a new reference so that wr can't go away
846 * before we can process it again.
847 */
848 Py_INCREF(wr);
849
850 /* Move wr to wrcb_to_call, for the next pass. */
851 wrasgc = AS_GC(wr);
852 assert(wrasgc != next); /* wrasgc is reachable, but
853 next isn't, so they can't
854 be the same */
855 gc_list_move(wrasgc, &wrcb_to_call);
856 }
857 }
858
859 /* Invoke the callbacks we decided to honor. It's safe to invoke them
860 * because they can't reference unreachable objects.
861 */
862 while (! gc_list_is_empty(&wrcb_to_call)) {
863 PyObject *temp;
864 PyObject *callback;
865
866 gc = (PyGC_Head*)wrcb_to_call._gc_next;
867 op = FROM_GC(gc);
868 _PyObject_ASSERT(op, PyWeakref_Check(op));
869 wr = (PyWeakReference *)op;
870 callback = wr->wr_callback;
871 _PyObject_ASSERT(op, callback != NULL);
872
873 /* copy-paste of weakrefobject.c's handle_callback() */
874 temp = PyObject_CallOneArg(callback, (PyObject *)wr);
875 if (temp == NULL)
876 PyErr_WriteUnraisable(callback);
877 else
878 Py_DECREF(temp);
879
880 /* Give up the reference we created in the first pass. When
881 * op's refcount hits 0 (which it may or may not do right now),
882 * op's tp_dealloc will decref op->wr_callback too. Note
883 * that the refcount probably will hit 0 now, and because this
884 * weakref was reachable to begin with, gc didn't already
885 * add it to its count of freed objects. Example: a reachable
886 * weak value dict maps some key to this reachable weakref.
887 * The callback removes this key->weakref mapping from the
888 * dict, leaving no other references to the weakref (excepting
889 * ours).
890 */
891 Py_DECREF(op);
892 if (wrcb_to_call._gc_next == (uintptr_t)gc) {
893 /* object is still alive -- move it */
894 gc_list_move(gc, old);
895 }
896 else {
897 ++num_freed;
898 }
899 }
900
901 return num_freed;
902 }
903
904 static void
905 debug_cycle(const char *msg, PyObject *op)
906 {
907 PySys_FormatStderr("gc: %s <%s %p>\n",
908 msg, Py_TYPE(op)->tp_name, op);
909 }
910
911 /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
912 * only from such cycles).
913 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
914 * garbage list (a Python list), else only the objects in finalizers with
915 * __del__ methods are appended to garbage. All objects in finalizers are
916 * merged into the old list regardless.
917 */
918 static void
919 handle_legacy_finalizers(PyThreadState *tstate,
920 GCState *gcstate,
921 PyGC_Head *finalizers, PyGC_Head *old)
922 {
923 assert(!_PyErr_Occurred(tstate));
924 assert(gcstate->garbage != NULL);
925
926 PyGC_Head *gc = GC_NEXT(finalizers);
927 for (; gc != finalizers; gc = GC_NEXT(gc)) {
928 PyObject *op = FROM_GC(gc);
929
930 if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
931 if (PyList_Append(gcstate->garbage, op) < 0) {
932 _PyErr_Clear(tstate);
933 break;
934 }
935 }
936 }
937
938 gc_list_merge(finalizers, old);
939 }
940
941 /* Run first-time finalizers (if any) on all the objects in collectable.
942 * Note that this may remove some (or even all) of the objects from the
943 * list, due to refcounts falling to 0.
944 */
945 static void
946 finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
947 {
948 destructor finalize;
949 PyGC_Head seen;
950
951 /* While we're going through the loop, `finalize(op)` may cause op, or
952 * other objects, to be reclaimed via refcounts falling to zero. So
953 * there's little we can rely on about the structure of the input
954 * `collectable` list across iterations. For safety, we always take the
955 * first object in that list and move it to a temporary `seen` list.
956 * If objects vanish from the `collectable` and `seen` lists we don't
957 * care.
958 */
959 gc_list_init(&seen);
960
961 while (!gc_list_is_empty(collectable)) {
962 PyGC_Head *gc = GC_NEXT(collectable);
963 PyObject *op = FROM_GC(gc);
964 gc_list_move(gc, &seen);
965 if (!_PyGCHead_FINALIZED(gc) &&
966 (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
967 _PyGCHead_SET_FINALIZED(gc);
968 Py_INCREF(op);
969 finalize(op);
970 assert(!_PyErr_Occurred(tstate));
971 Py_DECREF(op);
972 }
973 }
974 gc_list_merge(&seen, collectable);
975 }
976
977 /* Break reference cycles by clearing the containers involved. This is
978 * tricky business as the lists can be changing and we don't know which
979 * objects may be freed. It is possible I screwed something up here.
980 */
981 static void
982 delete_garbage(PyThreadState *tstate, GCState *gcstate,
983 PyGC_Head *collectable, PyGC_Head *old)
984 {
985 assert(!_PyErr_Occurred(tstate));
986
987 while (!gc_list_is_empty(collectable)) {
988 PyGC_Head *gc = GC_NEXT(collectable);
989 PyObject *op = FROM_GC(gc);
990
991 _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
992 "refcount is too small");
993
994 if (gcstate->debug & DEBUG_SAVEALL) {
995 assert(gcstate->garbage != NULL);
996 if (PyList_Append(gcstate->garbage, op) < 0) {
997 _PyErr_Clear(tstate);
998 }
999 }
1000 else {
1001 inquiry clear;
1002 if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1003 Py_INCREF(op);
1004 (void) clear(op);
1005 if (_PyErr_Occurred(tstate)) {
1006 _PyErr_WriteUnraisableMsg("in tp_clear of",
1007 (PyObject*)Py_TYPE(op));
1008 }
1009 Py_DECREF(op);
1010 }
1011 }
1012 if (GC_NEXT(collectable) == gc) {
1013 /* object is still alive, move it, it may die later */
1014 gc_clear_collecting(gc);
1015 gc_list_move(gc, old);
1016 }
1017 }
1018 }
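/* Illustrative sketch (continuing the hypothetical pairobject above, not
 * CPython code): tp_clear is what actually severs a cycle.  A typical
 * implementation just drops the object's own strong references with
 * Py_CLEAR(); the other cycle members' refcounts then fall and ordinary
 * deallocation takes over.
 *
 *     static int
 *     pair_clear(pairobject *self)
 *     {
 *         Py_CLEAR(self->partner);
 *         return 0;
 *     }
 */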
1019
1020 /* Clear all free lists
1021 * All free lists are cleared during the collection of the highest generation.
1022 * Allocated items in the free list may keep a pymalloc arena occupied.
1023 * Clearing the free lists may give back memory to the OS earlier.
1024 */
1025 static void
1026 clear_freelists(void)
1027 {
1028 _PyFrame_ClearFreeList();
1029 _PyTuple_ClearFreeList();
1030 _PyFloat_ClearFreeList();
1031 _PyList_ClearFreeList();
1032 _PyDict_ClearFreeList();
1033 _PyAsyncGen_ClearFreeLists();
1034 _PyContext_ClearFreeList();
1035 }
1036
1037 // Show stats for objects in each generation
1038 static void
1039 show_stats_each_generations(GCState *gcstate)
1040 {
1041 char buf[100];
1042 size_t pos = 0;
1043
1044 for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1045 pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1046 " %"PY_FORMAT_SIZE_T"d",
1047 gc_list_size(GEN_HEAD(gcstate, i)));
1048 }
1049
1050 PySys_FormatStderr(
1051 "gc: objects in each generation:%s\n"
1052 "gc: objects in permanent generation: %zd\n",
1053 buf, gc_list_size(&gcstate->permanent_generation.head));
1054 }
1055
1056 /* Deduce which objects among "base" are unreachable from outside the list
1057 and move them to 'unreachable'. The process consists of the following steps:
1058
1059 1. Copy all reference counts to a different field (gc_prev is used to hold
1060 this copy to save memory).
1061 2. Traverse all objects in "base" and visit all referred objects using
1062 "tp_traverse" and for every visited object, subtract 1 to the reference
1063 count (the one that we copied in the previous step). After this step, all
1064 objects that can be reached directly from outside must have strictly positive
1065 reference count, while all unreachable objects must have a count of exactly 0.
1066 3. Identify all unreachable objects (the ones with 0 reference count) and move
1067 them to the "unreachable" list. This step also needs to move back to "base" all
1068 objects that were initially marked as unreachable but are referred transitively
1069 by the reachable objects (the ones with strictly positive reference count).
1070
1071 Contracts:
1072
1073 * The "base" has to be a valid list with no mask set.
1074
1075 * The "unreachable" list must be uninitialized (this function calls
1076 gc_list_init over 'unreachable').
1077
1078 IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1079 flag set but does not clear it, in order to skip an unnecessary iteration. Before the
1080 flag is cleared (for example, by using the 'clear_unreachable_mask' function or
1081 by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1082 list and we cannot use most gc_list_* functions for it. */
1083 static inline void
1084 deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1085 validate_list(base, collecting_clear_unreachable_clear);
1086 /* Using ob_refcnt and gc_refs, calculate which objects in the
1087 * container set are reachable from outside the set (i.e., have a
1088 * refcount greater than 0 when all the references within the
1089 * set are taken into account).
1090 */
1091 update_refs(base); // gc_prev is used for gc_refs
1092 subtract_refs(base);
1093
1094 /* Leave everything reachable from outside base in base, and move
1095 * everything else (in base) to unreachable.
1096 *
1097 * NOTE: This used to move the reachable objects into a reachable
1098 * set instead. But most things usually turn out to be reachable,
1099 * so it's more efficient to move the unreachable things. It "sounds slick"
1100 * to move the unreachable objects, until you think about it - the reason it
1101 * pays isn't actually obvious.
1102 *
1103 * Suppose we create objects A, B, C in that order. They appear in the young
1104 * generation in the same order. If B points to A, and C to B, and C is
1105 * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1106 * respectively.
1107 *
1108 * When move_unreachable finds A, A is moved to the unreachable list. The
1109 * same for B when it's first encountered. Then C is traversed, B is moved
1110 * _back_ to the reachable list. B is eventually traversed, and then A is
1111 * moved back to the reachable list.
1112 *
1113 * So instead of not moving at all, the reachable objects B and A are moved
1114 * twice each. Why is this a win? A straightforward algorithm to move the
1115 * reachable objects instead would move A, B, and C once each.
1116 *
1117 * The key is that this dance leaves the objects in order C, B, A - it's
1118 * reversed from the original order. On all _subsequent_ scans, none of
1119 * them will move. Since most objects aren't in cycles, this can save an
1120 * unbounded number of moves across an unbounded number of later collections.
1121 * It can cost more only the first time the chain is scanned.
1122 *
1123 * Drawback: move_unreachable is also used to find out what's still trash
1124 * after finalizers may resurrect objects. In _that_ case most unreachable
1125 * objects will remain unreachable, so it would be more efficient to move
1126 * the reachable objects instead. But this is a one-time cost, probably not
1127 * worth complicating the code to speed just a little.
1128 */
1129 gc_list_init(unreachable);
1130 move_unreachable(base, unreachable); // gc_prev is pointer again
1131 validate_list(base, collecting_clear_unreachable_clear);
1132 validate_list(unreachable, collecting_set_unreachable_set);
1133 }
1134
1135 /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1136 them to 'old_generation' and placing the rest on 'still_unreachable'.
1137
1138 Contracts:
1139 * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1140 will contain the objects that did not resurrect.
1141
1142 * The "still_unreachable" list must be uninitialized (this function calls
1143 gc_list_init over 'still_unreachable').
1144
1145 IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1146 PREV_MASK_COLLECTING flag set, but the objects in this set are going to be removed so
1147 we can skip the expense of clearing the flag to avoid extra iteration. */
1148 static inline void
1149 handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1150 PyGC_Head *old_generation)
1151 {
1152 // Remove the PREV_MASK_COLLECTING from unreachable
1153 // to prepare it for a new call to 'deduce_unreachable'
1154 gc_list_clear_collecting(unreachable);
1155
1156 // After the call to deduce_unreachable, the 'still_unreachable' set will
1157 // have the PREV_MASK_COLLECTING flag set, but the objects are going to be
1158 // removed so we can skip the expense of clearing the flag.
1159 PyGC_Head* resurrected = unreachable;
1160 deduce_unreachable(resurrected, still_unreachable);
1161 clear_unreachable_mask(still_unreachable);
1162
1163 // Move the resurrected objects to the old generation for future collection.
1164 gc_list_merge(resurrected, old_generation);
1165 }
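/* Illustrative sketch (hypothetical, not CPython code): an object
 * "resurrects" when its tp_finalize creates a new external reference to it,
 * for example by stashing itself in some live container.  After
 * finalize_garbage(), the deduce_unreachable() call in the function above
 * then finds it reachable again, so it is moved to the old generation
 * instead of being cleared.  'survivors' below stands for any live,
 * reachable list object; error handling and the usual exception
 * save/restore are omitted.
 *
 *     static void
 *     pair_finalize(pairobject *self)
 *     {
 *         (void)PyList_Append(survivors, (PyObject *)self);  // new strong ref
 *     }
 */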
1166
1167 /* This is the main function. Read this to understand how the
1168 * collection process works. */
1169 static Py_ssize_t
1170 collect(PyThreadState *tstate, int generation,
1171 Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail)
1172 {
1173 int i;
1174 Py_ssize_t m = 0; /* # objects collected */
1175 Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
1176 PyGC_Head *young; /* the generation we are examining */
1177 PyGC_Head *old; /* next older generation */
1178 PyGC_Head unreachable; /* non-problematic unreachable trash */
1179 PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
1180 PyGC_Head *gc;
1181 _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
1182 GCState *gcstate = &tstate->interp->gc;
1183
1184 if (gcstate->debug & DEBUG_STATS) {
1185 PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1186 show_stats_each_generations(gcstate);
1187 t1 = _PyTime_GetMonotonicClock();
1188 }
1189
1190 if (PyDTrace_GC_START_ENABLED())
1191 PyDTrace_GC_START(generation);
1192
1193 /* update collection and allocation counters */
1194 if (generation+1 < NUM_GENERATIONS)
1195 gcstate->generations[generation+1].count += 1;
1196 for (i = 0; i <= generation; i++)
1197 gcstate->generations[i].count = 0;
1198
1199 /* merge younger generations with one we are currently collecting */
1200 for (i = 0; i < generation; i++) {
1201 gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1202 }
1203
1204 /* handy references */
1205 young = GEN_HEAD(gcstate, generation);
1206 if (generation < NUM_GENERATIONS-1)
1207 old = GEN_HEAD(gcstate, generation+1);
1208 else
1209 old = young;
1210 validate_list(old, collecting_clear_unreachable_clear);
1211
1212 deduce_unreachable(young, &unreachable);
1213
1214 untrack_tuples(young);
1215 /* Move reachable objects to next generation. */
1216 if (young != old) {
1217 if (generation == NUM_GENERATIONS - 2) {
1218 gcstate->long_lived_pending += gc_list_size(young);
1219 }
1220 gc_list_merge(young, old);
1221 }
1222 else {
1223 /* We only un-track dicts in full collections, to avoid quadratic
1224 dict build-up. See issue #14775. */
1225 untrack_dicts(young);
1226 gcstate->long_lived_pending = 0;
1227 gcstate->long_lived_total = gc_list_size(young);
1228 }
1229
1230 /* All objects in unreachable are trash, but objects reachable from
1231 * legacy finalizers (e.g. tp_del) can't safely be deleted.
1232 */
1233 gc_list_init(&finalizers);
1234 // NEXT_MASK_UNREACHABLE is cleared here.
1235 // After move_legacy_finalizers(), unreachable is normal list.
1236 move_legacy_finalizers(&unreachable, &finalizers);
1237 /* finalizers contains the unreachable objects with a legacy finalizer;
1238 * unreachable objects reachable *from* those are also uncollectable,
1239 * and we move those into the finalizers list too.
1240 */
1241 move_legacy_finalizer_reachable(&finalizers);
1242
1243 validate_list(&finalizers, collecting_clear_unreachable_clear);
1244 validate_list(&unreachable, collecting_set_unreachable_clear);
1245
1246 /* Print debugging information. */
1247 if (gcstate->debug & DEBUG_COLLECTABLE) {
1248 for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1249 debug_cycle("collectable", FROM_GC(gc));
1250 }
1251 }
1252
1253 /* Clear weakrefs and invoke callbacks as necessary. */
1254 m += handle_weakrefs(&unreachable, old);
1255
1256 validate_list(old, collecting_clear_unreachable_clear);
1257 validate_list(&unreachable, collecting_set_unreachable_clear);
1258
1259 /* Call tp_finalize on objects which have one. */
1260 finalize_garbage(tstate, &unreachable);
1261
1262 /* Handle any objects that may have resurrected after the call
1263 * to 'finalize_garbage' and continue the collection with the
1264 * objects that are still unreachable */
1265 PyGC_Head final_unreachable;
1266 handle_resurrected_objects(&unreachable, &final_unreachable, old);
1267
1268 /* Call tp_clear on objects in the final_unreachable set. This will cause
1269 * the reference cycles to be broken. It may also cause some objects
1270 * in finalizers to be freed.
1271 */
1272 m += gc_list_size(&final_unreachable);
1273 delete_garbage(tstate, gcstate, &final_unreachable, old);
1274
1275 /* Collect statistics on uncollectable objects found and print
1276 * debugging information. */
1277 for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1278 n++;
1279 if (gcstate->debug & DEBUG_UNCOLLECTABLE)
1280 debug_cycle("uncollectable", FROM_GC(gc));
1281 }
1282 if (gcstate->debug & DEBUG_STATS) {
1283 double d = _PyTime_AsSecondsDouble(_PyTime_GetMonotonicClock() - t1);
1284 PySys_WriteStderr(
1285 "gc: done, %" PY_FORMAT_SIZE_T "d unreachable, "
1286 "%" PY_FORMAT_SIZE_T "d uncollectable, %.4fs elapsed\n",
1287 n+m, n, d);
1288 }
1289
1290 /* Append instances in the uncollectable set to a Python
1291 * reachable list of garbage. The programmer has to deal with
1292 * this if they insist on creating this type of structure.
1293 */
1294 handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1295 validate_list(old, collecting_clear_unreachable_clear);
1296
1297 /* Clear free list only during the collection of the highest
1298 * generation */
1299 if (generation == NUM_GENERATIONS-1) {
1300 clear_freelists();
1301 }
1302
1303 if (_PyErr_Occurred(tstate)) {
1304 if (nofail) {
1305 _PyErr_Clear(tstate);
1306 }
1307 else {
1308 _PyErr_WriteUnraisableMsg("in garbage collection", NULL);
1309 }
1310 }
1311
1312 /* Update stats */
1313 if (n_collected) {
1314 *n_collected = m;
1315 }
1316 if (n_uncollectable) {
1317 *n_uncollectable = n;
1318 }
1319
1320 struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
1321 stats->collections++;
1322 stats->collected += m;
1323 stats->uncollectable += n;
1324
1325 if (PyDTrace_GC_DONE_ENABLED()) {
1326 PyDTrace_GC_DONE(n + m);
1327 }
1328
1329 assert(!_PyErr_Occurred(tstate));
1330 return n + m;
1331 }
1332
1333 /* Invoke progress callbacks to notify clients that garbage collection
1334 * is starting or stopping
1335 */
1336 static void
1337 invoke_gc_callback(PyThreadState *tstate, const char *phase,
1338 int generation, Py_ssize_t collected,
1339 Py_ssize_t uncollectable)
1340 {
1341 assert(!_PyErr_Occurred(tstate));
1342
1343 /* we may get called very early */
1344 GCState *gcstate = &tstate->interp->gc;
1345 if (gcstate->callbacks == NULL) {
1346 return;
1347 }
1348
1349 /* The local variable cannot be rebound, check it for sanity */
1350 assert(PyList_CheckExact(gcstate->callbacks));
1351 PyObject *info = NULL;
1352 if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1353 info = Py_BuildValue("{sisnsn}",
1354 "generation", generation,
1355 "collected", collected,
1356 "uncollectable", uncollectable);
1357 if (info == NULL) {
1358 PyErr_WriteUnraisable(NULL);
1359 return;
1360 }
1361 }
1362 for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1363 PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1364 Py_INCREF(cb); /* make sure cb doesn't go away */
1365 r = PyObject_CallFunction(cb, "sO", phase, info);
1366 if (r == NULL) {
1367 PyErr_WriteUnraisable(cb);
1368 }
1369 else {
1370 Py_DECREF(r);
1371 }
1372 Py_DECREF(cb);
1373 }
1374 Py_XDECREF(info);
1375 assert(!_PyErr_Occurred(tstate));
1376 }
1377
1378 /* Perform garbage collection of a generation and invoke
1379 * progress callbacks.
1380 */
1381 static Py_ssize_t
1382 collect_with_callback(PyThreadState *tstate, int generation)
1383 {
1384 assert(!_PyErr_Occurred(tstate));
1385 Py_ssize_t result, collected, uncollectable;
1386 invoke_gc_callback(tstate, "start", generation, 0, 0);
1387 result = collect(tstate, generation, &collected, &uncollectable, 0);
1388 invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
1389 assert(!_PyErr_Occurred(tstate));
1390 return result;
1391 }
1392
1393 static Py_ssize_t
1394 collect_generations(PyThreadState *tstate)
1395 {
1396 GCState *gcstate = &tstate->interp->gc;
1397 /* Find the oldest generation (highest numbered) where the count
1398 * exceeds the threshold. Objects in that generation and
1399 * generations younger than it will be collected. */
1400 Py_ssize_t n = 0;
1401 for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1402 if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1403 /* Avoid quadratic performance degradation in number
1404 of tracked objects (see also issue #4074):
1405
1406 To limit the cost of garbage collection, there are two strategies:
1407 - make each collection faster, e.g. by scanning fewer objects
1408 - do fewer collections
1409 This heuristic is about the latter strategy.
1410
1411 In addition to the various configurable thresholds, we only trigger a
1412 full collection if the ratio
1413
1414 long_lived_pending / long_lived_total
1415
1416 is above a given value (hardwired to 25%).
1417
1418 The reason is that, while "non-full" collections (i.e., collections of
1419 the young and middle generations) will always examine roughly the same
1420 number of objects -- determined by the aforementioned thresholds --,
1421 the cost of a full collection is proportional to the total number of
1422 long-lived objects, which is virtually unbounded.
1423
1424 Indeed, it has been remarked that doing a full collection every
1425 <constant number> of object creations entails a dramatic performance
1426 degradation in workloads which consist of creating and storing lots of
1427 long-lived objects (e.g. building a large list of GC-tracked objects would
1428 show quadratic performance, instead of linear as expected: see issue #4074).
1429
1430 Using the above ratio, instead, yields amortized linear performance in
1431 the total number of objects (the effect of which can be summarized
1432 thusly: "each full garbage collection is more and more costly as the
1433 number of objects grows, but we do fewer and fewer of them").
1434
1435 This heuristic was suggested by Martin von Löwis on python-dev in
1436 June 2008. His original analysis and proposal can be found at:
1437 http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1438 */
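/* Worked example of the ratio check below (illustrative numbers only):
   with long_lived_total == 10000, the full collection is skipped while
   long_lived_pending < 10000 / 4 == 2500; i.e. generation 2 is only
   collected once at least a quarter of the long-lived population is
   new since the last full collection. */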
1439 if (i == NUM_GENERATIONS - 1
1440 && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1441 continue;
1442 n = collect_with_callback(tstate, i);
1443 break;
1444 }
1445 }
1446 return n;
1447 }
1448
1449 #include "clinic/gcmodule.c.h"
1450
1451 /*[clinic input]
1452 gc.enable
1453
1454 Enable automatic garbage collection.
1455 [clinic start generated code]*/
1456
1457 static PyObject *
1458 gc_enable_impl(PyObject *module)
1459 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
1460 {
1461 PyThreadState *tstate = _PyThreadState_GET();
1462 GCState *gcstate = &tstate->interp->gc;
1463 gcstate->enabled = 1;
1464 Py_RETURN_NONE;
1465 }
1466
1467 /*[clinic input]
1468 gc.disable
1469
1470 Disable automatic garbage collection.
1471 [clinic start generated code]*/
1472
1473 static PyObject *
1474 gc_disable_impl(PyObject *module)
1475 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
1476 {
1477 PyThreadState *tstate = _PyThreadState_GET();
1478 GCState *gcstate = &tstate->interp->gc;
1479 gcstate->enabled = 0;
1480 Py_RETURN_NONE;
1481 }
1482
1483 /*[clinic input]
1484 gc.isenabled -> bool
1485
1486 Returns true if automatic garbage collection is enabled.
1487 [clinic start generated code]*/
1488
1489 static int
1490 gc_isenabled_impl(PyObject *module)
1491 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
1492 {
1493 PyThreadState *tstate = _PyThreadState_GET();
1494 GCState *gcstate = &tstate->interp->gc;
1495 return gcstate->enabled;
1496 }
1497
1498 /*[clinic input]
1499 gc.collect -> Py_ssize_t
1500
1501 generation: int(c_default="NUM_GENERATIONS - 1") = 2
1502
1503 Run the garbage collector.
1504
1505 With no arguments, run a full collection. The optional argument
1506 may be an integer specifying which generation to collect. A ValueError
1507 is raised if the generation number is invalid.
1508
1509 The number of unreachable objects is returned.
1510 [clinic start generated code]*/
1511
1512 static Py_ssize_t
1513 gc_collect_impl(PyObject *module, int generation)
1514 /*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
1515 {
1516 PyThreadState *tstate = _PyThreadState_GET();
1517
1518 if (generation < 0 || generation >= NUM_GENERATIONS) {
1519 _PyErr_SetString(tstate, PyExc_ValueError, "invalid generation");
1520 return -1;
1521 }
1522
1523 GCState *gcstate = &tstate->interp->gc;
1524 Py_ssize_t n;
1525 if (gcstate->collecting) {
1526 /* already collecting, don't do anything */
1527 n = 0;
1528 }
1529 else {
1530 gcstate->collecting = 1;
1531 n = collect_with_callback(tstate, generation);
1532 gcstate->collecting = 0;
1533 }
1534 return n;
1535 }
1536
1537 /*[clinic input]
1538 gc.set_debug
1539
1540 flags: int
1541 An integer that can have the following bits turned on:
1542 DEBUG_STATS - Print statistics during collection.
1543 DEBUG_COLLECTABLE - Print collectable objects found.
1544 DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
1545 found.
1546 DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
1547 DEBUG_LEAK - Debug leaking programs (everything but STATS).
1548 /
1549
1550 Set the garbage collection debugging flags.
1551
1552 Debugging information is written to sys.stderr.
1553 [clinic start generated code]*/
1554
1555 static PyObject *
1556 gc_set_debug_impl(PyObject *module, int flags)
1557 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
1558 {
1559 PyThreadState *tstate = _PyThreadState_GET();
1560 GCState *gcstate = &tstate->interp->gc;
1561 gcstate->debug = flags;
1562 Py_RETURN_NONE;
1563 }
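/* The debug flags are plain bit flags and may be OR-ed together from
   Python, e.g. gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE).
   DEBUG_LEAK is simply the combination
   DEBUG_COLLECTABLE | DEBUG_UNCOLLECTABLE | DEBUG_SAVEALL. */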
1564
1565 /*[clinic input]
1566 gc.get_debug -> int
1567
1568 Get the garbage collection debugging flags.
1569 [clinic start generated code]*/
1570
1571 static int
1572 gc_get_debug_impl(PyObject *module)
1573 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
1574 {
1575 PyThreadState *tstate = _PyThreadState_GET();
1576 GCState *gcstate = &tstate->interp->gc;
1577 return gcstate->debug;
1578 }
1579
1580 PyDoc_STRVAR(gc_set_thresh__doc__,
1581 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1582 "\n"
1583 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
1584 "collection.\n");
1585
1586 static PyObject *
1587 gc_set_threshold(PyObject *self, PyObject *args)
1588 {
1589 PyThreadState *tstate = _PyThreadState_GET();
1590 GCState *gcstate = &tstate->interp->gc;
1591 if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1592 &gcstate->generations[0].threshold,
1593 &gcstate->generations[1].threshold,
1594 &gcstate->generations[2].threshold))
1595 return NULL;
1596 for (int i = 3; i < NUM_GENERATIONS; i++) {
1597 /* generations higher than 2 get the same threshold */
1598 gcstate->generations[i].threshold = gcstate->generations[2].threshold;
1599 }
1600 Py_RETURN_NONE;
1601 }
1602
1603 /*[clinic input]
1604 gc.get_threshold
1605
1606 Return the current collection thresholds.
1607 [clinic start generated code]*/
1608
1609 static PyObject *
1610 gc_get_threshold_impl(PyObject *module)
1611 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
1612 {
1613 PyThreadState *tstate = _PyThreadState_GET();
1614 GCState *gcstate = &tstate->interp->gc;
1615 return Py_BuildValue("(iii)",
1616 gcstate->generations[0].threshold,
1617 gcstate->generations[1].threshold,
1618 gcstate->generations[2].threshold);
1619 }
1620
1621 /*[clinic input]
1622 gc.get_count
1623
1624 Return a three-tuple of the current collection counts.
1625 [clinic start generated code]*/
1626
1627 static PyObject *
1628 gc_get_count_impl(PyObject *module)
1629 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
1630 {
1631 PyThreadState *tstate = _PyThreadState_GET();
1632 GCState *gcstate = &tstate->interp->gc;
1633 return Py_BuildValue("(iii)",
1634 gcstate->generations[0].count,
1635 gcstate->generations[1].count,
1636 gcstate->generations[2].count);
1637 }
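/* Interpretation of the tuple built above (the counters are maintained
   elsewhere in this module): generations[0].count is approximately the
   number of tracked allocations minus deallocations since the last
   generation-0 collection; generations[1].count is the number of
   generation-0 collections since generation 1 was last collected, and
   generations[2].count the number of generation-1 collections since
   generation 2 was last collected. */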
1638
1639 static int
1640 referrersvisit(PyObject* obj, PyObject *objs)
1641 {
1642 Py_ssize_t i;
1643 for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1644 if (PyTuple_GET_ITEM(objs, i) == obj)
1645 return 1;
1646 return 0;
1647 }
1648
1649 static int
1650 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1651 {
1652 PyGC_Head *gc;
1653 PyObject *obj;
1654 traverseproc traverse;
1655 for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1656 obj = FROM_GC(gc);
1657 traverse = Py_TYPE(obj)->tp_traverse;
1658 if (obj == objs || obj == resultlist)
1659 continue;
1660 if (traverse(obj, (visitproc)referrersvisit, objs)) {
1661 if (PyList_Append(resultlist, obj) < 0)
1662 return 0; /* error */
1663 }
1664 }
1665 return 1; /* no error */
1666 }
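/* How the search above works: tp_traverse calls referrersvisit() once for
   each object that obj directly references, and referrersvisit() returns
   1 when the referent it is handed is in the objs tuple.  tp_traverse
   propagates that non-zero result, so a non-zero return from traverse()
   means "obj refers to at least one of objs" and obj is appended to the
   result list. */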
1667
1668 PyDoc_STRVAR(gc_get_referrers__doc__,
1669 "get_referrers(*objs) -> list\n\
1670 Return the list of objects that directly refer to any of objs.");
1671
1672 static PyObject *
1673 gc_get_referrers(PyObject *self, PyObject *args)
1674 {
1675 PyThreadState *tstate = _PyThreadState_GET();
1676 int i;
1677 PyObject *result = PyList_New(0);
1678 if (!result) {
1679 return NULL;
1680 }
1681
1682 GCState *gcstate = &tstate->interp->gc;
1683 for (i = 0; i < NUM_GENERATIONS; i++) {
1684 if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) {
1685 Py_DECREF(result);
1686 return NULL;
1687 }
1688 }
1689 return result;
1690 }
1691
1692 /* Append obj to list; return true if error (out of memory), false if OK. */
1693 static int
1694 referentsvisit(PyObject *obj, PyObject *list)
1695 {
1696 return PyList_Append(list, obj) < 0;
1697 }
1698
1699 PyDoc_STRVAR(gc_get_referents__doc__,
1700 "get_referents(*objs) -> list\n\
1701 Return the list of objects that are directly referred to by objs.");
1702
1703 static PyObject *
1704 gc_get_referents(PyObject *self, PyObject *args)
1705 {
1706 Py_ssize_t i;
1707 PyObject *result = PyList_New(0);
1708
1709 if (result == NULL)
1710 return NULL;
1711
1712 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1713 traverseproc traverse;
1714 PyObject *obj = PyTuple_GET_ITEM(args, i);
1715
1716 if (!_PyObject_IS_GC(obj))
1717 continue;
1718 traverse = Py_TYPE(obj)->tp_traverse;
1719 if (! traverse)
1720 continue;
1721 if (traverse(obj, (visitproc)referentsvisit, result)) {
1722 Py_DECREF(result);
1723 return NULL;
1724 }
1725 }
1726 return result;
1727 }
1728
1729 /*[clinic input]
1730 gc.get_objects
1731 generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
1732 Generation to extract the objects from.
1733
1734 Return a list of objects tracked by the collector (excluding the list returned).
1735
1736 If generation is not None, return only the objects tracked by the collector
1737 that are in that generation.
1738 [clinic start generated code]*/
1739
1740 static PyObject *
1741 gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
1742 /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
1743 {
1744 PyThreadState *tstate = _PyThreadState_GET();
1745 int i;
1746 PyObject* result;
1747 GCState *gcstate = &tstate->interp->gc;
1748
1749 result = PyList_New(0);
1750 if (result == NULL) {
1751 return NULL;
1752 }
1753
1754 /* If generation is passed, we extract only that generation */
1755 if (generation != -1) {
1756 if (generation >= NUM_GENERATIONS) {
1757 _PyErr_Format(tstate, PyExc_ValueError,
1758 "generation parameter must be less than the number of "
1759 "available generations (%i)",
1760 NUM_GENERATIONS);
1761 goto error;
1762 }
1763
1764 if (generation < 0) {
1765 _PyErr_SetString(tstate, PyExc_ValueError,
1766 "generation parameter cannot be negative");
1767 goto error;
1768 }
1769
1770 if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1771 goto error;
1772 }
1773
1774 return result;
1775 }
1776
1777 /* If generation is not passed or None, get all objects from all generations */
1778 for (i = 0; i < NUM_GENERATIONS; i++) {
1779 if (append_objects(result, GEN_HEAD(gcstate, i))) {
1780 goto error;
1781 }
1782 }
1783 return result;
1784
1785 error:
1786 Py_DECREF(result);
1787 return NULL;
1788 }
1789
1790 /*[clinic input]
1791 gc.get_stats
1792
1793 Return a list of dictionaries containing per-generation statistics.
1794 [clinic start generated code]*/
1795
1796 static PyObject *
1797 gc_get_stats_impl(PyObject *module)
1798 /*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
1799 {
1800 int i;
1801 struct gc_generation_stats stats[NUM_GENERATIONS], *st;
1802 PyThreadState *tstate = _PyThreadState_GET();
1803
1804 /* To get consistent values despite allocations while constructing
1805 the result list, we use a snapshot of the running stats. */
1806 GCState *gcstate = &tstate->interp->gc;
1807 for (i = 0; i < NUM_GENERATIONS; i++) {
1808 stats[i] = gcstate->generation_stats[i];
1809 }
1810
1811 PyObject *result = PyList_New(0);
1812 if (result == NULL)
1813 return NULL;
1814
1815 for (i = 0; i < NUM_GENERATIONS; i++) {
1816 PyObject *dict;
1817 st = &stats[i];
1818 dict = Py_BuildValue("{snsnsn}",
1819 "collections", st->collections,
1820 "collected", st->collected,
1821 "uncollectable", st->uncollectable
1822 );
1823 if (dict == NULL)
1824 goto error;
1825 if (PyList_Append(result, dict)) {
1826 Py_DECREF(dict);
1827 goto error;
1828 }
1829 Py_DECREF(dict);
1830 }
1831 return result;
1832
1833 error:
1834 Py_XDECREF(result);
1835 return NULL;
1836 }
1837
1838
1839 /*[clinic input]
1840 gc.is_tracked
1841
1842 obj: object
1843 /
1844
1845 Returns true if the object is tracked by the garbage collector.
1846
1847 Simple atomic objects will return false.
1848 [clinic start generated code]*/
1849
1850 static PyObject *
1851 gc_is_tracked(PyObject *module, PyObject *obj)
1852 /*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
1853 {
1854 PyObject *result;
1855
1856 if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
1857 result = Py_True;
1858 else
1859 result = Py_False;
1860 Py_INCREF(result);
1861 return result;
1862 }
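/* Quick illustration from Python: gc.is_tracked(1) and gc.is_tracked("a")
   return False because atomic objects cannot form reference cycles,
   whereas gc.is_tracked([]) returns True since lists are tracked
   container objects. */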
1863
1864 /*[clinic input]
1865 gc.is_finalized
1866
1867 obj: object
1868 /
1869
1870 Returns true if the object has already been finalized by the GC.
1871 [clinic start generated code]*/
1872
1873 static PyObject *
1874 gc_is_finalized(PyObject *module, PyObject *obj)
1875 /*[clinic end generated code: output=e1516ac119a918ed input=201d0c58f69ae390]*/
1876 {
1877 if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
1878 Py_RETURN_TRUE;
1879 }
1880 Py_RETURN_FALSE;
1881 }
1882
1883 /*[clinic input]
1884 gc.freeze
1885
1886 Freeze all currently tracked objects and ignore them for future collections.
1887
1888 This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
1889 Note: a collection run before a POSIX fork() call may free pages that are later
1890 reused for new allocations, which can itself trigger copy-on-write.
1891 [clinic start generated code]*/
1892
1893 static PyObject *
1894 gc_freeze_impl(PyObject *module)
1895 /*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
1896 {
1897 PyThreadState *tstate = _PyThreadState_GET();
1898 GCState *gcstate = &tstate->interp->gc;
1899 for (int i = 0; i < NUM_GENERATIONS; ++i) {
1900 gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1901 gcstate->generations[i].count = 0;
1902 }
1903 Py_RETURN_NONE;
1904 }
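/* Typical pre-fork usage from Python (sketch): the parent calls
   gc.disable() early, then gc.freeze() immediately before os.fork(), so
   that later collections never traverse or relink the frozen objects and
   their pages are not dirtied by the collector; a child that no longer
   benefits from copy-on-write sharing can call gc.unfreeze() and
   gc.enable() again. */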
1905
1906 /*[clinic input]
1907 gc.unfreeze
1908
1909 Unfreeze all objects in the permanent generation.
1910
1911 Put all objects in the permanent generation back into the oldest generation.
1912 [clinic start generated code]*/
1913
1914 static PyObject *
1915 gc_unfreeze_impl(PyObject *module)
1916 /*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
1917 {
1918 PyThreadState *tstate = _PyThreadState_GET();
1919 GCState *gcstate = &tstate->interp->gc;
1920 gc_list_merge(&gcstate->permanent_generation.head,
1921 GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1922 Py_RETURN_NONE;
1923 }
1924
1925 /*[clinic input]
1926 gc.get_freeze_count -> Py_ssize_t
1927
1928 Return the number of objects in the permanent generation.
1929 [clinic start generated code]*/
1930
1931 static Py_ssize_t
1932 gc_get_freeze_count_impl(PyObject *module)
1933 /*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
1934 {
1935 PyThreadState *tstate = _PyThreadState_GET();
1936 GCState *gcstate = &tstate->interp->gc;
1937 return gc_list_size(&gcstate->permanent_generation.head);
1938 }
1939
1940
1941 PyDoc_STRVAR(gc__doc__,
1942 "This module provides access to the garbage collector for reference cycles.\n"
1943 "\n"
1944 "enable() -- Enable automatic garbage collection.\n"
1945 "disable() -- Disable automatic garbage collection.\n"
1946 "isenabled() -- Returns true if automatic collection is enabled.\n"
1947 "collect() -- Do a full collection right now.\n"
1948 "get_count() -- Return the current collection counts.\n"
1949 "get_stats() -- Return list of dictionaries containing per-generation stats.\n"
1950 "set_debug() -- Set debugging flags.\n"
1951 "get_debug() -- Get debugging flags.\n"
1952 "set_threshold() -- Set the collection thresholds.\n"
1953 "get_threshold() -- Return the current the collection thresholds.\n"
1954 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1955 "is_tracked() -- Returns true if a given object is tracked.\n"
1956 "is_finalized() -- Returns true if a given object has been already finalized.\n"
1957 "get_referrers() -- Return the list of objects that refer to an object.\n"
1958 "get_referents() -- Return the list of objects that an object refers to.\n"
1959 "freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
1960 "unfreeze() -- Unfreeze all objects in the permanent generation.\n"
1961 "get_freeze_count() -- Return the number of objects in the permanent generation.\n");
1962
1963 static PyMethodDef GcMethods[] = {
1964 GC_ENABLE_METHODDEF
1965 GC_DISABLE_METHODDEF
1966 GC_ISENABLED_METHODDEF
1967 GC_SET_DEBUG_METHODDEF
1968 GC_GET_DEBUG_METHODDEF
1969 GC_GET_COUNT_METHODDEF
1970 {"set_threshold", gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
1971 GC_GET_THRESHOLD_METHODDEF
1972 GC_COLLECT_METHODDEF
1973 GC_GET_OBJECTS_METHODDEF
1974 GC_GET_STATS_METHODDEF
1975 GC_IS_TRACKED_METHODDEF
1976 GC_IS_FINALIZED_METHODDEF
1977 {"get_referrers", gc_get_referrers, METH_VARARGS,
1978 gc_get_referrers__doc__},
1979 {"get_referents", gc_get_referents, METH_VARARGS,
1980 gc_get_referents__doc__},
1981 GC_FREEZE_METHODDEF
1982 GC_UNFREEZE_METHODDEF
1983 GC_GET_FREEZE_COUNT_METHODDEF
1984 {NULL, NULL} /* Sentinel */
1985 };
1986
1987 static struct PyModuleDef gcmodule = {
1988 PyModuleDef_HEAD_INIT,
1989 "gc", /* m_name */
1990 gc__doc__, /* m_doc */
1991 -1, /* m_size */
1992 GcMethods, /* m_methods */
1993 NULL, /* m_reload */
1994 NULL, /* m_traverse */
1995 NULL, /* m_clear */
1996 NULL /* m_free */
1997 };
1998
1999 PyMODINIT_FUNC
2000 PyInit_gc(void)
2001 {
2002 PyThreadState *tstate = _PyThreadState_GET();
2003 GCState *gcstate = &tstate->interp->gc;
2004
2005 PyObject *m = PyModule_Create(&gcmodule);
2006
2007 if (m == NULL) {
2008 return NULL;
2009 }
2010
2011 if (gcstate->garbage == NULL) {
2012 gcstate->garbage = PyList_New(0);
2013 if (gcstate->garbage == NULL) {
2014 return NULL;
2015 }
2016 }
2017 Py_INCREF(gcstate->garbage);
2018 if (PyModule_AddObject(m, "garbage", gcstate->garbage) < 0) {
2019 return NULL;
2020 }
2021
2022 if (gcstate->callbacks == NULL) {
2023 gcstate->callbacks = PyList_New(0);
2024 if (gcstate->callbacks == NULL) {
2025 return NULL;
2026 }
2027 }
2028 Py_INCREF(gcstate->callbacks);
2029 if (PyModule_AddObject(m, "callbacks", gcstate->callbacks) < 0) {
2030 return NULL;
2031 }
2032
2033 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) { return NULL; }
2034 ADD_INT(DEBUG_STATS);
2035 ADD_INT(DEBUG_COLLECTABLE);
2036 ADD_INT(DEBUG_UNCOLLECTABLE);
2037 ADD_INT(DEBUG_SAVEALL);
2038 ADD_INT(DEBUG_LEAK);
2039 #undef ADD_INT
2040 return m;
2041 }
2042
2043 /* API to invoke gc.collect() from C */
2044 Py_ssize_t
2045 PyGC_Collect(void)
2046 {
2047 PyThreadState *tstate = _PyThreadState_GET();
2048 GCState *gcstate = &tstate->interp->gc;
2049
2050 if (!gcstate->enabled) {
2051 return 0;
2052 }
2053
2054 Py_ssize_t n;
2055 if (gcstate->collecting) {
2056 /* already collecting, don't do anything */
2057 n = 0;
2058 }
2059 else {
2060 PyObject *exc, *value, *tb;
2061 gcstate->collecting = 1;
2062 _PyErr_Fetch(tstate, &exc, &value, &tb);
2063 n = collect_with_callback(tstate, NUM_GENERATIONS - 1);
2064 _PyErr_Restore(tstate, exc, value, tb);
2065 gcstate->collecting = 0;
2066 }
2067
2068 return n;
2069 }
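/* Minimal embedding sketch (assumes an initialized interpreter and that
   the caller holds the GIL): PyGC_Collect() can be invoked directly from
   C to force a full collection, e.g.

       Py_ssize_t unreachable = PyGC_Collect();

   The function saves and restores any pending exception around the
   collection, so it is safe to call while an exception is set. */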
2070
2071 Py_ssize_t
2072 _PyGC_CollectIfEnabled(void)
2073 {
2074 return PyGC_Collect();
2075 }
2076
2077 Py_ssize_t
2078 _PyGC_CollectNoFail(void)
2079 {
2080 PyThreadState *tstate = _PyThreadState_GET();
2081 assert(!_PyErr_Occurred(tstate));
2082
2083 GCState *gcstate = &tstate->interp->gc;
2084 Py_ssize_t n;
2085
2086 /* Ideally, this function is only called on interpreter shutdown,
2087 and therefore not recursively. Unfortunately, when there are daemon
2088 threads, a daemon thread can start a cyclic garbage collection
2089 during interpreter shutdown (and then never finish it).
2090 See http://bugs.python.org/issue8713#msg195178 for an example.
2091 */
2092 if (gcstate->collecting) {
2093 n = 0;
2094 }
2095 else {
2096 gcstate->collecting = 1;
2097 n = collect(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
2098 gcstate->collecting = 0;
2099 }
2100 return n;
2101 }
2102
2103 void
2104 _PyGC_DumpShutdownStats(PyThreadState *tstate)
2105 {
2106 GCState *gcstate = &tstate->interp->gc;
2107 if (!(gcstate->debug & DEBUG_SAVEALL)
2108 && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
2109 const char *message;
2110 if (gcstate->debug & DEBUG_UNCOLLECTABLE)
2111 message = "gc: %zd uncollectable objects at " \
2112 "shutdown";
2113 else
2114 message = "gc: %zd uncollectable objects at " \
2115 "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
2116 /* PyErr_WarnFormat does too many things, and since we are at shutdown,
2117 the warnings module's dependencies (e.g. linecache) may be gone
2118 already. */
2119 if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2120 "gc", NULL, message,
2121 PyList_GET_SIZE(gcstate->garbage)))
2122 PyErr_WriteUnraisable(NULL);
2123 if (gcstate->debug & DEBUG_UNCOLLECTABLE) {
2124 PyObject *repr = NULL, *bytes = NULL;
2125 repr = PyObject_Repr(gcstate->garbage);
2126 if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
2127 PyErr_WriteUnraisable(gcstate->garbage);
2128 else {
2129 PySys_WriteStderr(
2130 " %s\n",
2131 PyBytes_AS_STRING(bytes)
2132 );
2133 }
2134 Py_XDECREF(repr);
2135 Py_XDECREF(bytes);
2136 }
2137 }
2138 }
2139
2140 void
2141 _PyGC_Fini(PyThreadState *tstate)
2142 {
2143 GCState *gcstate = &tstate->interp->gc;
2144 Py_CLEAR(gcstate->garbage);
2145 Py_CLEAR(gcstate->callbacks);
2146 }
2147
2148 /* for debugging */
2149 void
2150 _PyGC_Dump(PyGC_Head *g)
2151 {
2152 _PyObject_Dump(FROM_GC(g));
2153 }
2154
2155
2156 #ifdef Py_DEBUG
2157 static int
2158 visit_validate(PyObject *op, void *parent_raw)
2159 {
2160 PyObject *parent = _PyObject_CAST(parent_raw);
2161 if (_PyObject_IsFreed(op)) {
2162 _PyObject_ASSERT_FAILED_MSG(parent,
2163 "PyObject_GC_Track() object is not valid");
2164 }
2165 return 0;
2166 }
2167 #endif
2168
2169
2170 /* extension modules might be compiled with GC support, so these
2171 functions must always be available */
2172
2173 void
2174 PyObject_GC_Track(void *op_raw)
2175 {
2176 PyObject *op = _PyObject_CAST(op_raw);
2177 if (_PyObject_GC_IS_TRACKED(op)) {
2178 _PyObject_ASSERT_FAILED_MSG(op,
2179 "object already tracked "
2180 "by the garbage collector");
2181 }
2182 _PyObject_GC_TRACK(op);
2183
2184 #ifdef Py_DEBUG
2185 /* Check that the object is valid: validate objects traversed
2186 by tp_traverse() */
2187 traverseproc traverse = Py_TYPE(op)->tp_traverse;
2188 (void)traverse(op, visit_validate, op);
2189 #endif
2190 }
2191
2192 void
2193 PyObject_GC_UnTrack(void *op_raw)
2194 {
2195 PyObject *op = _PyObject_CAST(op_raw);
2196 /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
2197 * call PyObject_GC_UnTrack twice on an object.
2198 */
2199 if (_PyObject_GC_IS_TRACKED(op)) {
2200 _PyObject_GC_UNTRACK(op);
2201 }
2202 }
2203
2204 int
2205 PyObject_IS_GC(PyObject *obj)
2206 {
2207 return _PyObject_IS_GC(obj);
2208 }
2209
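/* Allocate the memory for a GC-managed object.  A PyGC_Head is placed
   immediately before the object itself (see the AS_GC/FROM_GC macros),
   and bumping generations[0].count past its threshold here is what
   triggers automatic collection via collect_generations(). */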
2210 static PyObject *
2211 _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
2212 {
2213 PyThreadState *tstate = _PyThreadState_GET();
2214 GCState *gcstate = &tstate->interp->gc;
2215 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2216 return _PyErr_NoMemory(tstate);
2217 }
2218 size_t size = sizeof(PyGC_Head) + basicsize;
2219
2220 PyGC_Head *g;
2221 if (use_calloc) {
2222 g = (PyGC_Head *)PyObject_Calloc(1, size);
2223 }
2224 else {
2225 g = (PyGC_Head *)PyObject_Malloc(size);
2226 }
2227 if (g == NULL) {
2228 return _PyErr_NoMemory(tstate);
2229 }
2230 assert(((uintptr_t)g & 3) == 0); // g must be aligned to a 4-byte boundary
2231
2232 g->_gc_next = 0;
2233 g->_gc_prev = 0;
2234 gcstate->generations[0].count++; /* number of allocated GC objects */
2235 if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
2236 gcstate->enabled &&
2237 gcstate->generations[0].threshold &&
2238 !gcstate->collecting &&
2239 !_PyErr_Occurred(tstate))
2240 {
2241 gcstate->collecting = 1;
2242 collect_generations(tstate);
2243 gcstate->collecting = 0;
2244 }
2245 PyObject *op = FROM_GC(g);
2246 return op;
2247 }
2248
2249 PyObject *
2250 _PyObject_GC_Malloc(size_t basicsize)
2251 {
2252 return _PyObject_GC_Alloc(0, basicsize);
2253 }
2254
2255 PyObject *
2256 _PyObject_GC_Calloc(size_t basicsize)
2257 {
2258 return _PyObject_GC_Alloc(1, basicsize);
2259 }
2260
2261 PyObject *
2262 _PyObject_GC_New(PyTypeObject *tp)
2263 {
2264 PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
2265 if (op != NULL)
2266 op = PyObject_INIT(op, tp);
2267 return op;
2268 }
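/* Typical allocation pattern in an extension type (sketch; MyObject and
   MyObject_Type are hypothetical names).  Fields must be initialized
   before the object is handed to PyObject_GC_Track():

       MyObject *op = PyObject_GC_New(MyObject, &MyObject_Type);
       if (op == NULL)
           return NULL;
       op->child = NULL;
       PyObject_GC_Track((PyObject *)op);
*/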
2269
2270 PyVarObject *
2271 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2272 {
2273 size_t size;
2274 PyVarObject *op;
2275
2276 if (nitems < 0) {
2277 PyErr_BadInternalCall();
2278 return NULL;
2279 }
2280 size = _PyObject_VAR_SIZE(tp, nitems);
2281 op = (PyVarObject *) _PyObject_GC_Malloc(size);
2282 if (op != NULL)
2283 op = PyObject_INIT_VAR(op, tp, nitems);
2284 return op;
2285 }
2286
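/* Resize a GC-managed variable-size object in place.  The object must not
   be tracked while it is resized: realloc may move the PyGC_Head, and a
   moved head would corrupt the generation list it is linked into, hence
   the assertion below. */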
2287 PyVarObject *
2288 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2289 {
2290 const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2291 _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2292 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2293 return (PyVarObject *)PyErr_NoMemory();
2294 }
2295
2296 PyGC_Head *g = AS_GC(op);
2297 g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
2298 if (g == NULL)
2299 return (PyVarObject *)PyErr_NoMemory();
2300 op = (PyVarObject *) FROM_GC(g);
2301 Py_SET_SIZE(op, nitems);
2302 return op;
2303 }
2304
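/* Free a GC-managed object: unlink its PyGC_Head from whatever generation
   list still tracks it, adjust the generation-0 allocation count, and
   release the combined header+object block. */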
2305 void
2306 PyObject_GC_Del(void *op)
2307 {
2308 PyGC_Head *g = AS_GC(op);
2309 if (_PyObject_GC_IS_TRACKED(op)) {
2310 gc_list_remove(g);
2311 }
2312 PyThreadState *tstate = _PyThreadState_GET();
2313 GCState *gcstate = &tstate->interp->gc;
2314 if (gcstate->generations[0].count > 0) {
2315 gcstate->generations[0].count--;
2316 }
2317 PyObject_FREE(g);
2318 }
2319
2320 int
2321 PyObject_GC_IsTracked(PyObject* obj)
2322 {
2323 if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2324 return 1;
2325 }
2326 return 0;
2327 }
2328
2329 int
2330 PyObject_GC_IsFinalized(PyObject *obj)
2331 {
2332 if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
2333 return 1;
2334 }
2335 return 0;
2336 }
2337