1 /*
2
3 Reference Cycle Garbage Collection
4 ==================================
5
6 Neil Schemenauer <nas@arctrix.com>
7
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
10
11 http://www.arctrix.com/nas/python/gc/
12
13 The following mailing list threads provide a historical perspective on
14 the design of this module. Note that a fair amount of refinement has
15 occurred since those discussions.
16
17 http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18 http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19 http://mail.python.org/pipermail/python-dev/2000-March/002497.html
20
21 For a high-level view of the collection process, read the
22 gc_collect_main() function.
23
24 */
25
26 #include "Python.h"
27 #include "pycore_context.h"
28 #include "pycore_initconfig.h"
29 #include "pycore_interp.h" // PyInterpreterState.gc
30 #include "pycore_object.h"
31 #include "pycore_pyerrors.h"
32 #include "pycore_pystate.h" // _PyThreadState_GET()
33 #include "pydtrace.h"
34
35 typedef struct _gc_runtime_state GCState;
36
37 /*[clinic input]
38 module gc
39 [clinic start generated code]*/
40 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
41
42
43 #ifdef Py_DEBUG
44 # define GC_DEBUG
45 #endif
46
47 #define GC_NEXT _PyGCHead_NEXT
48 #define GC_PREV _PyGCHead_PREV
49
50 // update_refs() sets this bit for all objects in the generation being collected.
51 // subtract_refs() and move_unreachable() use it to tell whether a visited
52 // object is part of the current collection.
53 //
54 // move_unreachable() removes this flag from reachable objects; afterwards,
55 // only unreachable objects still have it.
56 //
57 // No objects in interpreter have this flag after GC ends.
58 #define PREV_MASK_COLLECTING _PyGC_PREV_MASK_COLLECTING
59
60 // The lowest bit of _gc_next is used for the UNREACHABLE flag.
61 //
62 // This flag means the object has been moved to the unreachable list by
63 // move_unreachable().
64 //
65 // Although the flag is only needed by move_unreachable(), move_unreachable()
66 // does not clear it, to avoid an extra pass over the list;
67 // move_legacy_finalizers() removes it instead. In between, the unreachable
68 // list is not a normal list and most gc_list_* functions can not be used on it.
69 #define NEXT_MASK_UNREACHABLE (1)
70
71 /* Get an object's GC head */
72 #define AS_GC(o) ((PyGC_Head *)(o)-1)
73
74 /* Get the object given the GC head */
75 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
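/* Editorial sketch (not part of the module): the memory layout these two
 * macros assume. A GC-tracked object is allocated with its PyGC_Head
 * immediately before the PyObject, so both conversions are plain pointer
 * arithmetic:
 *
 *     memory:  [ PyGC_Head | PyObject ... ]
 *               ^            ^
 *               AS_GC(op)    op == FROM_GC(gc)
 *
 *     PyObject *op = ...;                  // any GC-tracked object
 *     assert(FROM_GC(AS_GC(op)) == op);    // round-trip identity
 */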
76
77 static inline int
78 gc_is_collecting(PyGC_Head *g)
79 {
80 return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
81 }
82
83 static inline void
84 gc_clear_collecting(PyGC_Head *g)
85 {
86 g->_gc_prev &= ~PREV_MASK_COLLECTING;
87 }
88
89 static inline Py_ssize_t
90 gc_get_refs(PyGC_Head *g)
91 {
92 return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
93 }
94
95 static inline void
96 gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
97 {
98 g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
99 | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
100 }
101
102 static inline void
103 gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
104 {
105 g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
106 | PREV_MASK_COLLECTING
107 | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
108 }
109
110 static inline void
111 gc_decref(PyGC_Head *g)
112 {
113 _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
114 gc_get_refs(g) > 0,
115 "refcount is too small");
116 g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
117 }
118
119 /* Bit flags controlling debugging output */
120 #define DEBUG_STATS (1<<0) /* print collection statistics */
121 #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
122 #define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */
123 #define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */
124 #define DEBUG_LEAK DEBUG_COLLECTABLE | \
125 DEBUG_UNCOLLECTABLE | \
126 DEBUG_SAVEALL
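/* Editorial note: DEBUG_LEAK is just the bitwise OR of the three flags above,
 * i.e. 2 | 4 | 32 == 38, the same value exposed as gc.DEBUG_LEAK. A minimal
 * C-level sketch of enabling it for the current interpreter (get_gc_state()
 * is defined below):
 *
 *     GCState *gcstate = get_gc_state();
 *     gcstate->debug = DEBUG_LEAK;   // equivalent to gc.set_debug(gc.DEBUG_LEAK)
 */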
127
128 #define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
129
130
131 static GCState *
132 get_gc_state(void)
133 {
134 PyInterpreterState *interp = _PyInterpreterState_GET();
135 return &interp->gc;
136 }
137
138
139 void
140 _PyGC_InitState(GCState *gcstate)
141 {
142 gcstate->enabled = 1; /* automatic collection enabled? */
143
144 #define _GEN_HEAD(n) GEN_HEAD(gcstate, n)
145 struct gc_generation generations[NUM_GENERATIONS] = {
146 /* PyGC_Head, threshold, count */
147 {{(uintptr_t)_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)}, 700, 0},
148 {{(uintptr_t)_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)}, 10, 0},
149 {{(uintptr_t)_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)}, 10, 0},
150 };
151 for (int i = 0; i < NUM_GENERATIONS; i++) {
152 gcstate->generations[i] = generations[i];
153 };
154 gcstate->generation0 = GEN_HEAD(gcstate, 0);
155 struct gc_generation permanent_generation = {
156 {(uintptr_t)&gcstate->permanent_generation.head,
157 (uintptr_t)&gcstate->permanent_generation.head}, 0, 0
158 };
159 gcstate->permanent_generation = permanent_generation;
160 }
161
162
163 PyStatus
164 _PyGC_Init(PyInterpreterState *interp)
165 {
166 GCState *gcstate = &interp->gc;
167
168 gcstate->garbage = PyList_New(0);
169 if (gcstate->garbage == NULL) {
170 return _PyStatus_NO_MEMORY();
171 }
172
173 gcstate->callbacks = PyList_New(0);
174 if (gcstate->callbacks == NULL) {
175 return _PyStatus_NO_MEMORY();
176 }
177
178 return _PyStatus_OK();
179 }
180
181
182 /*
183 _gc_prev values
184 ---------------
185
186 Between collections, _gc_prev is used for the doubly linked list.
187
188 The lowest two bits of _gc_prev are used for flags.
189 PREV_MASK_COLLECTING is used only while collecting, and is cleared before GC
190 ends or when _PyObject_GC_UNTRACK() is called.
191
192 During a collection, _gc_prev is temporarily used for gc_refs, and the gc list
193 is singly linked until _gc_prev is restored.
194
195 gc_refs
196 At the start of a collection, update_refs() copies the true refcount
197 to gc_refs, for each object in the generation being collected.
198 subtract_refs() then adjusts gc_refs so that it equals the number of
199 times an object is referenced directly from outside the generation
200 being collected.
201
202 PREV_MASK_COLLECTING
203 Objects in the generation being collected are marked PREV_MASK_COLLECTING in
204 update_refs().
205
206
207 _gc_next values
208 ---------------
209
210 _gc_next takes these values:
211
212 0
213 The object is not tracked
214
215 != 0
216 Pointer to the next object in the GC list.
217 Additionally, the lowest bit is temporarily used for the
218 NEXT_MASK_UNREACHABLE flag described below.
219
220 NEXT_MASK_UNREACHABLE
221 move_unreachable() moves objects that are not reachable (whether directly
222 or indirectly) from outside the generation into an "unreachable" set and
223 sets this flag on them.
224
225 Objects that are found to be reachable have gc_refs set to 1.
226 When this flag turns out to be set on a reachable object, that object must
227 currently be in the "unreachable" set; the flag is unset and the object is
228 moved back to the "reachable" set.
229
230 move_legacy_finalizers() will remove this flag from the "unreachable" set.
231 */
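/* Editorial worked example, assuming _PyGC_PREV_SHIFT == 2 (the two low bits
 * of _gc_prev hold the FINALIZED and COLLECTING flags): during a collection,
 * an object with a true refcount of 3 that has already been finalized has
 *
 *     _gc_prev == (3 << _PyGC_PREV_SHIFT)
 *                 | PREV_MASK_COLLECTING
 *                 | _PyGC_PREV_MASK_FINALIZED
 *
 * gc_get_refs() recovers 3 by shifting the flag bits out, and gc_decref()
 * subtracts 1 << _PyGC_PREV_SHIFT so the flag bits are left untouched.
 */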
232
233 /*** list functions ***/
234
235 static inline void
236 gc_list_init(PyGC_Head *list)
237 {
238 // The list header must not have flags set,
239 // so we can assign the pointers with a simple cast.
240 list->_gc_prev = (uintptr_t)list;
241 list->_gc_next = (uintptr_t)list;
242 }
243
244 static inline int
245 gc_list_is_empty(PyGC_Head *list)
246 {
247 return (list->_gc_next == (uintptr_t)list);
248 }
249
250 /* Append `node` to `list`. */
251 static inline void
252 gc_list_append(PyGC_Head *node, PyGC_Head *list)
253 {
254 PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
255
256 // last <-> node
257 _PyGCHead_SET_PREV(node, last);
258 _PyGCHead_SET_NEXT(last, node);
259
260 // node <-> list
261 _PyGCHead_SET_NEXT(node, list);
262 list->_gc_prev = (uintptr_t)node;
263 }
264
265 /* Remove `node` from the gc list it's currently in. */
266 static inline void
267 gc_list_remove(PyGC_Head *node)
268 {
269 PyGC_Head *prev = GC_PREV(node);
270 PyGC_Head *next = GC_NEXT(node);
271
272 _PyGCHead_SET_NEXT(prev, next);
273 _PyGCHead_SET_PREV(next, prev);
274
275 node->_gc_next = 0; /* object is not currently tracked */
276 }
277
278 /* Move `node` from the gc list it's currently in (which is not explicitly
279 * named here) to the end of `list`. This is semantically the same as
280 * gc_list_remove(node) followed by gc_list_append(node, list).
281 */
282 static void
283 gc_list_move(PyGC_Head *node, PyGC_Head *list)
284 {
285 /* Unlink from current list. */
286 PyGC_Head *from_prev = GC_PREV(node);
287 PyGC_Head *from_next = GC_NEXT(node);
288 _PyGCHead_SET_NEXT(from_prev, from_next);
289 _PyGCHead_SET_PREV(from_next, from_prev);
290
291 /* Relink at end of new list. */
292 // The list head must not have flags set, so we can skip the macros.
293 PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
294 _PyGCHead_SET_PREV(node, to_prev);
295 _PyGCHead_SET_NEXT(to_prev, node);
296 list->_gc_prev = (uintptr_t)node;
297 _PyGCHead_SET_NEXT(node, list);
298 }
299
300 /* append list `from` onto list `to`; `from` becomes an empty list */
301 static void
302 gc_list_merge(PyGC_Head *from, PyGC_Head *to)
303 {
304 assert(from != to);
305 if (!gc_list_is_empty(from)) {
306 PyGC_Head *to_tail = GC_PREV(to);
307 PyGC_Head *from_head = GC_NEXT(from);
308 PyGC_Head *from_tail = GC_PREV(from);
309 assert(from_head != from);
310 assert(from_tail != from);
311
312 _PyGCHead_SET_NEXT(to_tail, from_head);
313 _PyGCHead_SET_PREV(from_head, to_tail);
314
315 _PyGCHead_SET_NEXT(from_tail, to);
316 _PyGCHead_SET_PREV(to, from_tail);
317 }
318 gc_list_init(from);
319 }
320
321 static Py_ssize_t
322 gc_list_size(PyGC_Head *list)
323 {
324 PyGC_Head *gc;
325 Py_ssize_t n = 0;
326 for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
327 n++;
328 }
329 return n;
330 }
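/* Editorial sketch (not used by the collector itself): the gc lists above are
 * circular, doubly linked lists whose header acts as a sentinel. A minimal
 * illustration of the primitives, assuming `a` and `b` point to the
 * PyGC_Head of two tracked objects that are not currently linked anywhere:
 *
 *     PyGC_Head list;
 *     gc_list_init(&list);              // empty list points at itself
 *     assert(gc_list_is_empty(&list));
 *     gc_list_append(a, &list);
 *     gc_list_append(b, &list);
 *     assert(gc_list_size(&list) == 2);
 *     gc_list_remove(a);                // a->_gc_next becomes 0 ("untracked")
 *     assert(gc_list_size(&list) == 1);
 */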
331
332 /* Walk the list and mark all objects as non-collecting */
333 static inline void
334 gc_list_clear_collecting(PyGC_Head *collectable)
335 {
336 PyGC_Head *gc;
337 for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
338 gc_clear_collecting(gc);
339 }
340 }
341
342 /* Append objects in a GC list to a Python list.
343 * Return 0 if all OK, < 0 if error (out of memory for list)
344 */
345 static int
346 append_objects(PyObject *py_list, PyGC_Head *gc_list)
347 {
348 PyGC_Head *gc;
349 for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
350 PyObject *op = FROM_GC(gc);
351 if (op != py_list) {
352 if (PyList_Append(py_list, op)) {
353 return -1; /* exception */
354 }
355 }
356 }
357 return 0;
358 }
359
360 // Constants for validate_list's flags argument.
361 enum flagstates {collecting_clear_unreachable_clear,
362 collecting_clear_unreachable_set,
363 collecting_set_unreachable_clear,
364 collecting_set_unreachable_set};
365
366 #ifdef GC_DEBUG
367 // validate_list checks list consistency, and also serves as documentation of
368 // when flags are expected to be set / unset.
369 // `head` must be a doubly-linked gc list, although it's fine (expected!) if
370 // the prev and next pointers are "polluted" with flags.
371 // What's checked:
372 // - The `head` pointers are not polluted.
373 // - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
374 // set or clear, as specified by the 'flags' argument.
375 // - The prev and next pointers are mutually consistent.
376 static void
377 validate_list(PyGC_Head *head, enum flagstates flags)
378 {
379 assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
380 assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
381 uintptr_t prev_value = 0, next_value = 0;
382 switch (flags) {
383 case collecting_clear_unreachable_clear:
384 break;
385 case collecting_set_unreachable_clear:
386 prev_value = PREV_MASK_COLLECTING;
387 break;
388 case collecting_clear_unreachable_set:
389 next_value = NEXT_MASK_UNREACHABLE;
390 break;
391 case collecting_set_unreachable_set:
392 prev_value = PREV_MASK_COLLECTING;
393 next_value = NEXT_MASK_UNREACHABLE;
394 break;
395 default:
396 assert(! "bad internal flags argument");
397 }
398 PyGC_Head *prev = head;
399 PyGC_Head *gc = GC_NEXT(head);
400 while (gc != head) {
401 PyGC_Head *trueprev = GC_PREV(gc);
402 PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
403 assert(truenext != NULL);
404 assert(trueprev == prev);
405 assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
406 assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
407 prev = gc;
408 gc = truenext;
409 }
410 assert(prev == GC_PREV(head));
411 }
412 #else
413 #define validate_list(x, y) do{}while(0)
414 #endif
415
416 /*** end of list stuff ***/
417
418
419 /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and
420 * PREV_MASK_COLLECTING bit is set for all objects in containers.
421 */
422 static void
423 update_refs(PyGC_Head *containers)
424 {
425 PyGC_Head *gc = GC_NEXT(containers);
426 for (; gc != containers; gc = GC_NEXT(gc)) {
427 gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
428 /* Python's cyclic gc should never see an incoming refcount
429 * of 0: if something decref'ed to 0, it should have been
430 * deallocated immediately at that time.
431 * Possible cause (if the assert triggers): a tp_dealloc
432 * routine left a gc-aware object tracked during its teardown
433 * phase, and did something-- or allowed something to happen --
434 * that called back into Python. gc can trigger then, and may
435 * see the still-tracked dying object. Before this assert
436 * was added, such mistakes went on to allow gc to try to
437 * delete the object again. In a debug build, that caused
438 * a mysterious segfault, when _Py_ForgetReference tried
439 * to remove the object from the doubly-linked list of all
440 * objects a second time. In a release build, an actual
441 * double deallocation occurred, which leads to corruption
442 * of the allocator's internal bookkeeping pointers. That's
443 * so serious that maybe this should be a release-build
444 * check instead of an assert?
445 */
446 _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
447 }
448 }
449
450 /* A traversal callback for subtract_refs. */
451 static int
452 visit_decref(PyObject *op, void *parent)
453 {
454 _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
455
456 if (_PyObject_IS_GC(op)) {
457 PyGC_Head *gc = AS_GC(op);
458 /* We're only interested in gc_refs for objects in the
459 * generation being collected, which can be recognized
460 * because only they have positive gc_refs.
461 */
462 if (gc_is_collecting(gc)) {
463 gc_decref(gc);
464 }
465 }
466 return 0;
467 }
468
469 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
470 * for all objects in containers, and is GC_REACHABLE for all tracked gc
471 * objects not in containers. The ones with gc_refs > 0 are directly
472 * reachable from outside containers, and so can't be collected.
473 */
474 static void
475 subtract_refs(PyGC_Head *containers)
476 {
477 traverseproc traverse;
478 PyGC_Head *gc = GC_NEXT(containers);
479 for (; gc != containers; gc = GC_NEXT(gc)) {
480 PyObject *op = FROM_GC(gc);
481 traverse = Py_TYPE(op)->tp_traverse;
482 (void) traverse(FROM_GC(gc),
483 (visitproc)visit_decref,
484 op);
485 }
486 }
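/* Editorial worked example of update_refs() + subtract_refs(): suppose the
 * generation holds two objects A and B forming a cycle (A -> B and B -> A),
 * and some reference outside the generation also points to A. update_refs()
 * copies the true refcounts: gc_refs(A) = 2, gc_refs(B) = 1. subtract_refs()
 * traverses A (subtracting 1 from B) and B (subtracting 1 from A), leaving
 * gc_refs(A) = 1 and gc_refs(B) = 0. A non-zero result means "directly
 * referenced from outside the generation"; B's zero does not by itself make
 * B trash -- move_unreachable() below decides whether it is still reachable
 * indirectly (here it is, through A).
 */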
487
488 /* A traversal callback for move_unreachable. */
489 static int
490 visit_reachable(PyObject *op, PyGC_Head *reachable)
491 {
492 if (!_PyObject_IS_GC(op)) {
493 return 0;
494 }
495
496 PyGC_Head *gc = AS_GC(op);
497 const Py_ssize_t gc_refs = gc_get_refs(gc);
498
499 // Ignore objects in other generations.
500 // This also skips objects "to the left" of the current position in
501 // move_unreachable's scan of the 'young' list - they've already been
502 // traversed, and no longer have the PREV_MASK_COLLECTING flag.
503 if (! gc_is_collecting(gc)) {
504 return 0;
505 }
506 // It would be a logic error elsewhere if the collecting flag were set on
507 // an untracked object.
508 assert(gc->_gc_next != 0);
509
510 if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
511 /* This had gc_refs = 0 when move_unreachable got
512 * to it, but turns out it's reachable after all.
513 * Move it back to move_unreachable's 'young' list,
514 * and move_unreachable will eventually get to it
515 * again.
516 */
517 // Manually unlink gc from unreachable list because the list functions
518 // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
519 PyGC_Head *prev = GC_PREV(gc);
520 PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
521 _PyObject_ASSERT(FROM_GC(prev),
522 prev->_gc_next & NEXT_MASK_UNREACHABLE);
523 _PyObject_ASSERT(FROM_GC(next),
524 next->_gc_next & NEXT_MASK_UNREACHABLE);
525 prev->_gc_next = gc->_gc_next; // copy NEXT_MASK_UNREACHABLE
526 _PyGCHead_SET_PREV(next, prev);
527
528 gc_list_append(gc, reachable);
529 gc_set_refs(gc, 1);
530 }
531 else if (gc_refs == 0) {
532 /* This is in move_unreachable's 'young' list, but
533 * the traversal hasn't yet gotten to it. All
534 * we need to do is tell move_unreachable that it's
535 * reachable.
536 */
537 gc_set_refs(gc, 1);
538 }
539 /* Else there's nothing to do.
540 * If gc_refs > 0, it must be in move_unreachable's 'young'
541 * list, and move_unreachable will eventually get to it.
542 */
543 else {
544 _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
545 }
546 return 0;
547 }
548
549 /* Move the unreachable objects from young to unreachable. After this,
550  * no object in young has the PREV_MASK_COLLECTING flag, and every object
551  * in unreachable does.
552  * All objects remaining in young are directly or indirectly reachable
553  * from outside the original young list, and all objects in unreachable are not.
554  *
555  * This function restores the _gc_prev pointers: young and unreachable are
556  * doubly linked lists afterwards.
557  * However, _gc_next in the unreachable list still has the NEXT_MASK_UNREACHABLE
558  * flag set, so gc_list_* functions can not be used on unreachable until
559  * the flag is removed.
560  */
561 static void
562 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
563 {
564 // previous element in the young list, used to restore _gc_prev.
565 PyGC_Head *prev = young;
566 PyGC_Head *gc = GC_NEXT(young);
567
568 /* Invariants: all objects "to the left" of us in young are reachable
569 * (directly or indirectly) from outside the young list as it was at entry.
570 *
571 * All other objects from the original young "to the left" of us are in
572 * unreachable now, and have NEXT_MASK_UNREACHABLE. All objects to the
573 * left of us in 'young' now have been scanned, and no objects here
574 * or to the right have been scanned yet.
575 */
576
577 while (gc != young) {
578 if (gc_get_refs(gc)) {
579 /* gc is definitely reachable from outside the
580 * original 'young'. Mark it as such, and traverse
581 * its pointers to find any other objects that may
582 * be directly reachable from it. Note that the
583 * call to tp_traverse may append objects to young,
584 * so we have to wait until it returns to determine
585 * the next object to visit.
586 */
587 PyObject *op = FROM_GC(gc);
588 traverseproc traverse = Py_TYPE(op)->tp_traverse;
589 _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
590 "refcount is too small");
591 // NOTE: visit_reachable may change gc->_gc_next when
592 // young->_gc_prev == gc. Don't do gc = GC_NEXT(gc) before!
593 (void) traverse(op,
594 (visitproc)visit_reachable,
595 (void *)young);
596 // relink gc_prev to prev element.
597 _PyGCHead_SET_PREV(gc, prev);
598 // gc is not COLLECTING state after here.
599 gc_clear_collecting(gc);
600 prev = gc;
601 }
602 else {
603 /* This *may* be unreachable. To make progress,
604 * assume it is. gc isn't directly reachable from
605 * any object we've already traversed, but may be
606 * reachable from an object we haven't gotten to yet.
607 * visit_reachable will eventually move gc back into
608 * young if that's so, and we'll see it again.
609 */
610 // Move gc to unreachable.
611 // No need to set gc->next->prev = prev because the list is singly linked here.
612 prev->_gc_next = gc->_gc_next;
613
614 // We can't use gc_list_append() here because we need to set the
615 // NEXT_MASK_UNREACHABLE flag on the links.
616 PyGC_Head *last = GC_PREV(unreachable);
617 // NOTE: Since all objects in the unreachable set have the
618 // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
619 // But this may pollute the unreachable list head's 'next' pointer
620 // too. That's semantically senseless but expedient here - the
621 // damage is repaired when this function ends.
622 last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
623 _PyGCHead_SET_PREV(gc, last);
624 gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
625 unreachable->_gc_prev = (uintptr_t)gc;
626 }
627 gc = (PyGC_Head*)prev->_gc_next;
628 }
629 // young->_gc_prev must be the last element remaining in the list.
630 young->_gc_prev = (uintptr_t)prev;
631 // don't let the pollution of the list head's next pointer leak
632 unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
633 }
634
635 static void
636 untrack_tuples(PyGC_Head *head)
637 {
638 PyGC_Head *next, *gc = GC_NEXT(head);
639 while (gc != head) {
640 PyObject *op = FROM_GC(gc);
641 next = GC_NEXT(gc);
642 if (PyTuple_CheckExact(op)) {
643 _PyTuple_MaybeUntrack(op);
644 }
645 gc = next;
646 }
647 }
648
649 /* Try to untrack all currently tracked dictionaries */
650 static void
651 untrack_dicts(PyGC_Head *head)
652 {
653 PyGC_Head *next, *gc = GC_NEXT(head);
654 while (gc != head) {
655 PyObject *op = FROM_GC(gc);
656 next = GC_NEXT(gc);
657 if (PyDict_CheckExact(op)) {
658 _PyDict_MaybeUntrack(op);
659 }
660 gc = next;
661 }
662 }
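/* Editorial note: untracking is purely an optimization. A container that can
 * never take part in a reference cycle does not need to be examined by the
 * collector, so _PyTuple_MaybeUntrack() / _PyDict_MaybeUntrack() drop such
 * containers from the GC lists. A hypothetical sketch using the public API:
 *
 *     PyObject *t = Py_BuildValue("(ii)", 1, 2);   // tuple holding only ints
 *     // Once a collection has inspected it and found no GC-tracked items,
 *     // the tuple is untracked and PyObject_GC_IsTracked(t) returns 0.
 */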
663
664 /* Return true if object has a pre-PEP 442 finalization method. */
665 static int
666 has_legacy_finalizer(PyObject *op)
667 {
668 return Py_TYPE(op)->tp_del != NULL;
669 }
670
671 /* Move the objects in unreachable with tp_del slots into `finalizers`.
672 *
673 * This function also removes NEXT_MASK_UNREACHABLE flag
674 * from _gc_next in unreachable.
675 */
676 static void
677 move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
678 {
679 PyGC_Head *gc, *next;
680 assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
681
682 /* March over unreachable. Move objects with finalizers into
683 * `finalizers`.
684 */
685 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
686 PyObject *op = FROM_GC(gc);
687
688 _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
689 gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
690 next = (PyGC_Head*)gc->_gc_next;
691
692 if (has_legacy_finalizer(op)) {
693 gc_clear_collecting(gc);
694 gc_list_move(gc, finalizers);
695 }
696 }
697 }
698
699 static inline void
700 clear_unreachable_mask(PyGC_Head *unreachable)
701 {
702 /* Check that the list head does not have the unreachable bit set */
703 assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
704
705 PyGC_Head *gc, *next;
706 assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
707 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
708 _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
709 gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
710 next = (PyGC_Head*)gc->_gc_next;
711 }
712 validate_list(unreachable, collecting_set_unreachable_clear);
713 }
714
715 /* A traversal callback for move_legacy_finalizer_reachable. */
716 static int
717 visit_move(PyObject *op, PyGC_Head *tolist)
718 {
719 if (_PyObject_IS_GC(op)) {
720 PyGC_Head *gc = AS_GC(op);
721 if (gc_is_collecting(gc)) {
722 gc_list_move(gc, tolist);
723 gc_clear_collecting(gc);
724 }
725 }
726 return 0;
727 }
728
729 /* Move objects that are reachable from finalizers, from the unreachable set
730 * into finalizers set.
731 */
732 static void
733 move_legacy_finalizer_reachable(PyGC_Head *finalizers)
734 {
735 traverseproc traverse;
736 PyGC_Head *gc = GC_NEXT(finalizers);
737 for (; gc != finalizers; gc = GC_NEXT(gc)) {
738 /* Note that the finalizers list may grow during this. */
739 traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
740 (void) traverse(FROM_GC(gc),
741 (visitproc)visit_move,
742 (void *)finalizers);
743 }
744 }
745
746 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
747 * callback, invoke it if necessary. Note that it's possible for such
748 * weakrefs to be outside the unreachable set -- indeed, those are precisely
749 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
750 * overview & some details. Some weakrefs with callbacks may be reclaimed
751 * directly by this routine; the number reclaimed is the return value. Other
752 * weakrefs with callbacks may be moved into the `old` generation. Objects
753 * moved into `old` are no longer considered cyclic trash; the objects remaining in
754 * unreachable still are. When this returns,
755 * no object in `unreachable` is weakly referenced anymore.
756 */
757 static int
758 handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
759 {
760 PyGC_Head *gc;
761 PyObject *op; /* generally FROM_GC(gc) */
762 PyWeakReference *wr; /* generally a cast of op */
763 PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
764 PyGC_Head *next;
765 int num_freed = 0;
766
767 gc_list_init(&wrcb_to_call);
768
769 /* Clear all weakrefs to the objects in unreachable. If such a weakref
770 * also has a callback, move it into `wrcb_to_call` if the callback
771 * needs to be invoked. Note that we cannot invoke any callbacks until
772 * all weakrefs to unreachable objects are cleared, lest the callback
773 * resurrect an unreachable object via a still-active weakref. We
774 * make another pass over wrcb_to_call, invoking callbacks, after this
775 * pass completes.
776 */
777 for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
778 PyWeakReference **wrlist;
779
780 op = FROM_GC(gc);
781 next = GC_NEXT(gc);
782
783 if (PyWeakref_Check(op)) {
784 /* A weakref inside the unreachable set must be cleared. If we
785 * allow its callback to execute inside delete_garbage(), it
786 * could expose objects that have tp_clear already called on
787 * them. Or, it could resurrect unreachable objects. One way
788 * this can happen is if some container objects do not implement
789 * tp_traverse. Then, wr_object can be outside the unreachable
790 * set but can be deallocated as a result of breaking the
791 * reference cycle. If we don't clear the weakref, the callback
792 * will run and potentially cause a crash. See bpo-38006 for
793 * one example.
794 */
795 _PyWeakref_ClearRef((PyWeakReference *)op);
796 }
797
798 if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
799 continue;
800
801 /* It supports weakrefs. Does it have any? */
802 wrlist = (PyWeakReference **)
803 _PyObject_GET_WEAKREFS_LISTPTR(op);
804
805 /* `op` may have some weakrefs. March over the list, clear
806 * all the weakrefs, and move the weakrefs with callbacks
807 * that must be called into wrcb_to_call.
808 */
809 for (wr = *wrlist; wr != NULL; wr = *wrlist) {
810 PyGC_Head *wrasgc; /* AS_GC(wr) */
811
812 /* _PyWeakref_ClearRef clears the weakref but leaves
813 * the callback pointer intact. Obscure: it also
814 * changes *wrlist.
815 */
816 _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
817 _PyWeakref_ClearRef(wr);
818 _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
819 if (wr->wr_callback == NULL) {
820 /* no callback */
821 continue;
822 }
823
824 /* Headache time. `op` is going away, and is weakly referenced by
825 * `wr`, which has a callback. Should the callback be invoked? If wr
826 * is also trash, no:
827 *
828 * 1. There's no need to call it. The object and the weakref are
829 * both going away, so it's legitimate to pretend the weakref is
830 * going away first. The user has to ensure a weakref outlives its
831 * referent if they want a guarantee that the wr callback will get
832 * invoked.
833 *
834 * 2. It may be catastrophic to call it. If the callback is also in
835 * cyclic trash (CT), then although the CT is unreachable from
836 * outside the current generation, CT may be reachable from the
837 * callback. Then the callback could resurrect insane objects.
838 *
839 * Since the callback is never needed and may be unsafe in this case,
840 * wr is simply left in the unreachable set. Note that because we
841 * already called _PyWeakref_ClearRef(wr), its callback will never
842 * trigger.
843 *
844 * OTOH, if wr isn't part of CT, we should invoke the callback: the
845 * weakref outlived the trash. Note that since wr isn't CT in this
846 * case, its callback can't be CT either -- wr acted as an external
847 * root to this generation, and therefore its callback did too. So
848 * nothing in CT is reachable from the callback either, so it's hard
849 * to imagine how calling it later could create a problem for us. wr
850 * is moved to wrcb_to_call in this case.
851 */
852 if (gc_is_collecting(AS_GC(wr))) {
853 /* it should already have been cleared above */
854 assert(wr->wr_object == Py_None);
855 continue;
856 }
857
858 /* Create a new reference so that wr can't go away
859 * before we can process it again.
860 */
861 Py_INCREF(wr);
862
863 /* Move wr to wrcb_to_call, for the next pass. */
864 wrasgc = AS_GC(wr);
865 assert(wrasgc != next); /* wrasgc is reachable, but
866 next isn't, so they can't
867 be the same */
868 gc_list_move(wrasgc, &wrcb_to_call);
869 }
870 }
871
872 /* Invoke the callbacks we decided to honor. It's safe to invoke them
873 * because they can't reference unreachable objects.
874 */
875 while (! gc_list_is_empty(&wrcb_to_call)) {
876 PyObject *temp;
877 PyObject *callback;
878
879 gc = (PyGC_Head*)wrcb_to_call._gc_next;
880 op = FROM_GC(gc);
881 _PyObject_ASSERT(op, PyWeakref_Check(op));
882 wr = (PyWeakReference *)op;
883 callback = wr->wr_callback;
884 _PyObject_ASSERT(op, callback != NULL);
885
886 /* copy-paste of weakrefobject.c's handle_callback() */
887 temp = PyObject_CallOneArg(callback, (PyObject *)wr);
888 if (temp == NULL)
889 PyErr_WriteUnraisable(callback);
890 else
891 Py_DECREF(temp);
892
893 /* Give up the reference we created in the first pass. When
894 * op's refcount hits 0 (which it may or may not do right now),
895 * op's tp_dealloc will decref op->wr_callback too. Note
896 * that the refcount probably will hit 0 now, and because this
897 * weakref was reachable to begin with, gc didn't already
898 * add it to its count of freed objects. Example: a reachable
899 * weak value dict maps some key to this reachable weakref.
900 * The callback removes this key->weakref mapping from the
901 * dict, leaving no other references to the weakref (excepting
902 * ours).
903 */
904 Py_DECREF(op);
905 if (wrcb_to_call._gc_next == (uintptr_t)gc) {
906 /* object is still alive -- move it */
907 gc_list_move(gc, old);
908 }
909 else {
910 ++num_freed;
911 }
912 }
913
914 return num_freed;
915 }
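/* Editorial example of the case handled above: let A and B form an
 * unreachable cycle, and let a *reachable* object hold
 * wr = PyWeakref_NewRef(B, callback). The cycle is trash but wr is not; its
 * callback should run, yet it must never observe B in a half-cleared state.
 * That is why every weakref *to* an unreachable object is cleared first, and
 * only afterwards are the surviving callbacks (those whose weakref object is
 * itself not trash) invoked.
 */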
916
917 static void
918 debug_cycle(const char *msg, PyObject *op)
919 {
920 PySys_FormatStderr("gc: %s <%s %p>\n",
921 msg, Py_TYPE(op)->tp_name, op);
922 }
923
924 /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
925 * only from such cycles).
926 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
927 * garbage list (a Python list), else only the objects in finalizers with
928 * __del__ methods are appended to garbage. All objects in finalizers are
929 * merged into the old list regardless.
930 */
931 static void
932 handle_legacy_finalizers(PyThreadState *tstate,
933 GCState *gcstate,
934 PyGC_Head *finalizers, PyGC_Head *old)
935 {
936 assert(!_PyErr_Occurred(tstate));
937 assert(gcstate->garbage != NULL);
938
939 PyGC_Head *gc = GC_NEXT(finalizers);
940 for (; gc != finalizers; gc = GC_NEXT(gc)) {
941 PyObject *op = FROM_GC(gc);
942
943 if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
944 if (PyList_Append(gcstate->garbage, op) < 0) {
945 _PyErr_Clear(tstate);
946 break;
947 }
948 }
949 }
950
951 gc_list_merge(finalizers, old);
952 }
953
954 /* Run first-time finalizers (if any) on all the objects in collectable.
955 * Note that this may remove some (or even all) of the objects from the
956 * list, due to refcounts falling to 0.
957 */
958 static void
959 finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
960 {
961 destructor finalize;
962 PyGC_Head seen;
963
964 /* While we're going through the loop, `finalize(op)` may cause op, or
965 * other objects, to be reclaimed via refcounts falling to zero. So
966 * there's little we can rely on about the structure of the input
967 * `collectable` list across iterations. For safety, we always take the
968 * first object in that list and move it to a temporary `seen` list.
969 * If objects vanish from the `collectable` and `seen` lists we don't
970 * care.
971 */
972 gc_list_init(&seen);
973
974 while (!gc_list_is_empty(collectable)) {
975 PyGC_Head *gc = GC_NEXT(collectable);
976 PyObject *op = FROM_GC(gc);
977 gc_list_move(gc, &seen);
978 if (!_PyGCHead_FINALIZED(gc) &&
979 (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
980 _PyGCHead_SET_FINALIZED(gc);
981 Py_INCREF(op);
982 finalize(op);
983 assert(!_PyErr_Occurred(tstate));
984 Py_DECREF(op);
985 }
986 }
987 gc_list_merge(&seen, collectable);
988 }
989
990 /* Break reference cycles by clearing the containers involved. This is
991 * tricky business as the lists can be changing and we don't know which
992 * objects may be freed. It is possible I screwed something up here.
993 */
994 static void
995 delete_garbage(PyThreadState *tstate, GCState *gcstate,
996 PyGC_Head *collectable, PyGC_Head *old)
997 {
998 assert(!_PyErr_Occurred(tstate));
999
1000 while (!gc_list_is_empty(collectable)) {
1001 PyGC_Head *gc = GC_NEXT(collectable);
1002 PyObject *op = FROM_GC(gc);
1003
1004 _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1005 "refcount is too small");
1006
1007 if (gcstate->debug & DEBUG_SAVEALL) {
1008 assert(gcstate->garbage != NULL);
1009 if (PyList_Append(gcstate->garbage, op) < 0) {
1010 _PyErr_Clear(tstate);
1011 }
1012 }
1013 else {
1014 inquiry clear;
1015 if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1016 Py_INCREF(op);
1017 (void) clear(op);
1018 if (_PyErr_Occurred(tstate)) {
1019 _PyErr_WriteUnraisableMsg("in tp_clear of",
1020 (PyObject*)Py_TYPE(op));
1021 }
1022 Py_DECREF(op);
1023 }
1024 }
1025 if (GC_NEXT(collectable) == gc) {
1026 /* object is still alive, move it, it may die later */
1027 gc_clear_collecting(gc);
1028 gc_list_move(gc, old);
1029 }
1030 }
1031 }
1032
1033 /* Clear all free lists
1034 * All free lists are cleared during the collection of the highest generation.
1035 * Allocated items in the free list may keep a pymalloc arena occupied.
1036 * Clearing the free lists may give back memory to the OS earlier.
1037 */
1038 static void
1039 clear_freelists(PyInterpreterState *interp)
1040 {
1041 _PyFrame_ClearFreeList(interp);
1042 _PyTuple_ClearFreeList(interp);
1043 _PyFloat_ClearFreeList(interp);
1044 _PyList_ClearFreeList(interp);
1045 _PyDict_ClearFreeList(interp);
1046 _PyAsyncGen_ClearFreeLists(interp);
1047 _PyContext_ClearFreeList(interp);
1048 }
1049
1050 // Show stats for objects in each generation
1051 static void
1052 show_stats_each_generations(GCState *gcstate)
1053 {
1054 char buf[100];
1055 size_t pos = 0;
1056
1057 for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1058 pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1059 " %zd",
1060 gc_list_size(GEN_HEAD(gcstate, i)));
1061 }
1062
1063 PySys_FormatStderr(
1064 "gc: objects in each generation:%s\n"
1065 "gc: objects in permanent generation: %zd\n",
1066 buf, gc_list_size(&gcstate->permanent_generation.head));
1067 }
1068
1069 /* Deduce which objects among "base" are unreachable from outside the list
1070 and move them to 'unreachable'. The process consists of the following steps:
1071
1072 1. Copy all reference counts to a different field (gc_prev is used to hold
1073 this copy to save memory).
1074 2. Traverse all objects in "base" and visit all referenced objects using
1075 "tp_traverse", and for every visited object, subtract 1 from its copied
1076 reference count (the one made in the previous step). After this step, all
1077 objects that are reachable directly from outside the list must have a strictly
1078 positive count, while all other objects must have a count of exactly 0.
1079 3. Identify all unreachable objects (the ones with a 0 count) and move them
1080 to the "unreachable" list. This step also moves back to "base" all objects
1081 that were initially marked as unreachable but are referenced transitively
1082 by the reachable objects (the ones with a strictly positive count).
1083
1084 Contracts:
1085
1086 * The "base" has to be a valid list with no mask set.
1087
1088 * The "unreachable" list must be uninitialized (this function calls
1089 gc_list_init over 'unreachable').
1090
1091 IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE
1092 flag set but it does not clear it to skip unnecessary iteration. Before the
1093 flag is cleared (for example, by using 'clear_unreachable_mask' function or
1094 by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
1095 list and we can not use most gc_list_* functions for it. */
1096 static inline void
1097 deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1098 validate_list(base, collecting_clear_unreachable_clear);
1099 /* Using ob_refcnt and gc_refs, calculate which objects in the
1100 * container set are reachable from outside the set (i.e., have a
1101 * refcount greater than 0 when all the references within the
1102 * set are taken into account).
1103 */
1104 update_refs(base); // gc_prev is used for gc_refs
1105 subtract_refs(base);
1106
1107 /* Leave everything reachable from outside base in base, and move
1108 * everything else (in base) to unreachable.
1109 *
1110 * NOTE: This used to move the reachable objects into a reachable
1111 * set instead. But most things usually turn out to be reachable,
1112 * so it's more efficient to move the unreachable things. It "sounds slick"
1113 * to move the unreachable objects, until you think about it - the reason it
1114 * pays isn't actually obvious.
1115 *
1116 * Suppose we create objects A, B, C in that order. They appear in the young
1117 * generation in the same order. If B points to A, and C to B, and C is
1118 * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1119 * respectively.
1120 *
1121 * When move_unreachable finds A, A is moved to the unreachable list. The
1122 * same for B when it's first encountered. Then C is traversed, B is moved
1123 * _back_ to the reachable list. B is eventually traversed, and then A is
1124 * moved back to the reachable list.
1125 *
1126 * So instead of not moving at all, the reachable objects B and A are moved
1127 * twice each. Why is this a win? A straightforward algorithm to move the
1128 * reachable objects instead would move A, B, and C once each.
1129 *
1130 * The key is that this dance leaves the objects in order C, B, A - it's
1131 * reversed from the original order. On all _subsequent_ scans, none of
1132 * them will move. Since most objects aren't in cycles, this can save an
1133 * unbounded number of moves across an unbounded number of later collections.
1134 * It can cost more only the first time the chain is scanned.
1135 *
1136 * Drawback: move_unreachable is also used to find out what's still trash
1137 * after finalizers may resurrect objects. In _that_ case most unreachable
1138 * objects will remain unreachable, so it would be more efficient to move
1139 * the reachable objects instead. But this is a one-time cost, probably not
1140 * worth complicating the code to speed just a little.
1141 */
1142 gc_list_init(unreachable);
1143 move_unreachable(base, unreachable); // gc_prev is pointer again
1144 validate_list(base, collecting_clear_unreachable_clear);
1145 validate_list(unreachable, collecting_set_unreachable_set);
1146 }
1147
1148 /* Handle objects that may have been resurrected after a call to 'finalize_garbage', moving
1149 them to 'old_generation' and placing the rest on 'still_unreachable'.
1150
1151 Contracts:
1152 * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1153 will contain the objects that did not resurrect.
1154
1155 * The "still_unreachable" list must be uninitialized (this function calls
1156 gc_list_init over 'still_unreachable').
1157
1158 IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1159 PREV_MASK_COLLECTING flag set, but the objects in this set are going to be removed, so
1160 we can skip the expense of clearing the flag to avoid extra iteration. */
1161 static inline void
1162 handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1163 PyGC_Head *old_generation)
1164 {
1165 // Remove the PREV_MASK_COLLECTING from unreachable
1166 // to prepare it for a new call to 'deduce_unreachable'
1167 gc_list_clear_collecting(unreachable);
1168
1169 // After the call to deduce_unreachable, the 'still_unreachable' set will
1170 // have the PREV_MASK_COLLECTING flag set, but the objects are going to be
1171 // removed so we can skip the expense of clearing the flag.
1172 PyGC_Head* resurrected = unreachable;
1173 deduce_unreachable(resurrected, still_unreachable);
1174 clear_unreachable_mask(still_unreachable);
1175
1176 // Move the resurrected objects to the old generation for future collection.
1177 gc_list_merge(resurrected, old_generation);
1178 }
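/* Editorial sketch of resurrection (hypothetical type, not part of this
 * module): a tp_finalize slot may make its object reachable again, e.g. by
 * storing a new strong reference in a live container. finalize_garbage()
 * runs such finalizers, and handle_resurrected_objects() then re-runs the
 * reachability pass so the resurrected object, and everything reachable from
 * it, is merged into the old generation instead of being cleared.
 *
 *     static PyObject *survivors;       // hypothetical live module-level list
 *
 *     static void
 *     mytype_finalize(PyObject *self)
 *     {
 *         // Resurrect: a new strong reference now keeps `self` alive.
 *         if (survivors != NULL) {
 *             (void)PyList_Append(survivors, self);
 *         }
 *     }
 */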
1179
1180 /* This is the main function. Read this to understand how the
1181 * collection process works. */
1182 static Py_ssize_t
1183 gc_collect_main(PyThreadState *tstate, int generation,
1184 Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
1185 int nofail)
1186 {
1187 int i;
1188 Py_ssize_t m = 0; /* # objects collected */
1189 Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
1190 PyGC_Head *young; /* the generation we are examining */
1191 PyGC_Head *old; /* next older generation */
1192 PyGC_Head unreachable; /* non-problematic unreachable trash */
1193 PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
1194 PyGC_Head *gc;
1195 _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
1196 GCState *gcstate = &tstate->interp->gc;
1197
1198 // gc_collect_main() must not be called before _PyGC_Init
1199 // or after _PyGC_Fini()
1200 assert(gcstate->garbage != NULL);
1201 assert(!_PyErr_Occurred(tstate));
1202
1203 #ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
1204 if (tstate->interp->config._isolated_interpreter) {
1205 // bpo-40533: The garbage collector must not be run in parallel on
1206 // Python objects shared by multiple interpreters.
1207 return 0;
1208 }
1209 #endif
1210
1211 if (gcstate->debug & DEBUG_STATS) {
1212 PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1213 show_stats_each_generations(gcstate);
1214 t1 = _PyTime_GetMonotonicClock();
1215 }
1216
1217 if (PyDTrace_GC_START_ENABLED())
1218 PyDTrace_GC_START(generation);
1219
1220 /* update collection and allocation counters */
1221 if (generation+1 < NUM_GENERATIONS)
1222 gcstate->generations[generation+1].count += 1;
1223 for (i = 0; i <= generation; i++)
1224 gcstate->generations[i].count = 0;
1225
1226 /* merge younger generations with one we are currently collecting */
1227 for (i = 0; i < generation; i++) {
1228 gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1229 }
1230
1231 /* handy references */
1232 young = GEN_HEAD(gcstate, generation);
1233 if (generation < NUM_GENERATIONS-1)
1234 old = GEN_HEAD(gcstate, generation+1);
1235 else
1236 old = young;
1237 validate_list(old, collecting_clear_unreachable_clear);
1238
1239 deduce_unreachable(young, &unreachable);
1240
1241 untrack_tuples(young);
1242 /* Move reachable objects to next generation. */
1243 if (young != old) {
1244 if (generation == NUM_GENERATIONS - 2) {
1245 gcstate->long_lived_pending += gc_list_size(young);
1246 }
1247 gc_list_merge(young, old);
1248 }
1249 else {
1250 /* We only un-track dicts in full collections, to avoid quadratic
1251 dict build-up. See issue #14775. */
1252 untrack_dicts(young);
1253 gcstate->long_lived_pending = 0;
1254 gcstate->long_lived_total = gc_list_size(young);
1255 }
1256
1257 /* All objects in unreachable are trash, but objects reachable from
1258 * legacy finalizers (e.g. tp_del) can't safely be deleted.
1259 */
1260 gc_list_init(&finalizers);
1261 // NEXT_MASK_UNREACHABLE is cleared here.
1262 // After move_legacy_finalizers(), unreachable is a normal list.
1263 move_legacy_finalizers(&unreachable, &finalizers);
1264 /* finalizers contains the unreachable objects with a legacy finalizer;
1265 * unreachable objects reachable *from* those are also uncollectable,
1266 * and we move those into the finalizers list too.
1267 */
1268 move_legacy_finalizer_reachable(&finalizers);
1269
1270 validate_list(&finalizers, collecting_clear_unreachable_clear);
1271 validate_list(&unreachable, collecting_set_unreachable_clear);
1272
1273 /* Print debugging information. */
1274 if (gcstate->debug & DEBUG_COLLECTABLE) {
1275 for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1276 debug_cycle("collectable", FROM_GC(gc));
1277 }
1278 }
1279
1280 /* Clear weakrefs and invoke callbacks as necessary. */
1281 m += handle_weakrefs(&unreachable, old);
1282
1283 validate_list(old, collecting_clear_unreachable_clear);
1284 validate_list(&unreachable, collecting_set_unreachable_clear);
1285
1286 /* Call tp_finalize on objects which have one. */
1287 finalize_garbage(tstate, &unreachable);
1288
1289 /* Handle any objects that may have resurrected after the call
1290 * to 'finalize_garbage' and continue the collection with the
1291 * objects that are still unreachable */
1292 PyGC_Head final_unreachable;
1293 handle_resurrected_objects(&unreachable, &final_unreachable, old);
1294
1295 /* Call tp_clear on objects in the final_unreachable set. This will cause
1296 * the reference cycles to be broken. It may also cause some objects
1297 * in finalizers to be freed.
1298 */
1299 m += gc_list_size(&final_unreachable);
1300 delete_garbage(tstate, gcstate, &final_unreachable, old);
1301
1302 /* Collect statistics on uncollectable objects found and print
1303 * debugging information. */
1304 for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1305 n++;
1306 if (gcstate->debug & DEBUG_UNCOLLECTABLE)
1307 debug_cycle("uncollectable", FROM_GC(gc));
1308 }
1309 if (gcstate->debug & DEBUG_STATS) {
1310 double d = _PyTime_AsSecondsDouble(_PyTime_GetMonotonicClock() - t1);
1311 PySys_WriteStderr(
1312 "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
1313 n+m, n, d);
1314 }
1315
1316 /* Append instances in the uncollectable set to a Python
1317 * reachable list of garbage. The programmer has to deal with
1318 * this if they insist on creating this type of structure.
1319 */
1320 handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1321 validate_list(old, collecting_clear_unreachable_clear);
1322
1323 /* Clear free lists only during the collection of the highest
1324 * generation */
1325 if (generation == NUM_GENERATIONS-1) {
1326 clear_freelists(tstate->interp);
1327 }
1328
1329 if (_PyErr_Occurred(tstate)) {
1330 if (nofail) {
1331 _PyErr_Clear(tstate);
1332 }
1333 else {
1334 _PyErr_WriteUnraisableMsg("in garbage collection", NULL);
1335 }
1336 }
1337
1338 /* Update stats */
1339 if (n_collected) {
1340 *n_collected = m;
1341 }
1342 if (n_uncollectable) {
1343 *n_uncollectable = n;
1344 }
1345
1346 struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
1347 stats->collections++;
1348 stats->collected += m;
1349 stats->uncollectable += n;
1350
1351 if (PyDTrace_GC_DONE_ENABLED()) {
1352 PyDTrace_GC_DONE(n + m);
1353 }
1354
1355 assert(!_PyErr_Occurred(tstate));
1356 return n + m;
1357 }
1358
1359 /* Invoke progress callbacks to notify clients that garbage collection
1360 * is starting or stopping
1361 */
1362 static void
1363 invoke_gc_callback(PyThreadState *tstate, const char *phase,
1364 int generation, Py_ssize_t collected,
1365 Py_ssize_t uncollectable)
1366 {
1367 assert(!_PyErr_Occurred(tstate));
1368
1369 /* we may get called very early */
1370 GCState *gcstate = &tstate->interp->gc;
1371 if (gcstate->callbacks == NULL) {
1372 return;
1373 }
1374
1375 /* The local variable cannot be rebound, check it for sanity */
1376 assert(PyList_CheckExact(gcstate->callbacks));
1377 PyObject *info = NULL;
1378 if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1379 info = Py_BuildValue("{sisnsn}",
1380 "generation", generation,
1381 "collected", collected,
1382 "uncollectable", uncollectable);
1383 if (info == NULL) {
1384 PyErr_WriteUnraisable(NULL);
1385 return;
1386 }
1387 }
1388 for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1389 PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1390 Py_INCREF(cb); /* make sure cb doesn't go away */
1391 r = PyObject_CallFunction(cb, "sO", phase, info);
1392 if (r == NULL) {
1393 PyErr_WriteUnraisable(cb);
1394 }
1395 else {
1396 Py_DECREF(r);
1397 }
1398 Py_DECREF(cb);
1399 }
1400 Py_XDECREF(info);
1401 assert(!_PyErr_Occurred(tstate));
1402 }
1403
1404 /* Perform garbage collection of a generation and invoke
1405 * progress callbacks.
1406 */
1407 static Py_ssize_t
1408 gc_collect_with_callback(PyThreadState *tstate, int generation)
1409 {
1410 assert(!_PyErr_Occurred(tstate));
1411 Py_ssize_t result, collected, uncollectable;
1412 invoke_gc_callback(tstate, "start", generation, 0, 0);
1413 result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0);
1414 invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
1415 assert(!_PyErr_Occurred(tstate));
1416 return result;
1417 }
1418
1419 static Py_ssize_t
1420 gc_collect_generations(PyThreadState *tstate)
1421 {
1422 GCState *gcstate = &tstate->interp->gc;
1423 /* Find the oldest generation (highest numbered) where the count
1424 * exceeds the threshold. Objects in that generation and
1425 * generations younger than it will be collected. */
1426 Py_ssize_t n = 0;
1427 for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1428 if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1429 /* Avoid quadratic performance degradation in number
1430 of tracked objects (see also issue #4074):
1431
1432 To limit the cost of garbage collection, there are two strategies:
1433 - make each collection faster, e.g. by scanning fewer objects
1434 - do fewer collections
1435 This heuristic is about the latter strategy.
1436
1437 In addition to the various configurable thresholds, we only trigger a
1438 full collection if the ratio
1439
1440 long_lived_pending / long_lived_total
1441
1442 is above a given value (hardwired to 25%).
1443
1444 The reason is that, while "non-full" collections (i.e., collections of
1445 the young and middle generations) will always examine roughly the same
1446 number of objects -- determined by the aforementioned thresholds --,
1447 the cost of a full collection is proportional to the total number of
1448 long-lived objects, which is virtually unbounded.
1449
1450 Indeed, it has been remarked that doing a full collection every
1451 <constant number> of object creations entails a dramatic performance
1452 degradation in workloads which consist of creating and storing lots of
1453 long-lived objects (e.g. building a large list of GC-tracked objects would
1454 show quadratic performance, instead of linear as expected: see issue #4074).
1455
1456 Using the above ratio, instead, yields amortized linear performance in
1457 the total number of objects (the effect of which can be summarized
1458 thusly: "each full garbage collection is more and more costly as the
1459 number of objects grows, but we do fewer and fewer of them").
1460
1461 This heuristic was suggested by Martin von Löwis on python-dev in
1462 June 2008. His original analysis and proposal can be found at:
1463 http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1464 */
1465 if (i == NUM_GENERATIONS - 1
1466 && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1467 continue;
1468 n = gc_collect_with_callback(tstate, i);
1469 break;
1470 }
1471 }
1472 return n;
1473 }
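/* Editorial worked example of the ratio check above: with
 * long_lived_total == 1,000,000 surviving objects, a full collection is
 * skipped until long_lived_pending exceeds 1,000,000 / 4 == 250,000, i.e.
 * until at least 250,000 objects have been promoted toward the oldest
 * generation since the last full collection. The threshold therefore scales
 * with the long-lived population instead of being a constant.
 */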
1474
1475 #include "clinic/gcmodule.c.h"
1476
1477 /*[clinic input]
1478 gc.enable
1479
1480 Enable automatic garbage collection.
1481 [clinic start generated code]*/
1482
1483 static PyObject *
1484 gc_enable_impl(PyObject *module)
1485 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
1486 {
1487 PyGC_Enable();
1488 Py_RETURN_NONE;
1489 }
1490
1491 /*[clinic input]
1492 gc.disable
1493
1494 Disable automatic garbage collection.
1495 [clinic start generated code]*/
1496
1497 static PyObject *
1498 gc_disable_impl(PyObject *module)
1499 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
1500 {
1501 PyGC_Disable();
1502 Py_RETURN_NONE;
1503 }
1504
1505 /*[clinic input]
1506 gc.isenabled -> bool
1507
1508 Returns true if automatic garbage collection is enabled.
1509 [clinic start generated code]*/
1510
1511 static int
1512 gc_isenabled_impl(PyObject *module)
1513 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
1514 {
1515 return PyGC_IsEnabled();
1516 }
1517
1518 /*[clinic input]
1519 gc.collect -> Py_ssize_t
1520
1521 generation: int(c_default="NUM_GENERATIONS - 1") = 2
1522
1523 Run the garbage collector.
1524
1525 With no arguments, run a full collection. The optional argument
1526 may be an integer specifying which generation to collect. A ValueError
1527 is raised if the generation number is invalid.
1528
1529 The number of unreachable objects is returned.
1530 [clinic start generated code]*/
1531
1532 static Py_ssize_t
1533 gc_collect_impl(PyObject *module, int generation)
1534 /*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
1535 {
1536 PyThreadState *tstate = _PyThreadState_GET();
1537
1538 if (generation < 0 || generation >= NUM_GENERATIONS) {
1539 _PyErr_SetString(tstate, PyExc_ValueError, "invalid generation");
1540 return -1;
1541 }
1542
1543 GCState *gcstate = &tstate->interp->gc;
1544 Py_ssize_t n;
1545 if (gcstate->collecting) {
1546 /* already collecting, don't do anything */
1547 n = 0;
1548 }
1549 else {
1550 gcstate->collecting = 1;
1551 n = gc_collect_with_callback(tstate, generation);
1552 gcstate->collecting = 0;
1553 }
1554 return n;
1555 }
1556
1557 /*[clinic input]
1558 gc.set_debug
1559
1560 flags: int
1561 An integer that can have the following bits turned on:
1562 DEBUG_STATS - Print statistics during collection.
1563 DEBUG_COLLECTABLE - Print collectable objects found.
1564 DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
1565 found.
1566 DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
1567 DEBUG_LEAK - Debug leaking programs (everything but STATS).
1568 /
1569
1570 Set the garbage collection debugging flags.
1571
1572 Debugging information is written to sys.stderr.
1573 [clinic start generated code]*/
1574
1575 static PyObject *
1576 gc_set_debug_impl(PyObject *module, int flags)
1577 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
1578 {
1579 GCState *gcstate = get_gc_state();
1580 gcstate->debug = flags;
1581 Py_RETURN_NONE;
1582 }
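
/* A hedged illustration (not code used by this module): in terms of the
   assignment above, the Python-level call gc.set_debug(gc.DEBUG_LEAK)
   amounts to something like

       GCState *gcstate = get_gc_state();
       gcstate->debug = DEBUG_COLLECTABLE | DEBUG_UNCOLLECTABLE | DEBUG_SAVEALL;

   assuming DEBUG_LEAK is the "everything but STATS" combination that its
   description above promises. */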
1583
1584 /*[clinic input]
1585 gc.get_debug -> int
1586
1587 Get the garbage collection debugging flags.
1588 [clinic start generated code]*/
1589
1590 static int
1591 gc_get_debug_impl(PyObject *module)
1592 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
1593 {
1594 GCState *gcstate = get_gc_state();
1595 return gcstate->debug;
1596 }
1597
1598 PyDoc_STRVAR(gc_set_thresh__doc__,
1599 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1600 "\n"
1601 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
1602 "collection.\n");
1603
1604 static PyObject *
1605 gc_set_threshold(PyObject *self, PyObject *args)
1606 {
1607 GCState *gcstate = get_gc_state();
1608 if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1609 &gcstate->generations[0].threshold,
1610 &gcstate->generations[1].threshold,
1611 &gcstate->generations[2].threshold))
1612 return NULL;
1613 for (int i = 3; i < NUM_GENERATIONS; i++) {
1614 /* generations higher than 2 get the same threshold */
1615 gcstate->generations[i].threshold = gcstate->generations[2].threshold;
1616 }
1617 Py_RETURN_NONE;
1618 }
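
/* A hedged usage sketch from embedding C code (gc_module and res are names
   invented for the illustration): thresholds can be adjusted through the
   Python-level module, which ends up in the PyArg_ParseTuple call above.
   Because the format is "i|ii", the second and third thresholds are
   optional and keep their previous values when omitted:

       PyObject *gc_module = PyImport_ImportModule("gc");
       if (gc_module != NULL) {
           // make generation-0 collections rarer; leave thresholds 1 and 2 alone
           PyObject *res = PyObject_CallMethod(gc_module, "set_threshold",
                                               "i", 10000);
           Py_XDECREF(res);
           Py_DECREF(gc_module);
       }
*/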
1619
1620 /*[clinic input]
1621 gc.get_threshold
1622
1623 Return the current collection thresholds.
1624 [clinic start generated code]*/
1625
1626 static PyObject *
1627 gc_get_threshold_impl(PyObject *module)
1628 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
1629 {
1630 GCState *gcstate = get_gc_state();
1631 return Py_BuildValue("(iii)",
1632 gcstate->generations[0].threshold,
1633 gcstate->generations[1].threshold,
1634 gcstate->generations[2].threshold);
1635 }
1636
1637 /*[clinic input]
1638 gc.get_count
1639
1640 Return a three-tuple of the current collection counts.
1641 [clinic start generated code]*/
1642
1643 static PyObject *
1644 gc_get_count_impl(PyObject *module)
1645 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
1646 {
1647 GCState *gcstate = get_gc_state();
1648 return Py_BuildValue("(iii)",
1649 gcstate->generations[0].count,
1650 gcstate->generations[1].count,
1651 gcstate->generations[2].count);
1652 }
1653
1654 static int
1655 referrersvisit(PyObject* obj, PyObject *objs)
1656 {
1657 Py_ssize_t i;
1658 for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1659 if (PyTuple_GET_ITEM(objs, i) == obj)
1660 return 1;
1661 return 0;
1662 }
1663
1664 static int
1665 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1666 {
1667 PyGC_Head *gc;
1668 PyObject *obj;
1669 traverseproc traverse;
1670 for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1671 obj = FROM_GC(gc);
1672 traverse = Py_TYPE(obj)->tp_traverse;
1673 if (obj == objs || obj == resultlist)
1674 continue;
1675 if (traverse(obj, (visitproc)referrersvisit, objs)) {
1676 if (PyList_Append(resultlist, obj) < 0)
1677 return 0; /* error */
1678 }
1679 }
1680 return 1; /* no error */
1681 }
1682
1683 PyDoc_STRVAR(gc_get_referrers__doc__,
1684 "get_referrers(*objs) -> list\n\
1685 Return the list of objects that directly refer to any of objs.");
1686
1687 static PyObject *
1688 gc_get_referrers(PyObject *self, PyObject *args)
1689 {
1690 if (PySys_Audit("gc.get_referrers", "(O)", args) < 0) {
1691 return NULL;
1692 }
1693
1694 PyObject *result = PyList_New(0);
1695 if (!result) {
1696 return NULL;
1697 }
1698
1699 GCState *gcstate = get_gc_state();
1700 for (int i = 0; i < NUM_GENERATIONS; i++) {
1701 if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) {
1702 Py_DECREF(result);
1703 return NULL;
1704 }
1705 }
1706 return result;
1707 }
1708
1709 /* Append obj to list; return true if error (out of memory), false if OK. */
1710 static int
1711 referentsvisit(PyObject *obj, PyObject *list)
1712 {
1713 return PyList_Append(list, obj) < 0;
1714 }
1715
1716 PyDoc_STRVAR(gc_get_referents__doc__,
1717 "get_referents(*objs) -> list\n\
1718 Return the list of objects that are directly referred to by objs.");
1719
1720 static PyObject *
1721 gc_get_referents(PyObject *self, PyObject *args)
1722 {
1723 Py_ssize_t i;
1724 if (PySys_Audit("gc.get_referents", "(O)", args) < 0) {
1725 return NULL;
1726 }
1727 PyObject *result = PyList_New(0);
1728
1729 if (result == NULL)
1730 return NULL;
1731
1732 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1733 traverseproc traverse;
1734 PyObject *obj = PyTuple_GET_ITEM(args, i);
1735
1736 if (!_PyObject_IS_GC(obj))
1737 continue;
1738 traverse = Py_TYPE(obj)->tp_traverse;
1739 if (! traverse)
1740 continue;
1741 if (traverse(obj, (visitproc)referentsvisit, result)) {
1742 Py_DECREF(result);
1743 return NULL;
1744 }
1745 }
1746 return result;
1747 }
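
/* Both get_referrers() and get_referents() lean on the same protocol:
   tp_traverse invokes a visitproc callback once for every object its
   argument directly references.  A minimal sketch of a custom visitor built
   on that protocol (count_visit and count_referents are hypothetical names,
   not part of this module):

       static int
       count_visit(PyObject *obj, void *arg)
       {
           (void)obj;
           (*(Py_ssize_t *)arg)++;
           return 0;                   // 0 tells tp_traverse to keep going
       }

       static Py_ssize_t
       count_referents(PyObject *obj)
       {
           Py_ssize_t n = 0;
           traverseproc traverse;
           if (!_PyObject_IS_GC(obj)) {
               return 0;
           }
           traverse = Py_TYPE(obj)->tp_traverse;
           if (traverse != NULL) {
               (void)traverse(obj, count_visit, &n);
           }
           return n;
       }
*/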
1748
1749 /*[clinic input]
1750 gc.get_objects
1751 generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
1752 Generation to extract the objects from.
1753
1754 Return a list of objects tracked by the collector (excluding the list returned).
1755
1756 If generation is not None, return only the objects tracked by the collector
1757 that are in that generation.
1758 [clinic start generated code]*/
1759
1760 static PyObject *
1761 gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
1762 /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
1763 {
1764 PyThreadState *tstate = _PyThreadState_GET();
1765 int i;
1766 PyObject* result;
1767 GCState *gcstate = &tstate->interp->gc;
1768
1769 if (PySys_Audit("gc.get_objects", "n", generation) < 0) {
1770 return NULL;
1771 }
1772
1773 result = PyList_New(0);
1774 if (result == NULL) {
1775 return NULL;
1776 }
1777
1778 /* If generation is passed, we extract only that generation */
1779 if (generation != -1) {
1780 if (generation >= NUM_GENERATIONS) {
1781 _PyErr_Format(tstate, PyExc_ValueError,
1782 "generation parameter must be less than the number of "
1783 "available generations (%i)",
1784 NUM_GENERATIONS);
1785 goto error;
1786 }
1787
1788 if (generation < 0) {
1789 _PyErr_SetString(tstate, PyExc_ValueError,
1790 "generation parameter cannot be negative");
1791 goto error;
1792 }
1793
1794 if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1795 goto error;
1796 }
1797
1798 return result;
1799 }
1800
1801 /* If generation is not passed or None, get all objects from all generations */
1802 for (i = 0; i < NUM_GENERATIONS; i++) {
1803 if (append_objects(result, GEN_HEAD(gcstate, i))) {
1804 goto error;
1805 }
1806 }
1807 return result;
1808
1809 error:
1810 Py_DECREF(result);
1811 return NULL;
1812 }
1813
1814 /*[clinic input]
1815 gc.get_stats
1816
1817 Return a list of dictionaries containing per-generation statistics.
1818 [clinic start generated code]*/
1819
1820 static PyObject *
1821 gc_get_stats_impl(PyObject *module)
1822 /*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
1823 {
1824 int i;
1825 struct gc_generation_stats stats[NUM_GENERATIONS], *st;
1826
1827 /* To get consistent values despite allocations while constructing
1828 the result list, we use a snapshot of the running stats. */
1829 GCState *gcstate = get_gc_state();
1830 for (i = 0; i < NUM_GENERATIONS; i++) {
1831 stats[i] = gcstate->generation_stats[i];
1832 }
1833
1834 PyObject *result = PyList_New(0);
1835 if (result == NULL)
1836 return NULL;
1837
1838 for (i = 0; i < NUM_GENERATIONS; i++) {
1839 PyObject *dict;
1840 st = &stats[i];
1841 dict = Py_BuildValue("{snsnsn}",
1842 "collections", st->collections,
1843 "collected", st->collected,
1844 "uncollectable", st->uncollectable
1845 );
1846 if (dict == NULL)
1847 goto error;
1848 if (PyList_Append(result, dict)) {
1849 Py_DECREF(dict);
1850 goto error;
1851 }
1852 Py_DECREF(dict);
1853 }
1854 return result;
1855
1856 error:
1857 Py_XDECREF(result);
1858 return NULL;
1859 }
1860
1861
1862 /*[clinic input]
1863 gc.is_tracked
1864
1865 obj: object
1866 /
1867
1868 Returns true if the object is tracked by the garbage collector.
1869
1870 Simple atomic objects will return false.
1871 [clinic start generated code]*/
1872
1873 static PyObject *
1874 gc_is_tracked(PyObject *module, PyObject *obj)
1875 /*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
1876 {
1877 PyObject *result;
1878
1879 if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
1880 result = Py_True;
1881 else
1882 result = Py_False;
1883 Py_INCREF(result);
1884 return result;
1885 }
1886
1887 /*[clinic input]
1888 gc.is_finalized
1889
1890 obj: object
1891 /
1892
1893 Returns true if the object has already been finalized by the GC.
1894 [clinic start generated code]*/
1895
1896 static PyObject *
1897 gc_is_finalized(PyObject *module, PyObject *obj)
1898 /*[clinic end generated code: output=e1516ac119a918ed input=201d0c58f69ae390]*/
1899 {
1900 if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
1901 Py_RETURN_TRUE;
1902 }
1903 Py_RETURN_FALSE;
1904 }
1905
1906 /*[clinic input]
1907 gc.freeze
1908
1909 Freeze all currently tracked objects and ignore them for future collections.
1910 
1911 This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
1912 Note: collection before a POSIX fork() call may free pages for future allocation
1913 which can cause those pages to be copied on write after the fork.
1914 [clinic start generated code]*/
1915
1916 static PyObject *
1917 gc_freeze_impl(PyObject *module)
1918 /*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
1919 {
1920 GCState *gcstate = get_gc_state();
1921 for (int i = 0; i < NUM_GENERATIONS; ++i) {
1922 gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1923 gcstate->generations[i].count = 0;
1924 }
1925 Py_RETURN_NONE;
1926 }
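
/* A hedged sketch of the intended usage from an embedding application
   (error handling elided; the pattern is illustrative only): call freeze
   through the module right before a fork() so that the child's collections
   ignore the inherited objects and leave their memory pages shared:

       PyObject *gc_module = PyImport_ImportModule("gc");
       if (gc_module != NULL) {
           PyObject *res = PyObject_CallMethod(gc_module, "freeze", NULL);
           Py_XDECREF(res);
           Py_DECREF(gc_module);
       }
       // ... fork() here; see also PyGC_Disable()/PyGC_Enable() further down
       //     if collections should be suppressed around the fork itself.
*/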
1927
1928 /*[clinic input]
1929 gc.unfreeze
1930
1931 Unfreeze all objects in the permanent generation.
1932
1933 Put all objects in the permanent generation back into oldest generation.
1934 [clinic start generated code]*/
1935
1936 static PyObject *
1937 gc_unfreeze_impl(PyObject *module)
1938 /*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
1939 {
1940 GCState *gcstate = get_gc_state();
1941 gc_list_merge(&gcstate->permanent_generation.head,
1942 GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1943 Py_RETURN_NONE;
1944 }
1945
1946 /*[clinic input]
1947 gc.get_freeze_count -> Py_ssize_t
1948
1949 Return the number of objects in the permanent generation.
1950 [clinic start generated code]*/
1951
1952 static Py_ssize_t
1953 gc_get_freeze_count_impl(PyObject *module)
1954 /*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
1955 {
1956 GCState *gcstate = get_gc_state();
1957 return gc_list_size(&gcstate->permanent_generation.head);
1958 }
1959
1960
1961 PyDoc_STRVAR(gc__doc__,
1962 "This module provides access to the garbage collector for reference cycles.\n"
1963 "\n"
1964 "enable() -- Enable automatic garbage collection.\n"
1965 "disable() -- Disable automatic garbage collection.\n"
1966 "isenabled() -- Returns true if automatic collection is enabled.\n"
1967 "collect() -- Do a full collection right now.\n"
1968 "get_count() -- Return the current collection counts.\n"
1969 "get_stats() -- Return list of dictionaries containing per-generation stats.\n"
1970 "set_debug() -- Set debugging flags.\n"
1971 "get_debug() -- Get debugging flags.\n"
1972 "set_threshold() -- Set the collection thresholds.\n"
1973 "get_threshold() -- Return the current the collection thresholds.\n"
1974 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1975 "is_tracked() -- Returns true if a given object is tracked.\n"
1976 "is_finalized() -- Returns true if a given object has been already finalized.\n"
1977 "get_referrers() -- Return the list of objects that refer to an object.\n"
1978 "get_referents() -- Return the list of objects that an object refers to.\n"
1979 "freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
1980 "unfreeze() -- Unfreeze all objects in the permanent generation.\n"
1981 "get_freeze_count() -- Return the number of objects in the permanent generation.\n");
1982
1983 static PyMethodDef GcMethods[] = {
1984 GC_ENABLE_METHODDEF
1985 GC_DISABLE_METHODDEF
1986 GC_ISENABLED_METHODDEF
1987 GC_SET_DEBUG_METHODDEF
1988 GC_GET_DEBUG_METHODDEF
1989 GC_GET_COUNT_METHODDEF
1990 {"set_threshold", gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
1991 GC_GET_THRESHOLD_METHODDEF
1992 GC_COLLECT_METHODDEF
1993 GC_GET_OBJECTS_METHODDEF
1994 GC_GET_STATS_METHODDEF
1995 GC_IS_TRACKED_METHODDEF
1996 GC_IS_FINALIZED_METHODDEF
1997 {"get_referrers", gc_get_referrers, METH_VARARGS,
1998 gc_get_referrers__doc__},
1999 {"get_referents", gc_get_referents, METH_VARARGS,
2000 gc_get_referents__doc__},
2001 GC_FREEZE_METHODDEF
2002 GC_UNFREEZE_METHODDEF
2003 GC_GET_FREEZE_COUNT_METHODDEF
2004 {NULL, NULL} /* Sentinel */
2005 };
2006
2007 static int
2008 gcmodule_exec(PyObject *module)
2009 {
2010 GCState *gcstate = get_gc_state();
2011
2012 /* garbage and callbacks are initialized by _PyGC_Init() early in
2013 * the interpreter lifecycle. */
2014 assert(gcstate->garbage != NULL);
2015 if (PyModule_AddObjectRef(module, "garbage", gcstate->garbage) < 0) {
2016 return -1;
2017 }
2018 assert(gcstate->callbacks != NULL);
2019 if (PyModule_AddObjectRef(module, "callbacks", gcstate->callbacks) < 0) {
2020 return -1;
2021 }
2022
2023 #define ADD_INT(NAME) if (PyModule_AddIntConstant(module, #NAME, NAME) < 0) { return -1; }
2024 ADD_INT(DEBUG_STATS);
2025 ADD_INT(DEBUG_COLLECTABLE);
2026 ADD_INT(DEBUG_UNCOLLECTABLE);
2027 ADD_INT(DEBUG_SAVEALL);
2028 ADD_INT(DEBUG_LEAK);
2029 #undef ADD_INT
2030 return 0;
2031 }
2032
2033 static PyModuleDef_Slot gcmodule_slots[] = {
2034 {Py_mod_exec, gcmodule_exec},
2035 {0, NULL}
2036 };
2037
2038 static struct PyModuleDef gcmodule = {
2039 PyModuleDef_HEAD_INIT,
2040 .m_name = "gc",
2041 .m_doc = gc__doc__,
2042 .m_size = 0, // per interpreter state, see: get_gc_state()
2043 .m_methods = GcMethods,
2044 .m_slots = gcmodule_slots
2045 };
2046
2047 PyMODINIT_FUNC
2048 PyInit_gc(void)
2049 {
2050 return PyModuleDef_Init(&gcmodule);
2051 }
2052
2053 /* C API for controlling the state of the garbage collector */
2054 int
2055 PyGC_Enable(void)
2056 {
2057 GCState *gcstate = get_gc_state();
2058 int old_state = gcstate->enabled;
2059 gcstate->enabled = 1;
2060 return old_state;
2061 }
2062
2063 int
2064 PyGC_Disable(void)
2065 {
2066 GCState *gcstate = get_gc_state();
2067 int old_state = gcstate->enabled;
2068 gcstate->enabled = 0;
2069 return old_state;
2070 }
2071
2072 int
2073 PyGC_IsEnabled(void)
2074 {
2075 GCState *gcstate = get_gc_state();
2076 return gcstate->enabled;
2077 }
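
/* Because both switches return the previous state, a caller can bracket a
   critical section and then restore whatever was configured before.  A
   minimal sketch:

       int was_enabled = PyGC_Disable();
       // ... allocate a large, cycle-free structure without the collector
       //     being triggered by the allocations ...
       if (was_enabled) {
           PyGC_Enable();
       }
*/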
2078
2079 /* Public API to invoke gc.collect() from C */
2080 Py_ssize_t
2081 PyGC_Collect(void)
2082 {
2083 PyThreadState *tstate = _PyThreadState_GET();
2084 GCState *gcstate = &tstate->interp->gc;
2085
2086 if (!gcstate->enabled) {
2087 return 0;
2088 }
2089
2090 Py_ssize_t n;
2091 if (gcstate->collecting) {
2092 /* already collecting, don't do anything */
2093 n = 0;
2094 }
2095 else {
2096 PyObject *exc, *value, *tb;
2097 gcstate->collecting = 1;
2098 _PyErr_Fetch(tstate, &exc, &value, &tb);
2099 n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1);
2100 _PyErr_Restore(tstate, exc, value, tb);
2101 gcstate->collecting = 0;
2102 }
2103
2104 return n;
2105 }
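
/* Usage from C is simply

       Py_ssize_t unreachable = PyGC_Collect();

   and, as the body above shows, the call is a no-op returning 0 when
   automatic collection is disabled or a collection is already in progress;
   any pending exception is saved and restored around the collection. */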
2106
2107 Py_ssize_t
2108 _PyGC_CollectNoFail(PyThreadState *tstate)
2109 {
2110 /* Ideally, this function is only called on interpreter shutdown,
2111 and therefore not recursively. Unfortunately, when there are daemon
2112 threads, a daemon thread can start a cyclic garbage collection
2113 during interpreter shutdown (and then never finish it).
2114 See http://bugs.python.org/issue8713#msg195178 for an example.
2115 */
2116 GCState *gcstate = &tstate->interp->gc;
2117 if (gcstate->collecting) {
2118 return 0;
2119 }
2120
2121 Py_ssize_t n;
2122 gcstate->collecting = 1;
2123 n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
2124 gcstate->collecting = 0;
2125 return n;
2126 }
2127
2128 void
2129 _PyGC_DumpShutdownStats(PyInterpreterState *interp)
2130 {
2131 GCState *gcstate = &interp->gc;
2132 if (!(gcstate->debug & DEBUG_SAVEALL)
2133 && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
2134 const char *message;
2135 if (gcstate->debug & DEBUG_UNCOLLECTABLE)
2136 message = "gc: %zd uncollectable objects at " \
2137 "shutdown";
2138 else
2139 message = "gc: %zd uncollectable objects at " \
2140 "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
2141 /* PyErr_WarnFormat does too many things and we are at shutdown;
2142 the warnings module's dependencies (e.g. linecache) may already
2143 be gone. */
2144 if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2145 "gc", NULL, message,
2146 PyList_GET_SIZE(gcstate->garbage)))
2147 PyErr_WriteUnraisable(NULL);
2148 if (gcstate->debug & DEBUG_UNCOLLECTABLE) {
2149 PyObject *repr = NULL, *bytes = NULL;
2150 repr = PyObject_Repr(gcstate->garbage);
2151 if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
2152 PyErr_WriteUnraisable(gcstate->garbage);
2153 else {
2154 PySys_WriteStderr(
2155 " %s\n",
2156 PyBytes_AS_STRING(bytes)
2157 );
2158 }
2159 Py_XDECREF(repr);
2160 Py_XDECREF(bytes);
2161 }
2162 }
2163 }
2164
2165
2166 static void
2167 gc_fini_untrack(PyGC_Head *list)
2168 {
2169 PyGC_Head *gc;
2170 for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(list)) {
2171 PyObject *op = FROM_GC(gc);
2172 _PyObject_GC_UNTRACK(op);
2173 }
2174 }
2175
2176
2177 void
2178 _PyGC_Fini(PyInterpreterState *interp)
2179 {
2180 GCState *gcstate = &interp->gc;
2181 Py_CLEAR(gcstate->garbage);
2182 Py_CLEAR(gcstate->callbacks);
2183
2184 if (!_Py_IsMainInterpreter(interp)) {
2185 // bpo-46070: Explicitly untrack all objects currently tracked by the
2186 // GC. Otherwise, if an object is used later by another interpreter,
2187 // calling PyObject_GC_UnTrack() on the object crashes if the previous
2188 // or the next object of the PyGC_Head structure became a dangling
2189 // pointer.
2190 for (int i = 0; i < NUM_GENERATIONS; i++) {
2191 PyGC_Head *gen = GEN_HEAD(gcstate, i);
2192 gc_fini_untrack(gen);
2193 }
2194 }
2195 }
2196
2197 /* for debugging */
2198 void
2199 _PyGC_Dump(PyGC_Head *g)
2200 {
2201 _PyObject_Dump(FROM_GC(g));
2202 }
2203
2204
2205 #ifdef Py_DEBUG
2206 static int
2207 visit_validate(PyObject *op, void *parent_raw)
2208 {
2209 PyObject *parent = _PyObject_CAST(parent_raw);
2210 if (_PyObject_IsFreed(op)) {
2211 _PyObject_ASSERT_FAILED_MSG(parent,
2212 "PyObject_GC_Track() object is not valid");
2213 }
2214 return 0;
2215 }
2216 #endif
2217
2218
2219 /* extension modules might be compiled with GC support so these
2220 functions must always be available */
2221
2222 void
2223 PyObject_GC_Track(void *op_raw)
2224 {
2225 PyObject *op = _PyObject_CAST(op_raw);
2226 if (_PyObject_GC_IS_TRACKED(op)) {
2227 _PyObject_ASSERT_FAILED_MSG(op,
2228 "object already tracked "
2229 "by the garbage collector");
2230 }
2231 _PyObject_GC_TRACK(op);
2232
2233 #ifdef Py_DEBUG
2234 /* Check that the object is valid: validate objects traversed
2235 by tp_traverse() */
2236 traverseproc traverse = Py_TYPE(op)->tp_traverse;
2237 (void)traverse(op, visit_validate, op);
2238 #endif
2239 }
2240
2241 void
2242 PyObject_GC_UnTrack(void *op_raw)
2243 {
2244 PyObject *op = _PyObject_CAST(op_raw);
2245 /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
2246 * call PyObject_GC_UnTrack twice on an object.
2247 */
2248 if (_PyObject_GC_IS_TRACKED(op)) {
2249 _PyObject_GC_UNTRACK(op);
2250 }
2251 }
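
/* A hedged sketch of how an extension type typically pairs these hooks with
   tp_traverse/tp_clear (MyObject, my_traverse, my_clear and my_dealloc are
   hypothetical names, not part of this module):

       typedef struct {
           PyObject_HEAD
           PyObject *cargo;              // may participate in a cycle
       } MyObject;

       static int
       my_traverse(MyObject *self, visitproc visit, void *arg)
       {
           Py_VISIT(self->cargo);
           return 0;
       }

       static int
       my_clear(MyObject *self)
       {
           Py_CLEAR(self->cargo);
           return 0;
       }

       static void
       my_dealloc(MyObject *self)
       {
           PyObject_GC_UnTrack(self);    // keep the collector away from a
                                         // half-destroyed object
           Py_CLEAR(self->cargo);
           PyObject_GC_Del(self);
       }
*/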
2252
2253 int
2254 PyObject_IS_GC(PyObject *obj)
2255 {
2256 return _PyObject_IS_GC(obj);
2257 }
2258
2259 static PyObject *
2260 _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
2261 {
2262 PyThreadState *tstate = _PyThreadState_GET();
2263 GCState *gcstate = &tstate->interp->gc;
2264 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2265 return _PyErr_NoMemory(tstate);
2266 }
2267 size_t size = sizeof(PyGC_Head) + basicsize;
2268
2269 PyGC_Head *g;
2270 if (use_calloc) {
2271 g = (PyGC_Head *)PyObject_Calloc(1, size);
2272 }
2273 else {
2274 g = (PyGC_Head *)PyObject_Malloc(size);
2275 }
2276 if (g == NULL) {
2277 return _PyErr_NoMemory(tstate);
2278 }
2279 assert(((uintptr_t)g & 3) == 0); // g must be aligned on a 4-byte boundary
2280
2281 g->_gc_next = 0;
2282 g->_gc_prev = 0;
2283 gcstate->generations[0].count++; /* number of allocated GC objects */
2284 if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
2285 gcstate->enabled &&
2286 gcstate->generations[0].threshold &&
2287 !gcstate->collecting &&
2288 !_PyErr_Occurred(tstate))
2289 {
2290 gcstate->collecting = 1;
2291 gc_collect_generations(tstate);
2292 gcstate->collecting = 0;
2293 }
2294 PyObject *op = FROM_GC(g);
2295 return op;
2296 }
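
/* Concretely: if generation 0's threshold is, say, 700 (a typical default,
   assumed here only for illustration), then once more than 700 tracked
   allocations have accumulated -- and the collector is enabled, not already
   running, and no exception is pending -- the allocation above runs
   gc_collect_generations() before handing out the new object. */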
2297
2298 PyObject *
2299 _PyObject_GC_Malloc(size_t basicsize)
2300 {
2301 return _PyObject_GC_Alloc(0, basicsize);
2302 }
2303
2304 PyObject *
2305 _PyObject_GC_Calloc(size_t basicsize)
2306 {
2307 return _PyObject_GC_Alloc(1, basicsize);
2308 }
2309
2310 PyObject *
2311 _PyObject_GC_New(PyTypeObject *tp)
2312 {
2313 PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
2314 if (op == NULL) {
2315 return NULL;
2316 }
2317 _PyObject_Init(op, tp);
2318 return op;
2319 }
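
/* A hedged sketch of the allocate-then-track pattern as an extension type
   would use it through the public macros, continuing the hypothetical
   MyObject example sketched earlier (MyObject_Type is likewise hypothetical):

       static PyObject *
       my_new(void)
       {
           MyObject *self = PyObject_GC_New(MyObject, &MyObject_Type);
           if (self == NULL) {
               return NULL;
           }
           self->cargo = NULL;
           // Tracking is deliberately separate from allocation: track only
           // once every PyObject* field holds a valid value.
           PyObject_GC_Track((PyObject *)self);
           return (PyObject *)self;
       }
*/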
2320
2321 PyVarObject *
2322 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2323 {
2324 size_t size;
2325 PyVarObject *op;
2326
2327 if (nitems < 0) {
2328 PyErr_BadInternalCall();
2329 return NULL;
2330 }
2331 size = _PyObject_VAR_SIZE(tp, nitems);
2332 op = (PyVarObject *) _PyObject_GC_Malloc(size);
2333 if (op == NULL) {
2334 return NULL;
2335 }
2336 _PyObject_InitVar(op, tp, nitems);
2337 return op;
2338 }
2339
2340 PyVarObject *
2341 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2342 {
2343 const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2344 _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2345 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2346 return (PyVarObject *)PyErr_NoMemory();
2347 }
2348
2349 PyGC_Head *g = AS_GC(op);
2350 g = (PyGC_Head *)PyObject_Realloc(g, sizeof(PyGC_Head) + basicsize);
2351 if (g == NULL)
2352 return (PyVarObject *)PyErr_NoMemory();
2353 op = (PyVarObject *) FROM_GC(g);
2354 Py_SET_SIZE(op, nitems);
2355 return op;
2356 }
2357
2358 void
2359 PyObject_GC_Del(void *op)
2360 {
2361 PyGC_Head *g = AS_GC(op);
2362 if (_PyObject_GC_IS_TRACKED(op)) {
2363 gc_list_remove(g);
2364 }
2365 GCState *gcstate = get_gc_state();
2366 if (gcstate->generations[0].count > 0) {
2367 gcstate->generations[0].count--;
2368 }
2369 PyObject_Free(g);
2370 }
2371
2372 int
2373 PyObject_GC_IsTracked(PyObject* obj)
2374 {
2375 if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2376 return 1;
2377 }
2378 return 0;
2379 }
2380
2381 int
2382 PyObject_GC_IsFinalized(PyObject *obj)
2383 {
2384 if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
2385 return 1;
2386 }
2387 return 0;
2388 }
2389