1 /*
2 
3   Reference Cycle Garbage Collection
4   ==================================
5 
6   Neil Schemenauer <nas@arctrix.com>
7 
8   Based on a post on the python-dev list.  Ideas from Guido van Rossum,
9   Eric Tiedemann, and various others.
10 
11   http://www.arctrix.com/nas/python/gc/
12 
13   The following mailing list threads provide a historical perspective on
14   the design of this module.  Note that a fair amount of refinement has
15   occurred since those discussions.
16 
17   http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18   http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19   http://mail.python.org/pipermail/python-dev/2000-March/002497.html
20 
21   For a high-level view of the collection process, read the collect
22   function.
23 
24 */
25 
26 #include "Python.h"
27 #include "pycore_context.h"
28 #include "pycore_initconfig.h"
29 #include "pycore_interp.h"      // PyInterpreterState.gc
30 #include "pycore_object.h"
31 #include "pycore_pyerrors.h"
32 #include "pycore_pystate.h"     // _PyThreadState_GET()
33 #include "pydtrace.h"
34 
35 typedef struct _gc_runtime_state GCState;
36 
37 /*[clinic input]
38 module gc
39 [clinic start generated code]*/
40 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5c9690ecc842d79]*/
41 
42 
43 #ifdef Py_DEBUG
44 #  define GC_DEBUG
45 #endif
46 
47 #define GC_NEXT _PyGCHead_NEXT
48 #define GC_PREV _PyGCHead_PREV
49 
50 // update_refs() sets this bit for all objects in the current generation.
51 // subtract_refs() and move_unreachable() use it to distinguish whether a
52 // visited object is part of the collection currently in progress.
53 //
54 // move_unreachable() removes this flag from reachable objects.
55 // Only unreachable objects have this flag.
56 //
57 // No object in the interpreter has this flag after GC ends.
58 #define PREV_MASK_COLLECTING   _PyGC_PREV_MASK_COLLECTING
59 
60 // The lowest bit of _gc_next is used for the UNREACHABLE flag.
61 //
62 // This flag indicates that the object is in the "unreachable" list built by
63 // move_unreachable().
64 //
65 // Although the flag is set only in move_unreachable(), that function doesn't
66 // clear it, to skip an unnecessary iteration; move_legacy_finalizers()
67 // removes it instead.  In between, the unreachable list is not a normal gc
68 // list and most gc_list_* functions cannot be used on it.
69 #define NEXT_MASK_UNREACHABLE  (1)
70 
71 /* Get an object's GC head */
72 #define AS_GC(o) ((PyGC_Head *)(o)-1)
73 
74 /* Get the object given the GC head */
75 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
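
/* Illustrative sketch, not part of the original module: AS_GC and FROM_GC
 * rely on the PyGC_Head being allocated immediately before the object it
 * tracks, so the two macros are exact inverses of each other.  The helper
 * name below is hypothetical.
 */
static inline void
_example_gc_head_roundtrip(PyObject *op)
{
    PyGC_Head *g = AS_GC(op);      /* step back over the GC header */
    assert(FROM_GC(g) == op);      /* and forward to the object again */
    (void)g;
}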
76 
77 static inline int
78 gc_is_collecting(PyGC_Head *g)
79 {
80     return (g->_gc_prev & PREV_MASK_COLLECTING) != 0;
81 }
82 
83 static inline void
84 gc_clear_collecting(PyGC_Head *g)
85 {
86     g->_gc_prev &= ~PREV_MASK_COLLECTING;
87 }
88 
89 static inline Py_ssize_t
90 gc_get_refs(PyGC_Head *g)
91 {
92     return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
93 }
94 
95 static inline void
96 gc_set_refs(PyGC_Head *g, Py_ssize_t refs)
97 {
98     g->_gc_prev = (g->_gc_prev & ~_PyGC_PREV_MASK)
99         | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
100 }
101 
102 static inline void
103 gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
104 {
105     g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
106         | PREV_MASK_COLLECTING
107         | ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
108 }
109 
110 static inline void
111 gc_decref(PyGC_Head *g)
112 {
113     _PyObject_ASSERT_WITH_MSG(FROM_GC(g),
114                               gc_get_refs(g) > 0,
115                               "refcount is too small");
116     g->_gc_prev -= 1 << _PyGC_PREV_SHIFT;
117 }
118 
119 /* set for debugging information */
120 #define DEBUG_STATS             (1<<0) /* print collection statistics */
121 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
122 #define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
123 #define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
124 #define DEBUG_LEAK              DEBUG_COLLECTABLE | \
125                 DEBUG_UNCOLLECTABLE | \
126                 DEBUG_SAVEALL
127 
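/* Illustrative sketch, not part of the original module: the debug settings
 * above are plain bit masks stored in gcstate->debug, so a particular mode
 * is tested with a bitwise AND.  The helper name below is hypothetical.
 */
static inline int
_example_saveall_enabled(GCState *gcstate)
{
    return (gcstate->debug & DEBUG_SAVEALL) != 0;
}
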
128 #define GEN_HEAD(gcstate, n) (&(gcstate)->generations[n].head)
129 
130 
131 static GCState *
132 get_gc_state(void)
133 {
134     PyInterpreterState *interp = _PyInterpreterState_GET();
135     return &interp->gc;
136 }
137 
138 
139 void
140 _PyGC_InitState(GCState *gcstate)
141 {
142     gcstate->enabled = 1; /* automatic collection enabled? */
143 
144 #define _GEN_HEAD(n) GEN_HEAD(gcstate, n)
145     struct gc_generation generations[NUM_GENERATIONS] = {
146         /* PyGC_Head,                                    threshold,    count */
147         {{(uintptr_t)_GEN_HEAD(0), (uintptr_t)_GEN_HEAD(0)},   700,        0},
148         {{(uintptr_t)_GEN_HEAD(1), (uintptr_t)_GEN_HEAD(1)},   10,         0},
149         {{(uintptr_t)_GEN_HEAD(2), (uintptr_t)_GEN_HEAD(2)},   10,         0},
150     };
151     for (int i = 0; i < NUM_GENERATIONS; i++) {
152         gcstate->generations[i] = generations[i];
153     };
154     gcstate->generation0 = GEN_HEAD(gcstate, 0);
155     struct gc_generation permanent_generation = {
156           {(uintptr_t)&gcstate->permanent_generation.head,
157            (uintptr_t)&gcstate->permanent_generation.head}, 0, 0
158     };
159     gcstate->permanent_generation = permanent_generation;
160 }
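
/* Illustrative sketch, not part of the original module: the thresholds set up
 * above drive automatic collection.  Roughly, generation 0's count grows as
 * container objects are allocated, an older generation's count grows each time
 * a younger generation is collected, and a generation whose count exceeds its
 * threshold is collected together with all younger generations; see
 * gc_collect_generations() below, which also applies an extra ratio test for
 * the oldest generation.  The helper name below is hypothetical.
 */
static inline int
_example_generation_due(GCState *gcstate, int gen)
{
    return gcstate->generations[gen].count > gcstate->generations[gen].threshold;
}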
161 
162 
163 PyStatus
164 _PyGC_Init(PyInterpreterState *interp)
165 {
166     GCState *gcstate = &interp->gc;
167 
168     gcstate->garbage = PyList_New(0);
169     if (gcstate->garbage == NULL) {
170         return _PyStatus_NO_MEMORY();
171     }
172 
173     gcstate->callbacks = PyList_New(0);
174     if (gcstate->callbacks == NULL) {
175         return _PyStatus_NO_MEMORY();
176     }
177 
178     return _PyStatus_OK();
179 }
180 
181 
182 /*
183 _gc_prev values
184 ---------------
185 
186 Between collections, _gc_prev is used for the doubly linked list.
187 
188 Lowest two bits of _gc_prev are used for flags.
189 PREV_MASK_COLLECTING is used only while collecting and cleared before GC ends
190 or _PyObject_GC_UNTRACK() is called.
191 
192 During a collection, _gc_prev is temporarily used for gc_refs, and the gc list
193 is singly linked until _gc_prev is restored.
194 
195 gc_refs
196     At the start of a collection, update_refs() copies the true refcount
197     to gc_refs, for each object in the generation being collected.
198     subtract_refs() then adjusts gc_refs so that it equals the number of
199     times an object is referenced directly from outside the generation
200     being collected.
201 
202 PREV_MASK_COLLECTING
203     Objects in generation being collected are marked PREV_MASK_COLLECTING in
204     update_refs().
205 
206 
207 _gc_next values
208 ---------------
209 
210 _gc_next takes these values:
211 
212 0
213     The object is not tracked
214 
215 != 0
216     Pointer to the next object in the GC list.
217     Additionally, the lowest bit is used temporarily for the
218     NEXT_MASK_UNREACHABLE flag described below.
219 
220 NEXT_MASK_UNREACHABLE
221     move_unreachable() moves objects that are not reachable (whether
222     directly or indirectly) from outside the generation into an
223     "unreachable" set and sets this flag on them.
224 
225     Objects that are found to be reachable have gc_refs set to 1.
226     When this flag is found set on a reachable object, the object must be
227     in the "unreachable" set; the flag is unset and the object is moved
228     back to the "reachable" set.
229 
230     move_legacy_finalizers() removes this flag from the "unreachable" set.
231 */
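
/* Illustrative sketch, not part of the original module: how the packed words
 * described above are read.  The copied refcount lives in the high bits of
 * _gc_prev, above the two flag bits, and the real successor pointer is
 * recovered from _gc_next by masking off the UNREACHABLE bit.  The helper
 * names below are hypothetical; the real code uses gc_get_refs() and does the
 * same masking explicitly in move_unreachable(), visit_reachable() and
 * validate_list().
 */
static inline Py_ssize_t
_example_read_gc_refs(PyGC_Head *g)
{
    return (Py_ssize_t)(g->_gc_prev >> _PyGC_PREV_SHIFT);
}

static inline PyGC_Head *
_example_read_true_next(PyGC_Head *g)
{
    return (PyGC_Head *)(g->_gc_next & ~NEXT_MASK_UNREACHABLE);
}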
232 
233 /*** list functions ***/
234 
235 static inline void
236 gc_list_init(PyGC_Head *list)
237 {
238     // List header must not have flags.
239     // We can assign the pointer with a simple cast.
240     list->_gc_prev = (uintptr_t)list;
241     list->_gc_next = (uintptr_t)list;
242 }
243 
244 static inline int
245 gc_list_is_empty(PyGC_Head *list)
246 {
247     return (list->_gc_next == (uintptr_t)list);
248 }
249 
250 /* Append `node` to `list`. */
251 static inline void
252 gc_list_append(PyGC_Head *node, PyGC_Head *list)
253 {
254     PyGC_Head *last = (PyGC_Head *)list->_gc_prev;
255 
256     // last <-> node
257     _PyGCHead_SET_PREV(node, last);
258     _PyGCHead_SET_NEXT(last, node);
259 
260     // node <-> list
261     _PyGCHead_SET_NEXT(node, list);
262     list->_gc_prev = (uintptr_t)node;
263 }
264 
265 /* Remove `node` from the gc list it's currently in. */
266 static inline void
267 gc_list_remove(PyGC_Head *node)
268 {
269     PyGC_Head *prev = GC_PREV(node);
270     PyGC_Head *next = GC_NEXT(node);
271 
272     _PyGCHead_SET_NEXT(prev, next);
273     _PyGCHead_SET_PREV(next, prev);
274 
275     node->_gc_next = 0; /* object is not currently tracked */
276 }
277 
278 /* Move `node` from the gc list it's currently in (which is not explicitly
279  * named here) to the end of `list`.  This is semantically the same as
280  * gc_list_remove(node) followed by gc_list_append(node, list).
281  */
282 static void
283 gc_list_move(PyGC_Head *node, PyGC_Head *list)
284 {
285     /* Unlink from current list. */
286     PyGC_Head *from_prev = GC_PREV(node);
287     PyGC_Head *from_next = GC_NEXT(node);
288     _PyGCHead_SET_NEXT(from_prev, from_next);
289     _PyGCHead_SET_PREV(from_next, from_prev);
290 
291     /* Relink at end of new list. */
292     // list must not have flags.  So we can skip macros.
293     PyGC_Head *to_prev = (PyGC_Head*)list->_gc_prev;
294     _PyGCHead_SET_PREV(node, to_prev);
295     _PyGCHead_SET_NEXT(to_prev, node);
296     list->_gc_prev = (uintptr_t)node;
297     _PyGCHead_SET_NEXT(node, list);
298 }
299 
300 /* append list `from` onto list `to`; `from` becomes an empty list */
301 static void
302 gc_list_merge(PyGC_Head *from, PyGC_Head *to)
303 {
304     assert(from != to);
305     if (!gc_list_is_empty(from)) {
306         PyGC_Head *to_tail = GC_PREV(to);
307         PyGC_Head *from_head = GC_NEXT(from);
308         PyGC_Head *from_tail = GC_PREV(from);
309         assert(from_head != from);
310         assert(from_tail != from);
311 
312         _PyGCHead_SET_NEXT(to_tail, from_head);
313         _PyGCHead_SET_PREV(from_head, to_tail);
314 
315         _PyGCHead_SET_NEXT(from_tail, to);
316         _PyGCHead_SET_PREV(to, from_tail);
317     }
318     gc_list_init(from);
319 }
320 
321 static Py_ssize_t
322 gc_list_size(PyGC_Head *list)
323 {
324     PyGC_Head *gc;
325     Py_ssize_t n = 0;
326     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
327         n++;
328     }
329     return n;
330 }
331 
332 /* Walk the list and mark all objects as non-collecting */
333 static inline void
334 gc_list_clear_collecting(PyGC_Head *collectable)
335 {
336     PyGC_Head *gc;
337     for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) {
338         gc_clear_collecting(gc);
339     }
340 }
341 
342 /* Append objects in a GC list to a Python list.
343  * Return 0 if all OK, < 0 if error (out of memory for list)
344  */
345 static int
346 append_objects(PyObject *py_list, PyGC_Head *gc_list)
347 {
348     PyGC_Head *gc;
349     for (gc = GC_NEXT(gc_list); gc != gc_list; gc = GC_NEXT(gc)) {
350         PyObject *op = FROM_GC(gc);
351         if (op != py_list) {
352             if (PyList_Append(py_list, op)) {
353                 return -1; /* exception */
354             }
355         }
356     }
357     return 0;
358 }
359 
360 // Constants for validate_list's flags argument.
361 enum flagstates {collecting_clear_unreachable_clear,
362                  collecting_clear_unreachable_set,
363                  collecting_set_unreachable_clear,
364                  collecting_set_unreachable_set};
365 
366 #ifdef GC_DEBUG
367 // validate_list checks list consistency.  It also serves as documentation
368 // of when the flags are expected to be set / unset.
369 // `head` must be a doubly-linked gc list, although it's fine (expected!) if
370 // the prev and next pointers are "polluted" with flags.
371 // What's checked:
372 // - The `head` pointers are not polluted.
373 // - The objects' PREV_MASK_COLLECTING and NEXT_MASK_UNREACHABLE flags are all
374 //   set or clear, as specified by the 'flags' argument.
375 // - The prev and next pointers are mutually consistent.
376 static void
377 validate_list(PyGC_Head *head, enum flagstates flags)
378 {
379     assert((head->_gc_prev & PREV_MASK_COLLECTING) == 0);
380     assert((head->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
381     uintptr_t prev_value = 0, next_value = 0;
382     switch (flags) {
383         case collecting_clear_unreachable_clear:
384             break;
385         case collecting_set_unreachable_clear:
386             prev_value = PREV_MASK_COLLECTING;
387             break;
388         case collecting_clear_unreachable_set:
389             next_value = NEXT_MASK_UNREACHABLE;
390             break;
391         case collecting_set_unreachable_set:
392             prev_value = PREV_MASK_COLLECTING;
393             next_value = NEXT_MASK_UNREACHABLE;
394             break;
395         default:
396             assert(! "bad internal flags argument");
397     }
398     PyGC_Head *prev = head;
399     PyGC_Head *gc = GC_NEXT(head);
400     while (gc != head) {
401         PyGC_Head *trueprev = GC_PREV(gc);
402         PyGC_Head *truenext = (PyGC_Head *)(gc->_gc_next  & ~NEXT_MASK_UNREACHABLE);
403         assert(truenext != NULL);
404         assert(trueprev == prev);
405         assert((gc->_gc_prev & PREV_MASK_COLLECTING) == prev_value);
406         assert((gc->_gc_next & NEXT_MASK_UNREACHABLE) == next_value);
407         prev = gc;
408         gc = truenext;
409     }
410     assert(prev == GC_PREV(head));
411 }
412 #else
413 #define validate_list(x, y) do{}while(0)
414 #endif
415 
416 /*** end of list stuff ***/
417 
418 
419 /* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 and
420  * PREV_MASK_COLLECTING bit is set for all objects in containers.
421  */
422 static void
423 update_refs(PyGC_Head *containers)
424 {
425     PyGC_Head *gc = GC_NEXT(containers);
426     for (; gc != containers; gc = GC_NEXT(gc)) {
427         gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
428         /* Python's cyclic gc should never see an incoming refcount
429          * of 0:  if something decref'ed to 0, it should have been
430          * deallocated immediately at that time.
431          * Possible cause (if the assert triggers):  a tp_dealloc
432          * routine left a gc-aware object tracked during its teardown
433          * phase, and did something-- or allowed something to happen --
434          * that called back into Python.  gc can trigger then, and may
435          * see the still-tracked dying object.  Before this assert
436          * was added, such mistakes went on to allow gc to try to
437          * delete the object again.  In a debug build, that caused
438          * a mysterious segfault, when _Py_ForgetReference tried
439          * to remove the object from the doubly-linked list of all
440          * objects a second time.  In a release build, an actual
441          * double deallocation occurred, which leads to corruption
442          * of the allocator's internal bookkeeping pointers.  That's
443          * so serious that maybe this should be a release-build
444          * check instead of an assert?
445          */
446         _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
447     }
448 }
449 
450 /* A traversal callback for subtract_refs. */
451 static int
452 visit_decref(PyObject *op, void *parent)
453 {
454     _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op));
455 
456     if (_PyObject_IS_GC(op)) {
457         PyGC_Head *gc = AS_GC(op);
458         /* We're only interested in gc_refs for objects in the
459          * generation being collected, which can be recognized
460          * because only they have positive gc_refs.
461          */
462         if (gc_is_collecting(gc)) {
463             gc_decref(gc);
464         }
465     }
466     return 0;
467 }
468 
469 /* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
470  * for all objects in containers, while tracked gc objects outside the
471  * containers set are left untouched.  The ones with gc_refs > 0 are directly
472  * reachable from outside containers, and so can't be collected.
473  */
474 static void
475 subtract_refs(PyGC_Head *containers)
476 {
477     traverseproc traverse;
478     PyGC_Head *gc = GC_NEXT(containers);
479     for (; gc != containers; gc = GC_NEXT(gc)) {
480         PyObject *op = FROM_GC(gc);
481         traverse = Py_TYPE(op)->tp_traverse;
482         (void) traverse(FROM_GC(gc),
483                        (visitproc)visit_decref,
484                        op);
485     }
486 }
487 
488 /* A traversal callback for move_unreachable. */
489 static int
490 visit_reachable(PyObject *op, PyGC_Head *reachable)
491 {
492     if (!_PyObject_IS_GC(op)) {
493         return 0;
494     }
495 
496     PyGC_Head *gc = AS_GC(op);
497     const Py_ssize_t gc_refs = gc_get_refs(gc);
498 
499     // Ignore objects in other generations.
500     // This also skips objects "to the left" of the current position in
501     // move_unreachable's scan of the 'young' list - they've already been
502     // traversed, and no longer have the PREV_MASK_COLLECTING flag.
503     if (! gc_is_collecting(gc)) {
504         return 0;
505     }
506     // It would be a logic error elsewhere if the collecting flag were set on
507     // an untracked object.
508     assert(gc->_gc_next != 0);
509 
510     if (gc->_gc_next & NEXT_MASK_UNREACHABLE) {
511         /* This had gc_refs = 0 when move_unreachable got
512          * to it, but turns out it's reachable after all.
513          * Move it back to move_unreachable's 'young' list,
514          * and move_unreachable will eventually get to it
515          * again.
516          */
517         // Manually unlink gc from unreachable list because the list functions
518         // don't work right in the presence of NEXT_MASK_UNREACHABLE flags.
519         PyGC_Head *prev = GC_PREV(gc);
520         PyGC_Head *next = (PyGC_Head*)(gc->_gc_next & ~NEXT_MASK_UNREACHABLE);
521         _PyObject_ASSERT(FROM_GC(prev),
522                          prev->_gc_next & NEXT_MASK_UNREACHABLE);
523         _PyObject_ASSERT(FROM_GC(next),
524                          next->_gc_next & NEXT_MASK_UNREACHABLE);
525         prev->_gc_next = gc->_gc_next;  // copy NEXT_MASK_UNREACHABLE
526         _PyGCHead_SET_PREV(next, prev);
527 
528         gc_list_append(gc, reachable);
529         gc_set_refs(gc, 1);
530     }
531     else if (gc_refs == 0) {
532         /* This is in move_unreachable's 'young' list, but
533          * the traversal hasn't yet gotten to it.  All
534          * we need to do is tell move_unreachable that it's
535          * reachable.
536          */
537         gc_set_refs(gc, 1);
538     }
539     /* Else there's nothing to do.
540      * If gc_refs > 0, it must be in move_unreachable's 'young'
541      * list, and move_unreachable will eventually get to it.
542      */
543     else {
544         _PyObject_ASSERT_WITH_MSG(op, gc_refs > 0, "refcount is too small");
545     }
546     return 0;
547 }
548 
549 /* Move the unreachable objects from young to unreachable.  After this,
550  * no object in young has the PREV_MASK_COLLECTING flag and every object
551  * in unreachable does.
552  * All objects remaining in young are directly or indirectly reachable
553  * from outside the original young, and all objects in unreachable are
554  * not.
555  *
556  * This function restores the _gc_prev pointers; young and unreachable are
557  * doubly linked lists after it returns.  But _gc_next of objects in the
558  * unreachable list still carries the NEXT_MASK_UNREACHABLE flag, so the
559  * gc_list_* functions cannot be used on unreachable until that flag is removed.
560  */
561 static void
562 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
563 {
564     // previous elem in the young list, used to restore gc_prev.
565     PyGC_Head *prev = young;
566     PyGC_Head *gc = GC_NEXT(young);
567 
568     /* Invariants:  all objects "to the left" of us in young are reachable
569      * (directly or indirectly) from outside the young list as it was at entry.
570      *
571      * All other objects from the original young "to the left" of us are in
572      * unreachable now, and have NEXT_MASK_UNREACHABLE.  All objects to the
573      * left of us in 'young' now have been scanned, and no objects here
574      * or to the right have been scanned yet.
575      */
576 
577     while (gc != young) {
578         if (gc_get_refs(gc)) {
579             /* gc is definitely reachable from outside the
580              * original 'young'.  Mark it as such, and traverse
581              * its pointers to find any other objects that may
582              * be directly reachable from it.  Note that the
583              * call to tp_traverse may append objects to young,
584              * so we have to wait until it returns to determine
585              * the next object to visit.
586              */
587             PyObject *op = FROM_GC(gc);
588             traverseproc traverse = Py_TYPE(op)->tp_traverse;
589             _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(gc) > 0,
590                                       "refcount is too small");
591             // NOTE: visit_reachable may change gc->_gc_next when
592             // young->_gc_prev == gc.  Don't do gc = GC_NEXT(gc) before!
593             (void) traverse(op,
594                     (visitproc)visit_reachable,
595                     (void *)young);
596             // relink gc_prev to prev element.
597             _PyGCHead_SET_PREV(gc, prev);
598             // gc is no longer in the COLLECTING state after this point.
599             gc_clear_collecting(gc);
600             prev = gc;
601         }
602         else {
603             /* This *may* be unreachable.  To make progress,
604              * assume it is.  gc isn't directly reachable from
605              * any object we've already traversed, but may be
606              * reachable from an object we haven't gotten to yet.
607              * visit_reachable will eventually move gc back into
608              * young if that's so, and we'll see it again.
609              */
610             // Move gc to unreachable.
611             // No need to set gc->next->prev = prev because the list is singly linked here.
612             prev->_gc_next = gc->_gc_next;
613 
614             // We can't use gc_list_append() here because we use
615             // NEXT_MASK_UNREACHABLE here.
616             PyGC_Head *last = GC_PREV(unreachable);
617             // NOTE: Since all objects in the unreachable set have the
618             // NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
619             // But this may pollute the unreachable list head's 'next' pointer
620             // too. That's semantically senseless but expedient here - the
621             // damage is repaired when this function ends.
622             last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
623             _PyGCHead_SET_PREV(gc, last);
624             gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
625             unreachable->_gc_prev = (uintptr_t)gc;
626         }
627         gc = (PyGC_Head*)prev->_gc_next;
628     }
629     // young->_gc_prev must be the last element remaining in the list.
630     young->_gc_prev = (uintptr_t)prev;
631     // don't let the pollution of the list head's next pointer leak
632     unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE;
633 }
634 
635 static void
636 untrack_tuples(PyGC_Head *head)
637 {
638     PyGC_Head *next, *gc = GC_NEXT(head);
639     while (gc != head) {
640         PyObject *op = FROM_GC(gc);
641         next = GC_NEXT(gc);
642         if (PyTuple_CheckExact(op)) {
643             _PyTuple_MaybeUntrack(op);
644         }
645         gc = next;
646     }
647 }
648 
649 /* Try to untrack all currently tracked dictionaries */
650 static void
651 untrack_dicts(PyGC_Head *head)
652 {
653     PyGC_Head *next, *gc = GC_NEXT(head);
654     while (gc != head) {
655         PyObject *op = FROM_GC(gc);
656         next = GC_NEXT(gc);
657         if (PyDict_CheckExact(op)) {
658             _PyDict_MaybeUntrack(op);
659         }
660         gc = next;
661     }
662 }
663 
664 /* Return true if object has a pre-PEP 442 finalization method. */
665 static int
666 has_legacy_finalizer(PyObject *op)
667 {
668     return Py_TYPE(op)->tp_del != NULL;
669 }
670 
671 /* Move the objects in unreachable with tp_del slots into `finalizers`.
672  *
673  * This function also removes NEXT_MASK_UNREACHABLE flag
674  * from _gc_next in unreachable.
675  */
676 static void
677 move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
678 {
679     PyGC_Head *gc, *next;
680     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
681 
682     /* March over unreachable.  Move objects with finalizers into
683      * `finalizers`.
684      */
685     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
686         PyObject *op = FROM_GC(gc);
687 
688         _PyObject_ASSERT(op, gc->_gc_next & NEXT_MASK_UNREACHABLE);
689         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
690         next = (PyGC_Head*)gc->_gc_next;
691 
692         if (has_legacy_finalizer(op)) {
693             gc_clear_collecting(gc);
694             gc_list_move(gc, finalizers);
695         }
696     }
697 }
698 
699 static inline void
700 clear_unreachable_mask(PyGC_Head *unreachable)
701 {
702     /* Check that the list head does not have the unreachable bit set */
703     assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0);
704 
705     PyGC_Head *gc, *next;
706     assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0);
707     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
708         _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE);
709         gc->_gc_next &= ~NEXT_MASK_UNREACHABLE;
710         next = (PyGC_Head*)gc->_gc_next;
711     }
712     validate_list(unreachable, collecting_set_unreachable_clear);
713 }
714 
715 /* A traversal callback for move_legacy_finalizer_reachable. */
716 static int
717 visit_move(PyObject *op, PyGC_Head *tolist)
718 {
719     if (_PyObject_IS_GC(op)) {
720         PyGC_Head *gc = AS_GC(op);
721         if (gc_is_collecting(gc)) {
722             gc_list_move(gc, tolist);
723             gc_clear_collecting(gc);
724         }
725     }
726     return 0;
727 }
728 
729 /* Move objects that are reachable from finalizers, from the unreachable set
730  * into finalizers set.
731  */
732 static void
733 move_legacy_finalizer_reachable(PyGC_Head *finalizers)
734 {
735     traverseproc traverse;
736     PyGC_Head *gc = GC_NEXT(finalizers);
737     for (; gc != finalizers; gc = GC_NEXT(gc)) {
738         /* Note that the finalizers list may grow during this. */
739         traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
740         (void) traverse(FROM_GC(gc),
741                         (visitproc)visit_move,
742                         (void *)finalizers);
743     }
744 }
745 
746 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
747  * callback, invoke it if necessary.  Note that it's possible for such
748  * weakrefs to be outside the unreachable set -- indeed, those are precisely
749  * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
750  * overview & some details.  Some weakrefs with callbacks may be reclaimed
751  * directly by this routine; the number reclaimed is the return value.  Other
752  * weakrefs with callbacks may be moved into the `old` generation.  Objects
753  * moved into `old` are treated as reachable; the objects remaining in
754  * unreachable are still considered trash.  When this returns,
755  * no object in `unreachable` is weakly referenced anymore.
756  */
757 static int
758 handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
759 {
760     PyGC_Head *gc;
761     PyObject *op;               /* generally FROM_GC(gc) */
762     PyWeakReference *wr;        /* generally a cast of op */
763     PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
764     PyGC_Head *next;
765     int num_freed = 0;
766 
767     gc_list_init(&wrcb_to_call);
768 
769     /* Clear all weakrefs to the objects in unreachable.  If such a weakref
770      * also has a callback, move it into `wrcb_to_call` if the callback
771      * needs to be invoked.  Note that we cannot invoke any callbacks until
772      * all weakrefs to unreachable objects are cleared, lest the callback
773      * resurrect an unreachable object via a still-active weakref.  We
774      * make another pass over wrcb_to_call, invoking callbacks, after this
775      * pass completes.
776      */
777     for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
778         PyWeakReference **wrlist;
779 
780         op = FROM_GC(gc);
781         next = GC_NEXT(gc);
782 
783         if (PyWeakref_Check(op)) {
784             /* A weakref inside the unreachable set must be cleared.  If we
785              * allow its callback to execute inside delete_garbage(), it
786              * could expose objects that have tp_clear already called on
787              * them.  Or, it could resurrect unreachable objects.  One way
788              * this can happen is if some container objects do not implement
789              * tp_traverse.  Then, wr_object can be outside the unreachable
790              * set but can be deallocated as a result of breaking the
791              * reference cycle.  If we don't clear the weakref, the callback
792              * will run and potentially cause a crash.  See bpo-38006 for
793              * one example.
794              */
795             _PyWeakref_ClearRef((PyWeakReference *)op);
796         }
797 
798         if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
799             continue;
800 
801         /* It supports weakrefs.  Does it have any? */
802         wrlist = (PyWeakReference **)
803                                 _PyObject_GET_WEAKREFS_LISTPTR(op);
804 
805         /* `op` may have some weakrefs.  March over the list, clear
806          * all the weakrefs, and move the weakrefs with callbacks
807          * that must be called into wrcb_to_call.
808          */
809         for (wr = *wrlist; wr != NULL; wr = *wrlist) {
810             PyGC_Head *wrasgc;                  /* AS_GC(wr) */
811 
812             /* _PyWeakref_ClearRef clears the weakref but leaves
813              * the callback pointer intact.  Obscure:  it also
814              * changes *wrlist.
815              */
816             _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
817             _PyWeakref_ClearRef(wr);
818             _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
819             if (wr->wr_callback == NULL) {
820                 /* no callback */
821                 continue;
822             }
823 
824             /* Headache time.  `op` is going away, and is weakly referenced by
825              * `wr`, which has a callback.  Should the callback be invoked?  If wr
826              * is also trash, no:
827              *
828              * 1. There's no need to call it.  The object and the weakref are
829              *    both going away, so it's legitimate to pretend the weakref is
830              *    going away first.  The user has to ensure a weakref outlives its
831              *    referent if they want a guarantee that the wr callback will get
832              *    invoked.
833              *
834              * 2. It may be catastrophic to call it.  If the callback is also in
835              *    cyclic trash (CT), then although the CT is unreachable from
836              *    outside the current generation, CT may be reachable from the
837              *    callback.  Then the callback could resurrect insane objects.
838              *
839              * Since the callback is never needed and may be unsafe in this case,
840              * wr is simply left in the unreachable set.  Note that because we
841              * already called _PyWeakref_ClearRef(wr), its callback will never
842              * trigger.
843              *
844              * OTOH, if wr isn't part of CT, we should invoke the callback:  the
845              * weakref outlived the trash.  Note that since wr isn't CT in this
846              * case, its callback can't be CT either -- wr acted as an external
847              * root to this generation, and therefore its callback did too.  So
848              * nothing in CT is reachable from the callback either, so it's hard
849              * to imagine how calling it later could create a problem for us.  wr
850              * is moved to wrcb_to_call in this case.
851              */
852             if (gc_is_collecting(AS_GC(wr))) {
853                 /* it should already have been cleared above */
854                 assert(wr->wr_object == Py_None);
855                 continue;
856             }
857 
858             /* Create a new reference so that wr can't go away
859              * before we can process it again.
860              */
861             Py_INCREF(wr);
862 
863             /* Move wr to wrcb_to_call, for the next pass. */
864             wrasgc = AS_GC(wr);
865             assert(wrasgc != next); /* wrasgc is reachable, but
866                                        next isn't, so they can't
867                                        be the same */
868             gc_list_move(wrasgc, &wrcb_to_call);
869         }
870     }
871 
872     /* Invoke the callbacks we decided to honor.  It's safe to invoke them
873      * because they can't reference unreachable objects.
874      */
875     while (! gc_list_is_empty(&wrcb_to_call)) {
876         PyObject *temp;
877         PyObject *callback;
878 
879         gc = (PyGC_Head*)wrcb_to_call._gc_next;
880         op = FROM_GC(gc);
881         _PyObject_ASSERT(op, PyWeakref_Check(op));
882         wr = (PyWeakReference *)op;
883         callback = wr->wr_callback;
884         _PyObject_ASSERT(op, callback != NULL);
885 
886         /* copy-paste of weakrefobject.c's handle_callback() */
887         temp = PyObject_CallOneArg(callback, (PyObject *)wr);
888         if (temp == NULL)
889             PyErr_WriteUnraisable(callback);
890         else
891             Py_DECREF(temp);
892 
893         /* Give up the reference we created in the first pass.  When
894          * op's refcount hits 0 (which it may or may not do right now),
895          * op's tp_dealloc will decref op->wr_callback too.  Note
896          * that the refcount probably will hit 0 now, and because this
897          * weakref was reachable to begin with, gc didn't already
898          * add it to its count of freed objects.  Example:  a reachable
899          * weak value dict maps some key to this reachable weakref.
900          * The callback removes this key->weakref mapping from the
901          * dict, leaving no other references to the weakref (excepting
902          * ours).
903          */
904         Py_DECREF(op);
905         if (wrcb_to_call._gc_next == (uintptr_t)gc) {
906             /* object is still alive -- move it */
907             gc_list_move(gc, old);
908         }
909         else {
910             ++num_freed;
911         }
912     }
913 
914     return num_freed;
915 }
916 
917 static void
918 debug_cycle(const char *msg, PyObject *op)
919 {
920     PySys_FormatStderr("gc: %s <%s %p>\n",
921                        msg, Py_TYPE(op)->tp_name, op);
922 }
923 
924 /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
925  * only from such cycles).
926  * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
927  * garbage list (a Python list), else only the objects in finalizers with
928  * __del__ methods are appended to garbage.  All objects in finalizers are
929  * merged into the old list regardless.
930  */
931 static void
932 handle_legacy_finalizers(PyThreadState *tstate,
933                          GCState *gcstate,
934                          PyGC_Head *finalizers, PyGC_Head *old)
935 {
936     assert(!_PyErr_Occurred(tstate));
937     assert(gcstate->garbage != NULL);
938 
939     PyGC_Head *gc = GC_NEXT(finalizers);
940     for (; gc != finalizers; gc = GC_NEXT(gc)) {
941         PyObject *op = FROM_GC(gc);
942 
943         if ((gcstate->debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
944             if (PyList_Append(gcstate->garbage, op) < 0) {
945                 _PyErr_Clear(tstate);
946                 break;
947             }
948         }
949     }
950 
951     gc_list_merge(finalizers, old);
952 }
953 
954 /* Run first-time finalizers (if any) on all the objects in collectable.
955  * Note that this may remove some (or even all) of the objects from the
956  * list, due to refcounts falling to 0.
957  */
958 static void
959 finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable)
960 {
961     destructor finalize;
962     PyGC_Head seen;
963 
964     /* While we're going through the loop, `finalize(op)` may cause op, or
965      * other objects, to be reclaimed via refcounts falling to zero.  So
966      * there's little we can rely on about the structure of the input
967      * `collectable` list across iterations.  For safety, we always take the
968      * first object in that list and move it to a temporary `seen` list.
969      * If objects vanish from the `collectable` and `seen` lists we don't
970      * care.
971      */
972     gc_list_init(&seen);
973 
974     while (!gc_list_is_empty(collectable)) {
975         PyGC_Head *gc = GC_NEXT(collectable);
976         PyObject *op = FROM_GC(gc);
977         gc_list_move(gc, &seen);
978         if (!_PyGCHead_FINALIZED(gc) &&
979                 (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
980             _PyGCHead_SET_FINALIZED(gc);
981             Py_INCREF(op);
982             finalize(op);
983             assert(!_PyErr_Occurred(tstate));
984             Py_DECREF(op);
985         }
986     }
987     gc_list_merge(&seen, collectable);
988 }
989 
990 /* Break reference cycles by clearing the containers involved.  This is
991  * tricky business as the lists can be changing and we don't know which
992  * objects may be freed.  It is possible I screwed something up here.
993  */
994 static void
995 delete_garbage(PyThreadState *tstate, GCState *gcstate,
996                PyGC_Head *collectable, PyGC_Head *old)
997 {
998     assert(!_PyErr_Occurred(tstate));
999 
1000     while (!gc_list_is_empty(collectable)) {
1001         PyGC_Head *gc = GC_NEXT(collectable);
1002         PyObject *op = FROM_GC(gc);
1003 
1004         _PyObject_ASSERT_WITH_MSG(op, Py_REFCNT(op) > 0,
1005                                   "refcount is too small");
1006 
1007         if (gcstate->debug & DEBUG_SAVEALL) {
1008             assert(gcstate->garbage != NULL);
1009             if (PyList_Append(gcstate->garbage, op) < 0) {
1010                 _PyErr_Clear(tstate);
1011             }
1012         }
1013         else {
1014             inquiry clear;
1015             if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
1016                 Py_INCREF(op);
1017                 (void) clear(op);
1018                 if (_PyErr_Occurred(tstate)) {
1019                     _PyErr_WriteUnraisableMsg("in tp_clear of",
1020                                               (PyObject*)Py_TYPE(op));
1021                 }
1022                 Py_DECREF(op);
1023             }
1024         }
1025         if (GC_NEXT(collectable) == gc) {
1026             /* object is still alive, move it, it may die later */
1027             gc_clear_collecting(gc);
1028             gc_list_move(gc, old);
1029         }
1030     }
1031 }
1032 
1033 /* Clear all free lists
1034  * All free lists are cleared during the collection of the highest generation.
1035  * Allocated items in the free list may keep a pymalloc arena occupied.
1036  * Clearing the free lists may give back memory to the OS earlier.
1037  */
1038 static void
1039 clear_freelists(PyInterpreterState *interp)
1040 {
1041     _PyFrame_ClearFreeList(interp);
1042     _PyTuple_ClearFreeList(interp);
1043     _PyFloat_ClearFreeList(interp);
1044     _PyList_ClearFreeList(interp);
1045     _PyDict_ClearFreeList(interp);
1046     _PyAsyncGen_ClearFreeLists(interp);
1047     _PyContext_ClearFreeList(interp);
1048 }
1049 
1050 // Show stats for objects in each generation
1051 static void
1052 show_stats_each_generations(GCState *gcstate)
1053 {
1054     char buf[100];
1055     size_t pos = 0;
1056 
1057     for (int i = 0; i < NUM_GENERATIONS && pos < sizeof(buf); i++) {
1058         pos += PyOS_snprintf(buf+pos, sizeof(buf)-pos,
1059                              " %zd",
1060                              gc_list_size(GEN_HEAD(gcstate, i)));
1061     }
1062 
1063     PySys_FormatStderr(
1064         "gc: objects in each generation:%s\n"
1065         "gc: objects in permanent generation: %zd\n",
1066         buf, gc_list_size(&gcstate->permanent_generation.head));
1067 }
1068 
1069 /* Deduce which objects among "base" are unreachable from outside the list
1070    and move them to 'unreachable'. The process consists of the following steps:
1071 
1072 1. Copy all reference counts to a different field (gc_prev is used to hold
1073    this copy to save memory).
1074 2. Traverse all objects in "base" and visit all referred objects using
1075    "tp_traverse"; for every visited object, subtract 1 from the reference
1076    count (the copy made in the previous step). After this step, all objects
1077    that can be reached directly from outside must have a strictly positive
1078    reference count, while all unreachable objects must have a count of exactly 0.
1079 3. Identify all unreachable objects (the ones with 0 reference count) and move
1080    them to the "unreachable" list. This step also needs to move back to "base" all
1081    objects that were initially marked as unreachable but are transitively reachable
1082    from the reachable objects (the ones with a strictly positive reference count).
1083 
1084 Contracts:
1085 
1086     * The "base" has to be a valid list with no mask set.
1087 
1088     * The "unreachable" list must be uninitialized (this function calls
1089       gc_list_init over 'unreachable').
1090 
1091 IMPORTANT: This function leaves the objects in 'unreachable' with the
1092 NEXT_MASK_UNREACHABLE flag set, and does not clear it, in order to skip an
1093 unnecessary iteration. Until the flag is cleared (for example by the
1094 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'),
1095 the 'unreachable' list is not a normal list and most gc_list_* functions cannot be used on it. */
1096 static inline void
1097 deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
1098     validate_list(base, collecting_clear_unreachable_clear);
1099     /* Using ob_refcnt and gc_refs, calculate which objects in the
1100      * container set are reachable from outside the set (i.e., have a
1101      * refcount greater than 0 when all the references within the
1102      * set are taken into account).
1103      */
1104     update_refs(base);  // gc_prev is used for gc_refs
1105     subtract_refs(base);
1106 
1107     /* Leave everything reachable from outside base in base, and move
1108      * everything else (in base) to unreachable.
1109      *
1110      * NOTE:  This used to move the reachable objects into a reachable
1111      * set instead.  But most things usually turn out to be reachable,
1112      * so it's more efficient to move the unreachable things.  It "sounds slick"
1113      * to move the unreachable objects, until you think about it - the reason it
1114      * pays isn't actually obvious.
1115      *
1116      * Suppose we create objects A, B, C in that order.  They appear in the young
1117      * generation in the same order.  If B points to A, and C to B, and C is
1118      * reachable from outside, then the adjusted refcounts will be 0, 0, and 1
1119      * respectively.
1120      *
1121      * When move_unreachable finds A, A is moved to the unreachable list.  The
1122      * same for B when it's first encountered.  Then C is traversed, B is moved
1123      * _back_ to the reachable list.  B is eventually traversed, and then A is
1124      * moved back to the reachable list.
1125      *
1126      * So instead of not moving at all, the reachable objects B and A are moved
1127      * twice each.  Why is this a win?  A straightforward algorithm to move the
1128      * reachable objects instead would move A, B, and C once each.
1129      *
1130      * The key is that this dance leaves the objects in order C, B, A - it's
1131      * reversed from the original order.  On all _subsequent_ scans, none of
1132      * them will move.  Since most objects aren't in cycles, this can save an
1133      * unbounded number of moves across an unbounded number of later collections.
1134      * It can cost more only the first time the chain is scanned.
1135      *
1136      * Drawback:  move_unreachable is also used to find out what's still trash
1137      * after finalizers may resurrect objects.  In _that_ case most unreachable
1138      * objects will remain unreachable, so it would be more efficient to move
1139      * the reachable objects instead.  But this is a one-time cost, probably not
1140      * worth complicating the code to speed just a little.
1141      */
1142     gc_list_init(unreachable);
1143     move_unreachable(base, unreachable);  // gc_prev is pointer again
1144     validate_list(base, collecting_clear_unreachable_clear);
1145     validate_list(unreachable, collecting_set_unreachable_set);
1146 }
1147 
1148 /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
1149    them to 'old_generation' and placing the rest on 'still_unreachable'.
1150 
1151    Contracts:
1152        * After this function 'unreachable' must not be used anymore and 'still_unreachable'
1153          will contain the objects that did not resurrect.
1154 
1155        * The "still_unreachable" list must be uninitialized (this function calls
1156          gc_list_init over 'still_unreachable').
1157 
1158 IMPORTANT: After a call to this function, the 'still_unreachable' set will have the
1159 PREV_MASK_COLLECTING flag set, but the objects in this set are going to be removed so
1160 we can skip the expense of clearing the flag to avoid extra iteration. */
1161 static inline void
1162 handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
1163                            PyGC_Head *old_generation)
1164 {
1165     // Remove the PREV_MASK_COLLECTING from unreachable
1166     // to prepare it for a new call to 'deduce_unreachable'
1167     gc_list_clear_collecting(unreachable);
1168 
1169     // After the call to deduce_unreachable, the 'still_unreachable' set will
1170     // have the PREV_MASK_COLLECTING flag set, but the objects are going to be
1171     // removed so we can skip the expense of clearing the flag.
1172     PyGC_Head* resurrected = unreachable;
1173     deduce_unreachable(resurrected, still_unreachable);
1174     clear_unreachable_mask(still_unreachable);
1175 
1176     // Move the resurrected objects to the old generation for future collection.
1177     gc_list_merge(resurrected, old_generation);
1178 }
1179 
1180 /* This is the main function.  Read this to understand how the
1181  * collection process works. */
1182 static Py_ssize_t
1183 gc_collect_main(PyThreadState *tstate, int generation,
1184                 Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
1185                 int nofail)
1186 {
1187     int i;
1188     Py_ssize_t m = 0; /* # objects collected */
1189     Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
1190     PyGC_Head *young; /* the generation we are examining */
1191     PyGC_Head *old; /* next older generation */
1192     PyGC_Head unreachable; /* non-problematic unreachable trash */
1193     PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
1194     PyGC_Head *gc;
1195     _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
1196     GCState *gcstate = &tstate->interp->gc;
1197 
1198     // gc_collect_main() must not be called before _PyGC_Init
1199     // or after _PyGC_Fini()
1200     assert(gcstate->garbage != NULL);
1201     assert(!_PyErr_Occurred(tstate));
1202 
1203 #ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
1204     if (tstate->interp->config._isolated_interpreter) {
1205         // bpo-40533: The garbage collector must not be run in parallel on
1206         // Python objects shared by multiple interpreters.
1207         return 0;
1208     }
1209 #endif
1210 
1211     if (gcstate->debug & DEBUG_STATS) {
1212         PySys_WriteStderr("gc: collecting generation %d...\n", generation);
1213         show_stats_each_generations(gcstate);
1214         t1 = _PyTime_GetMonotonicClock();
1215     }
1216 
1217     if (PyDTrace_GC_START_ENABLED())
1218         PyDTrace_GC_START(generation);
1219 
1220     /* update collection and allocation counters */
1221     if (generation+1 < NUM_GENERATIONS)
1222         gcstate->generations[generation+1].count += 1;
1223     for (i = 0; i <= generation; i++)
1224         gcstate->generations[i].count = 0;
1225 
1226     /* merge younger generations with one we are currently collecting */
1227     for (i = 0; i < generation; i++) {
1228         gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
1229     }
1230 
1231     /* handy references */
1232     young = GEN_HEAD(gcstate, generation);
1233     if (generation < NUM_GENERATIONS-1)
1234         old = GEN_HEAD(gcstate, generation+1);
1235     else
1236         old = young;
1237     validate_list(old, collecting_clear_unreachable_clear);
1238 
1239     deduce_unreachable(young, &unreachable);
1240 
1241     untrack_tuples(young);
1242     /* Move reachable objects to next generation. */
1243     if (young != old) {
1244         if (generation == NUM_GENERATIONS - 2) {
1245             gcstate->long_lived_pending += gc_list_size(young);
1246         }
1247         gc_list_merge(young, old);
1248     }
1249     else {
1250         /* We only un-track dicts in full collections, to avoid quadratic
1251            dict build-up. See issue #14775. */
1252         untrack_dicts(young);
1253         gcstate->long_lived_pending = 0;
1254         gcstate->long_lived_total = gc_list_size(young);
1255     }
1256 
1257     /* All objects in unreachable are trash, but objects reachable from
1258      * legacy finalizers (e.g. tp_del) can't safely be deleted.
1259      */
1260     gc_list_init(&finalizers);
1261     // NEXT_MASK_UNREACHABLE is cleared here.
1262     // After move_legacy_finalizers(), unreachable is a normal list again.
1263     move_legacy_finalizers(&unreachable, &finalizers);
1264     /* finalizers contains the unreachable objects with a legacy finalizer;
1265      * unreachable objects reachable *from* those are also uncollectable,
1266      * and we move those into the finalizers list too.
1267      */
1268     move_legacy_finalizer_reachable(&finalizers);
1269 
1270     validate_list(&finalizers, collecting_clear_unreachable_clear);
1271     validate_list(&unreachable, collecting_set_unreachable_clear);
1272 
1273     /* Print debugging information. */
1274     if (gcstate->debug & DEBUG_COLLECTABLE) {
1275         for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) {
1276             debug_cycle("collectable", FROM_GC(gc));
1277         }
1278     }
1279 
1280     /* Clear weakrefs and invoke callbacks as necessary. */
1281     m += handle_weakrefs(&unreachable, old);
1282 
1283     validate_list(old, collecting_clear_unreachable_clear);
1284     validate_list(&unreachable, collecting_set_unreachable_clear);
1285 
1286     /* Call tp_finalize on objects which have one. */
1287     finalize_garbage(tstate, &unreachable);
1288 
1289     /* Handle any objects that may have resurrected after the call
1290      * to 'finalize_garbage' and continue the collection with the
1291      * objects that are still unreachable */
1292     PyGC_Head final_unreachable;
1293     handle_resurrected_objects(&unreachable, &final_unreachable, old);
1294 
1295     /* Call tp_clear on objects in the final_unreachable set.  This will cause
1296     * the reference cycles to be broken.  It may also cause some objects
1297     * in finalizers to be freed.
1298     */
1299     m += gc_list_size(&final_unreachable);
1300     delete_garbage(tstate, gcstate, &final_unreachable, old);
1301 
1302     /* Collect statistics on uncollectable objects found and print
1303      * debugging information. */
1304     for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) {
1305         n++;
1306         if (gcstate->debug & DEBUG_UNCOLLECTABLE)
1307             debug_cycle("uncollectable", FROM_GC(gc));
1308     }
1309     if (gcstate->debug & DEBUG_STATS) {
1310         double d = _PyTime_AsSecondsDouble(_PyTime_GetMonotonicClock() - t1);
1311         PySys_WriteStderr(
1312             "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
1313             n+m, n, d);
1314     }
1315 
1316     /* Append instances in the uncollectable set to a Python
1317      * reachable list of garbage.  The programmer has to deal with
1318      * this if they insist on creating this type of structure.
1319      */
1320     handle_legacy_finalizers(tstate, gcstate, &finalizers, old);
1321     validate_list(old, collecting_clear_unreachable_clear);
1322 
1323     /* Clear free list only during the collection of the highest
1324      * generation */
1325     if (generation == NUM_GENERATIONS-1) {
1326         clear_freelists(tstate->interp);
1327     }
1328 
1329     if (_PyErr_Occurred(tstate)) {
1330         if (nofail) {
1331             _PyErr_Clear(tstate);
1332         }
1333         else {
1334             _PyErr_WriteUnraisableMsg("in garbage collection", NULL);
1335         }
1336     }
1337 
1338     /* Update stats */
1339     if (n_collected) {
1340         *n_collected = m;
1341     }
1342     if (n_uncollectable) {
1343         *n_uncollectable = n;
1344     }
1345 
1346     struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
1347     stats->collections++;
1348     stats->collected += m;
1349     stats->uncollectable += n;
1350 
1351     if (PyDTrace_GC_DONE_ENABLED()) {
1352         PyDTrace_GC_DONE(n + m);
1353     }
1354 
1355     assert(!_PyErr_Occurred(tstate));
1356     return n + m;
1357 }
1358 
1359 /* Invoke progress callbacks to notify clients that garbage collection
1360  * is starting or stopping
1361  */
1362 static void
1363 invoke_gc_callback(PyThreadState *tstate, const char *phase,
1364                    int generation, Py_ssize_t collected,
1365                    Py_ssize_t uncollectable)
1366 {
1367     assert(!_PyErr_Occurred(tstate));
1368 
1369     /* we may get called very early, before gcstate->callbacks is initialized */
1370     GCState *gcstate = &tstate->interp->gc;
1371     if (gcstate->callbacks == NULL) {
1372         return;
1373     }
1374 
1375     /* The internal callbacks list cannot be rebound from Python code; check it for sanity */
1376     assert(PyList_CheckExact(gcstate->callbacks));
1377     PyObject *info = NULL;
1378     if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
1379         info = Py_BuildValue("{sisnsn}",
1380             "generation", generation,
1381             "collected", collected,
1382             "uncollectable", uncollectable);
1383         if (info == NULL) {
1384             PyErr_WriteUnraisable(NULL);
1385             return;
1386         }
1387     }
1388     for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
1389         PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
1390         Py_INCREF(cb); /* make sure cb doesn't go away */
1391         r = PyObject_CallFunction(cb, "sO", phase, info);
1392         if (r == NULL) {
1393             PyErr_WriteUnraisable(cb);
1394         }
1395         else {
1396             Py_DECREF(r);
1397         }
1398         Py_DECREF(cb);
1399     }
1400     Py_XDECREF(info);
1401     assert(!_PyErr_Occurred(tstate));
1402 }
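
/* Illustrative Python usage of the callback mechanism above (a sketch, not
 * part of this module): callbacks registered in gc.callbacks are called with
 * the phase string ("start" or "stop") and the info dict built by
 * invoke_gc_callback().
 *
 *     import gc
 *
 *     def on_gc(phase, info):
 *         # info has "generation", "collected" and "uncollectable" keys
 *         print(phase, info["generation"], info["collected"])
 *
 *     gc.callbacks.append(on_gc)
 *     gc.collect()                  # on_gc() is called for "start" and "stop"
 *     gc.callbacks.remove(on_gc)
 */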
1403 
1404 /* Perform garbage collection of a generation and invoke
1405  * progress callbacks.
1406  */
1407 static Py_ssize_t
1408 gc_collect_with_callback(PyThreadState *tstate, int generation)
1409 {
1410     assert(!_PyErr_Occurred(tstate));
1411     Py_ssize_t result, collected, uncollectable;
1412     invoke_gc_callback(tstate, "start", generation, 0, 0);
1413     result = gc_collect_main(tstate, generation, &collected, &uncollectable, 0);
1414     invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
1415     assert(!_PyErr_Occurred(tstate));
1416     return result;
1417 }
1418 
1419 static Py_ssize_t
1420 gc_collect_generations(PyThreadState *tstate)
1421 {
1422     GCState *gcstate = &tstate->interp->gc;
1423     /* Find the oldest generation (highest numbered) where the count
1424      * exceeds the threshold.  Objects in that generation and
1425      * generations younger than it will be collected. */
1426     Py_ssize_t n = 0;
1427     for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
1428         if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
1429             /* Avoid quadratic performance degradation in number
1430                of tracked objects (see also issue #4074):
1431 
1432                To limit the cost of garbage collection, there are two strategies:
1433                  - make each collection faster, e.g. by scanning fewer objects
1434                  - do fewer collections
1435                This heuristic is about the latter strategy.
1436 
1437                In addition to the various configurable thresholds, we only trigger a
1438                full collection if the ratio
1439 
1440                 long_lived_pending / long_lived_total
1441 
1442                is above a given value (hardwired to 25%).
1443 
1444                The reason is that, while "non-full" collections (i.e., collections of
1445                the young and middle generations) will always examine roughly the same
1446                number of objects -- determined by the aforementioned thresholds --,
1447                the cost of a full collection is proportional to the total number of
1448                long-lived objects, which is virtually unbounded.
1449 
1450                Indeed, it has been remarked that doing a full collection every
1451                <constant number> of object creations entails a dramatic performance
1452                degradation in workloads which consist of creating and storing lots of
1453                long-lived objects (e.g. building a large list of GC-tracked objects would
1454                show quadratic performance, instead of linear as expected: see issue #4074).
1455 
1456                Using the above ratio, instead, yields amortized linear performance in
1457                the total number of objects (the effect of which can be summarized
1458                thusly: "each full garbage collection is more and more costly as the
1459                number of objects grows, but we do fewer and fewer of them").
1460 
1461                This heuristic was suggested by Martin von Löwis on python-dev in
1462                June 2008. His original analysis and proposal can be found at:
1463                http://mail.python.org/pipermail/python-dev/2008-June/080579.html
1464             */
1465             if (i == NUM_GENERATIONS - 1
1466                 && gcstate->long_lived_pending < gcstate->long_lived_total / 4)
1467                 continue;
1468             n = gc_collect_with_callback(tstate, i);
1469             break;
1470         }
1471     }
1472     return n;
1473 }
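
/* A worked example of the heuristic above (illustrative numbers only, not
 * taken from the source): with long_lived_total == 100000, a threshold-triggered
 * full collection is skipped while long_lived_pending < 25000 (the 25% ratio),
 * and runs once the pending count reaches that ratio.  Each full collection
 * still gets more expensive as the heap grows, but full collections become
 * proportionally rarer, which is what yields the amortized linear behaviour
 * described above.
 */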
1474 
1475 #include "clinic/gcmodule.c.h"
1476 
1477 /*[clinic input]
1478 gc.enable
1479 
1480 Enable automatic garbage collection.
1481 [clinic start generated code]*/
1482 
1483 static PyObject *
1484 gc_enable_impl(PyObject *module)
1485 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
1486 {
1487     PyGC_Enable();
1488     Py_RETURN_NONE;
1489 }
1490 
1491 /*[clinic input]
1492 gc.disable
1493 
1494 Disable automatic garbage collection.
1495 [clinic start generated code]*/
1496 
1497 static PyObject *
1498 gc_disable_impl(PyObject *module)
1499 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
1500 {
1501     PyGC_Disable();
1502     Py_RETURN_NONE;
1503 }
1504 
1505 /*[clinic input]
1506 gc.isenabled -> bool
1507 
1508 Returns true if automatic garbage collection is enabled.
1509 [clinic start generated code]*/
1510 
1511 static int
1512 gc_isenabled_impl(PyObject *module)
1513 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
1514 {
1515     return PyGC_IsEnabled();
1516 }
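
/* Illustrative Python usage (a sketch): the three functions above map directly
 * onto PyGC_Enable(), PyGC_Disable() and PyGC_IsEnabled().
 *
 *     import gc
 *
 *     gc.disable()
 *     assert gc.isenabled() is False
 *     # ... allocation-heavy section without automatic collections ...
 *     gc.enable()
 *     assert gc.isenabled() is True
 */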
1517 
1518 /*[clinic input]
1519 gc.collect -> Py_ssize_t
1520 
1521     generation: int(c_default="NUM_GENERATIONS - 1") = 2
1522 
1523 Run the garbage collector.
1524 
1525 With no arguments, run a full collection.  The optional argument
1526 may be an integer specifying which generation to collect.  A ValueError
1527 is raised if the generation number is invalid.
1528 
1529 The number of unreachable objects is returned.
1530 [clinic start generated code]*/
1531 
1532 static Py_ssize_t
1533 gc_collect_impl(PyObject *module, int generation)
1534 /*[clinic end generated code: output=b697e633043233c7 input=40720128b682d879]*/
1535 {
1536     PyThreadState *tstate = _PyThreadState_GET();
1537 
1538     if (generation < 0 || generation >= NUM_GENERATIONS) {
1539         _PyErr_SetString(tstate, PyExc_ValueError, "invalid generation");
1540         return -1;
1541     }
1542 
1543     GCState *gcstate = &tstate->interp->gc;
1544     Py_ssize_t n;
1545     if (gcstate->collecting) {
1546         /* already collecting, don't do anything */
1547         n = 0;
1548     }
1549     else {
1550         gcstate->collecting = 1;
1551         n = gc_collect_with_callback(tstate, generation);
1552         gcstate->collecting = 0;
1553     }
1554     return n;
1555 }
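
/* Illustrative Python usage (a sketch): gc.collect() runs a full collection
 * and returns the number of unreachable objects found; an out-of-range
 * generation raises ValueError, mirroring the check in gc_collect_impl().
 *
 *     import gc
 *
 *     unreachable = gc.collect()        # full collection (generation 2)
 *     gc.collect(0)                     # collect only the youngest generation
 *     try:
 *         gc.collect(7)
 *     except ValueError:
 *         pass                          # "invalid generation"
 */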
1556 
1557 /*[clinic input]
1558 gc.set_debug
1559 
1560     flags: int
1561         An integer that can have the following bits turned on:
1562           DEBUG_STATS - Print statistics during collection.
1563           DEBUG_COLLECTABLE - Print collectable objects found.
1564           DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects
1565             found.
1566           DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.
1567           DEBUG_LEAK - Debug leaking programs (everything but STATS).
1568     /
1569 
1570 Set the garbage collection debugging flags.
1571 
1572 Debugging information is written to sys.stderr.
1573 [clinic start generated code]*/
1574 
1575 static PyObject *
1576 gc_set_debug_impl(PyObject *module, int flags)
1577 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
1578 {
1579     GCState *gcstate = get_gc_state();
1580     gcstate->debug = flags;
1581     Py_RETURN_NONE;
1582 }
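
/* Illustrative Python usage (a sketch): the debug flags are module-level
 * constants combined with bitwise OR; output goes to sys.stderr.
 *
 *     import gc
 *
 *     gc.set_debug(gc.DEBUG_STATS | gc.DEBUG_COLLECTABLE)
 *     gc.collect()           # prints statistics and collectable objects
 *     print(gc.get_debug())  # the integer flag value that was set
 *     gc.set_debug(0)        # turn debugging output off again
 */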
1583 
1584 /*[clinic input]
1585 gc.get_debug -> int
1586 
1587 Get the garbage collection debugging flags.
1588 [clinic start generated code]*/
1589 
1590 static int
1591 gc_get_debug_impl(PyObject *module)
1592 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
1593 {
1594     GCState *gcstate = get_gc_state();
1595     return gcstate->debug;
1596 }
1597 
1598 PyDoc_STRVAR(gc_set_thresh__doc__,
1599 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1600 "\n"
1601 "Sets the collection thresholds.  Setting threshold0 to zero disables\n"
1602 "collection.\n");
1603 
1604 static PyObject *
1605 gc_set_threshold(PyObject *self, PyObject *args)
1606 {
1607     GCState *gcstate = get_gc_state();
1608     if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1609                           &gcstate->generations[0].threshold,
1610                           &gcstate->generations[1].threshold,
1611                           &gcstate->generations[2].threshold))
1612         return NULL;
1613     for (int i = 3; i < NUM_GENERATIONS; i++) {
1614         /* generations higher than 2 get the same threshold */
1615         gcstate->generations[i].threshold = gcstate->generations[2].threshold;
1616     }
1617     Py_RETURN_NONE;
1618 }
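
/* Illustrative Python usage (a sketch): threshold1 and threshold2 are optional
 * and keep their current values if omitted; setting threshold0 to zero disables
 * threshold-driven collection, as the docstring above says.
 *
 *     import gc
 *
 *     gc.set_threshold(700, 10, 10)     # the historical defaults
 *     print(gc.get_threshold())         # (700, 10, 10)
 *     gc.set_threshold(0)               # disable automatic collection
 */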
1619 
1620 /*[clinic input]
1621 gc.get_threshold
1622 
1623 Return the current collection thresholds.
1624 [clinic start generated code]*/
1625 
1626 static PyObject *
1627 gc_get_threshold_impl(PyObject *module)
1628 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
1629 {
1630     GCState *gcstate = get_gc_state();
1631     return Py_BuildValue("(iii)",
1632                          gcstate->generations[0].threshold,
1633                          gcstate->generations[1].threshold,
1634                          gcstate->generations[2].threshold);
1635 }
1636 
1637 /*[clinic input]
1638 gc.get_count
1639 
1640 Return a three-tuple of the current collection counts.
1641 [clinic start generated code]*/
1642 
1643 static PyObject *
1644 gc_get_count_impl(PyObject *module)
1645 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
1646 {
1647     GCState *gcstate = get_gc_state();
1648     return Py_BuildValue("(iii)",
1649                          gcstate->generations[0].count,
1650                          gcstate->generations[1].count,
1651                          gcstate->generations[2].count);
1652 }
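
/* Illustrative Python usage (a sketch): the returned counts are the
 * per-generation counters that get compared against the thresholds above.
 *
 *     import gc
 *
 *     print(gc.get_count())       # e.g. (121, 5, 2)
 *     print(gc.get_threshold())   # e.g. (700, 10, 10)
 */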
1653 
1654 static int
1655 referrersvisit(PyObject* obj, PyObject *objs)
1656 {
1657     Py_ssize_t i;
1658     for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1659         if (PyTuple_GET_ITEM(objs, i) == obj)
1660             return 1;
1661     return 0;
1662 }
1663 
1664 static int
1665 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1666 {
1667     PyGC_Head *gc;
1668     PyObject *obj;
1669     traverseproc traverse;
1670     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(gc)) {
1671         obj = FROM_GC(gc);
1672         traverse = Py_TYPE(obj)->tp_traverse;
1673         if (obj == objs || obj == resultlist)
1674             continue;
1675         if (traverse(obj, (visitproc)referrersvisit, objs)) {
1676             if (PyList_Append(resultlist, obj) < 0)
1677                 return 0; /* error */
1678         }
1679     }
1680     return 1; /* no error */
1681 }
1682 
1683 PyDoc_STRVAR(gc_get_referrers__doc__,
1684 "get_referrers(*objs) -> list\n\
1685 Return the list of objects that directly refer to any of objs.");
1686 
1687 static PyObject *
1688 gc_get_referrers(PyObject *self, PyObject *args)
1689 {
1690     if (PySys_Audit("gc.get_referrers", "(O)", args) < 0) {
1691         return NULL;
1692     }
1693 
1694     PyObject *result = PyList_New(0);
1695     if (!result) {
1696         return NULL;
1697     }
1698 
1699     GCState *gcstate = get_gc_state();
1700     for (int i = 0; i < NUM_GENERATIONS; i++) {
1701         if (!(gc_referrers_for(args, GEN_HEAD(gcstate, i), result))) {
1702             Py_DECREF(result);
1703             return NULL;
1704         }
1705     }
1706     return result;
1707 }
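
/* Illustrative Python usage (a sketch): only objects tracked by the collector
 * can show up as referrers, since gc_referrers_for() walks the GC lists.
 *
 *     import gc
 *
 *     item = [1, 2, 3]
 *     holder = {"key": item}
 *     referrers = gc.get_referrers(item)
 *     assert holder in referrers    # the dict directly refers to the list
 */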
1708 
1709 /* Append obj to list; return true if error (out of memory), false if OK. */
1710 static int
1711 referentsvisit(PyObject *obj, PyObject *list)
1712 {
1713     return PyList_Append(list, obj) < 0;
1714 }
1715 
1716 PyDoc_STRVAR(gc_get_referents__doc__,
1717 "get_referents(*objs) -> list\n\
1718 Return the list of objects that are directly referred to by objs.");
1719 
1720 static PyObject *
1721 gc_get_referents(PyObject *self, PyObject *args)
1722 {
1723     Py_ssize_t i;
1724     if (PySys_Audit("gc.get_referents", "(O)", args) < 0) {
1725         return NULL;
1726     }
1727     PyObject *result = PyList_New(0);
1728 
1729     if (result == NULL)
1730         return NULL;
1731 
1732     for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1733         traverseproc traverse;
1734         PyObject *obj = PyTuple_GET_ITEM(args, i);
1735 
1736         if (!_PyObject_IS_GC(obj))
1737             continue;
1738         traverse = Py_TYPE(obj)->tp_traverse;
1739         if (! traverse)
1740             continue;
1741         if (traverse(obj, (visitproc)referentsvisit, result)) {
1742             Py_DECREF(result);
1743             return NULL;
1744         }
1745     }
1746     return result;
1747 }
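
/* Illustrative Python usage (a sketch): get_referents() reports what an
 * object's tp_traverse visits, so objects without GC support yield an empty
 * list.
 *
 *     import gc
 *
 *     item = [1, 2]
 *     holder = {"key": item}
 *     print(gc.get_referents(holder))   # contains both "key" and [1, 2]
 *     print(gc.get_referents(42))       # []: ints are not GC objects
 */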
1748 
1749 /*[clinic input]
1750 gc.get_objects
1751     generation: Py_ssize_t(accept={int, NoneType}, c_default="-1") = None
1752         Generation to extract the objects from.
1753 
1754 Return a list of objects tracked by the collector (excluding the list returned).
1755 
1756 If generation is not None, return only the objects tracked by the collector
1757 that are in that generation.
1758 [clinic start generated code]*/
1759 
1760 static PyObject *
1761 gc_get_objects_impl(PyObject *module, Py_ssize_t generation)
1762 /*[clinic end generated code: output=48b35fea4ba6cb0e input=ef7da9df9806754c]*/
1763 {
1764     PyThreadState *tstate = _PyThreadState_GET();
1765     int i;
1766     PyObject* result;
1767     GCState *gcstate = &tstate->interp->gc;
1768 
1769     if (PySys_Audit("gc.get_objects", "n", generation) < 0) {
1770         return NULL;
1771     }
1772 
1773     result = PyList_New(0);
1774     if (result == NULL) {
1775         return NULL;
1776     }
1777 
1778     /* If generation is passed, we extract only that generation */
1779     if (generation != -1) {
1780         if (generation >= NUM_GENERATIONS) {
1781             _PyErr_Format(tstate, PyExc_ValueError,
1782                           "generation parameter must be less than the number of "
1783                           "available generations (%i)",
1784                            NUM_GENERATIONS);
1785             goto error;
1786         }
1787 
1788         if (generation < 0) {
1789             _PyErr_SetString(tstate, PyExc_ValueError,
1790                              "generation parameter cannot be negative");
1791             goto error;
1792         }
1793 
1794         if (append_objects(result, GEN_HEAD(gcstate, generation))) {
1795             goto error;
1796         }
1797 
1798         return result;
1799     }
1800 
1801     /* If generation is not passed or None, get all objects from all generations */
1802     for (i = 0; i < NUM_GENERATIONS; i++) {
1803         if (append_objects(result, GEN_HEAD(gcstate, i))) {
1804             goto error;
1805         }
1806     }
1807     return result;
1808 
1809 error:
1810     Py_DECREF(result);
1811     return NULL;
1812 }
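
/* Illustrative Python usage (a sketch): the returned list itself is excluded,
 * and an out-of-range generation raises ValueError as checked above.
 *
 *     import gc
 *
 *     everything = gc.get_objects()             # all tracked objects
 *     youngest = gc.get_objects(generation=0)   # only generation 0
 *     try:
 *         gc.get_objects(generation=5)
 *     except ValueError:
 *         pass
 */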
1813 
1814 /*[clinic input]
1815 gc.get_stats
1816 
1817 Return a list of dictionaries containing per-generation statistics.
1818 [clinic start generated code]*/
1819 
1820 static PyObject *
1821 gc_get_stats_impl(PyObject *module)
1822 /*[clinic end generated code: output=a8ab1d8a5d26f3ab input=1ef4ed9d17b1a470]*/
1823 {
1824     int i;
1825     struct gc_generation_stats stats[NUM_GENERATIONS], *st;
1826 
1827     /* To get consistent values despite allocations while constructing
1828        the result list, we use a snapshot of the running stats. */
1829     GCState *gcstate = get_gc_state();
1830     for (i = 0; i < NUM_GENERATIONS; i++) {
1831         stats[i] = gcstate->generation_stats[i];
1832     }
1833 
1834     PyObject *result = PyList_New(0);
1835     if (result == NULL)
1836         return NULL;
1837 
1838     for (i = 0; i < NUM_GENERATIONS; i++) {
1839         PyObject *dict;
1840         st = &stats[i];
1841         dict = Py_BuildValue("{snsnsn}",
1842                              "collections", st->collections,
1843                              "collected", st->collected,
1844                              "uncollectable", st->uncollectable
1845                             );
1846         if (dict == NULL)
1847             goto error;
1848         if (PyList_Append(result, dict)) {
1849             Py_DECREF(dict);
1850             goto error;
1851         }
1852         Py_DECREF(dict);
1853     }
1854     return result;
1855 
1856 error:
1857     Py_XDECREF(result);
1858     return NULL;
1859 }
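
/* Illustrative Python usage (a sketch): one dict per generation, with the keys
 * built by Py_BuildValue() above.
 *
 *     import gc
 *
 *     for gen, st in enumerate(gc.get_stats()):
 *         print(gen, st["collections"], st["collected"], st["uncollectable"])
 */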
1860 
1861 
1862 /*[clinic input]
1863 gc.is_tracked
1864 
1865     obj: object
1866     /
1867 
1868 Returns true if the object is tracked by the garbage collector.
1869 
1870 Simple atomic objects will return false.
1871 [clinic start generated code]*/
1872 
1873 static PyObject *
1874 gc_is_tracked(PyObject *module, PyObject *obj)
1875 /*[clinic end generated code: output=14f0103423b28e31 input=d83057f170ea2723]*/
1876 {
1877     PyObject *result;
1878 
1879     if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj))
1880         result = Py_True;
1881     else
1882         result = Py_False;
1883     Py_INCREF(result);
1884     return result;
1885 }
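
/* Illustrative Python usage (a sketch): containers are tracked, simple atomic
 * objects are not.
 *
 *     import gc
 *
 *     assert gc.is_tracked([]) is True       # lists are always tracked
 *     assert gc.is_tracked(42) is False      # ints are atomic, never tracked
 *     assert gc.is_tracked("hello") is False
 */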
1886 
1887 /*[clinic input]
1888 gc.is_finalized
1889 
1890     obj: object
1891     /
1892 
1893 Returns true if the object has already been finalized by the GC.
1894 [clinic start generated code]*/
1895 
1896 static PyObject *
1897 gc_is_finalized(PyObject *module, PyObject *obj)
1898 /*[clinic end generated code: output=e1516ac119a918ed input=201d0c58f69ae390]*/
1899 {
1900     if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
1901          Py_RETURN_TRUE;
1902     }
1903     Py_RETURN_FALSE;
1904 }
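
/* Illustrative Python usage (a sketch based on the resurrection pattern): the
 * finalized flag is set once tp_finalize (__del__) has run, even if the object
 * is resurrected afterwards.
 *
 *     import gc
 *
 *     resurrected = None
 *
 *     class Lazarus:
 *         def __del__(self):
 *             global resurrected
 *             resurrected = self           # resurrect during finalization
 *
 *     obj = Lazarus()
 *     assert gc.is_finalized(obj) is False
 *     del obj                              # __del__ runs, object survives
 *     assert gc.is_finalized(resurrected) is True
 */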
1905 
1906 /*[clinic input]
1907 gc.freeze
1908 
1909 Freeze all currently tracked objects and ignore them in future collections.
1910 
1911 This can be used before a POSIX fork() call to make the gc copy-on-write friendly.
1912 Note: a collection run before a POSIX fork() call may free pages that are later
1913 reused for new allocations, which triggers copy-on-write in the child.
1914 [clinic start generated code]*/
1915 
1916 static PyObject *
1917 gc_freeze_impl(PyObject *module)
1918 /*[clinic end generated code: output=502159d9cdc4c139 input=b602b16ac5febbe5]*/
1919 {
1920     GCState *gcstate = get_gc_state();
1921     for (int i = 0; i < NUM_GENERATIONS; ++i) {
1922         gc_list_merge(GEN_HEAD(gcstate, i), &gcstate->permanent_generation.head);
1923         gcstate->generations[i].count = 0;
1924     }
1925     Py_RETURN_NONE;
1926 }
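
/* Illustrative Python usage (a sketch of the copy-on-write-friendly pattern
 * the docstring refers to; os.fork() is POSIX-only):
 *
 *     import gc, os
 *
 *     gc.disable()      # avoid freeing pages right before fork()
 *     # ... build long-lived data structures shared with the workers ...
 *     gc.freeze()       # move all tracked objects to the permanent generation
 *     pid = os.fork()
 *     if pid == 0:
 *         gc.enable()   # child: collect only objects created after the fork
 */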
1927 
1928 /*[clinic input]
1929 gc.unfreeze
1930 
1931 Unfreeze all objects in the permanent generation.
1932 
1933 Put all objects in the permanent generation back into the oldest generation.
1934 [clinic start generated code]*/
1935 
1936 static PyObject *
1937 gc_unfreeze_impl(PyObject *module)
1938 /*[clinic end generated code: output=1c15f2043b25e169 input=2dd52b170f4cef6c]*/
1939 {
1940     GCState *gcstate = get_gc_state();
1941     gc_list_merge(&gcstate->permanent_generation.head,
1942                   GEN_HEAD(gcstate, NUM_GENERATIONS-1));
1943     Py_RETURN_NONE;
1944 }
1945 
1946 /*[clinic input]
1947 gc.get_freeze_count -> Py_ssize_t
1948 
1949 Return the number of objects in the permanent generation.
1950 [clinic start generated code]*/
1951 
1952 static Py_ssize_t
1953 gc_get_freeze_count_impl(PyObject *module)
1954 /*[clinic end generated code: output=61cbd9f43aa032e1 input=45ffbc65cfe2a6ed]*/
1955 {
1956     GCState *gcstate = get_gc_state();
1957     return gc_list_size(&gcstate->permanent_generation.head);
1958 }
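
/* Illustrative Python usage (a sketch): get_freeze_count() reports the size of
 * the permanent generation, and unfreeze() empties it back into the oldest
 * generation.
 *
 *     import gc
 *
 *     gc.freeze()
 *     print(gc.get_freeze_count())   # > 0: everything tracked is now frozen
 *     gc.unfreeze()
 *     print(gc.get_freeze_count())   # 0
 */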
1959 
1960 
1961 PyDoc_STRVAR(gc__doc__,
1962 "This module provides access to the garbage collector for reference cycles.\n"
1963 "\n"
1964 "enable() -- Enable automatic garbage collection.\n"
1965 "disable() -- Disable automatic garbage collection.\n"
1966 "isenabled() -- Returns true if automatic collection is enabled.\n"
1967 "collect() -- Do a full collection right now.\n"
1968 "get_count() -- Return the current collection counts.\n"
1969 "get_stats() -- Return list of dictionaries containing per-generation stats.\n"
1970 "set_debug() -- Set debugging flags.\n"
1971 "get_debug() -- Get debugging flags.\n"
1972 "set_threshold() -- Set the collection thresholds.\n"
1973 "get_threshold() -- Return the current the collection thresholds.\n"
1974 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1975 "is_tracked() -- Returns true if a given object is tracked.\n"
1976 "is_finalized() -- Returns true if a given object has been already finalized.\n"
1977 "get_referrers() -- Return the list of objects that refer to an object.\n"
1978 "get_referents() -- Return the list of objects that an object refers to.\n"
1979 "freeze() -- Freeze all tracked objects and ignore them for future collections.\n"
1980 "unfreeze() -- Unfreeze all objects in the permanent generation.\n"
1981 "get_freeze_count() -- Return the number of objects in the permanent generation.\n");
1982 
1983 static PyMethodDef GcMethods[] = {
1984     GC_ENABLE_METHODDEF
1985     GC_DISABLE_METHODDEF
1986     GC_ISENABLED_METHODDEF
1987     GC_SET_DEBUG_METHODDEF
1988     GC_GET_DEBUG_METHODDEF
1989     GC_GET_COUNT_METHODDEF
1990     {"set_threshold",  gc_set_threshold, METH_VARARGS, gc_set_thresh__doc__},
1991     GC_GET_THRESHOLD_METHODDEF
1992     GC_COLLECT_METHODDEF
1993     GC_GET_OBJECTS_METHODDEF
1994     GC_GET_STATS_METHODDEF
1995     GC_IS_TRACKED_METHODDEF
1996     GC_IS_FINALIZED_METHODDEF
1997     {"get_referrers",  gc_get_referrers, METH_VARARGS,
1998         gc_get_referrers__doc__},
1999     {"get_referents",  gc_get_referents, METH_VARARGS,
2000         gc_get_referents__doc__},
2001     GC_FREEZE_METHODDEF
2002     GC_UNFREEZE_METHODDEF
2003     GC_GET_FREEZE_COUNT_METHODDEF
2004     {NULL,      NULL}           /* Sentinel */
2005 };
2006 
2007 static int
2008 gcmodule_exec(PyObject *module)
2009 {
2010     GCState *gcstate = get_gc_state();
2011 
2012     /* garbage and callbacks are initialized by _PyGC_Init() early in the
2013      * interpreter lifecycle. */
2014     assert(gcstate->garbage != NULL);
2015     if (PyModule_AddObjectRef(module, "garbage", gcstate->garbage) < 0) {
2016         return -1;
2017     }
2018     assert(gcstate->callbacks != NULL);
2019     if (PyModule_AddObjectRef(module, "callbacks", gcstate->callbacks) < 0) {
2020         return -1;
2021     }
2022 
2023 #define ADD_INT(NAME) if (PyModule_AddIntConstant(module, #NAME, NAME) < 0) { return -1; }
2024     ADD_INT(DEBUG_STATS);
2025     ADD_INT(DEBUG_COLLECTABLE);
2026     ADD_INT(DEBUG_UNCOLLECTABLE);
2027     ADD_INT(DEBUG_SAVEALL);
2028     ADD_INT(DEBUG_LEAK);
2029 #undef ADD_INT
2030     return 0;
2031 }
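
/* Illustrative Python usage of the gc.garbage list and the DEBUG_* constants
 * exported above (a sketch): with DEBUG_SAVEALL set, unreachable objects are
 * appended to gc.garbage instead of being freed.
 *
 *     import gc
 *
 *     gc.set_debug(gc.DEBUG_SAVEALL)
 *     cycle = []
 *     cycle.append(cycle)        # a self-referential cycle
 *     del cycle
 *     gc.collect()
 *     print(gc.garbage)          # the saved, formerly unreachable objects
 *     gc.set_debug(0)
 *     gc.garbage.clear()
 */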
2032 
2033 static PyModuleDef_Slot gcmodule_slots[] = {
2034     {Py_mod_exec, gcmodule_exec},
2035     {0, NULL}
2036 };
2037 
2038 static struct PyModuleDef gcmodule = {
2039     PyModuleDef_HEAD_INIT,
2040     .m_name = "gc",
2041     .m_doc = gc__doc__,
2042     .m_size = 0,  // per interpreter state, see: get_gc_state()
2043     .m_methods = GcMethods,
2044     .m_slots = gcmodule_slots
2045 };
2046 
2047 PyMODINIT_FUNC
2048 PyInit_gc(void)
2049 {
2050     return PyModuleDef_Init(&gcmodule);
2051 }
2052 
2053 /* C API for controlling the state of the garbage collector */
2054 int
2055 PyGC_Enable(void)
2056 {
2057     GCState *gcstate = get_gc_state();
2058     int old_state = gcstate->enabled;
2059     gcstate->enabled = 1;
2060     return old_state;
2061 }
2062 
2063 int
2064 PyGC_Disable(void)
2065 {
2066     GCState *gcstate = get_gc_state();
2067     int old_state = gcstate->enabled;
2068     gcstate->enabled = 0;
2069     return old_state;
2070 }
2071 
2072 int
2073 PyGC_IsEnabled(void)
2074 {
2075     GCState *gcstate = get_gc_state();
2076     return gcstate->enabled;
2077 }
2078 
2079 /* Public API to invoke gc.collect() from C */
2080 Py_ssize_t
2081 PyGC_Collect(void)
2082 {
2083     PyThreadState *tstate = _PyThreadState_GET();
2084     GCState *gcstate = &tstate->interp->gc;
2085 
2086     if (!gcstate->enabled) {
2087         return 0;
2088     }
2089 
2090     Py_ssize_t n;
2091     if (gcstate->collecting) {
2092         /* already collecting, don't do anything */
2093         n = 0;
2094     }
2095     else {
2096         PyObject *exc, *value, *tb;
2097         gcstate->collecting = 1;
2098         _PyErr_Fetch(tstate, &exc, &value, &tb);
2099         n = gc_collect_with_callback(tstate, NUM_GENERATIONS - 1);
2100         _PyErr_Restore(tstate, exc, value, tb);
2101         gcstate->collecting = 0;
2102     }
2103 
2104     return n;
2105 }
2106 
2107 Py_ssize_t
2108 _PyGC_CollectNoFail(PyThreadState *tstate)
2109 {
2110     /* Ideally, this function is only called on interpreter shutdown,
2111        and therefore not recursively.  Unfortunately, when there are daemon
2112        threads, a daemon thread can start a cyclic garbage collection
2113        during interpreter shutdown (and then never finish it).
2114        See http://bugs.python.org/issue8713#msg195178 for an example.
2115        */
2116     GCState *gcstate = &tstate->interp->gc;
2117     if (gcstate->collecting) {
2118         return 0;
2119     }
2120 
2121     Py_ssize_t n;
2122     gcstate->collecting = 1;
2123     n = gc_collect_main(tstate, NUM_GENERATIONS - 1, NULL, NULL, 1);
2124     gcstate->collecting = 0;
2125     return n;
2126 }
2127 
2128 void
2129 _PyGC_DumpShutdownStats(PyInterpreterState *interp)
2130 {
2131     GCState *gcstate = &interp->gc;
2132     if (!(gcstate->debug & DEBUG_SAVEALL)
2133         && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
2134         const char *message;
2135         if (gcstate->debug & DEBUG_UNCOLLECTABLE)
2136             message = "gc: %zd uncollectable objects at " \
2137                 "shutdown";
2138         else
2139             message = "gc: %zd uncollectable objects at " \
2140                 "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
2141         /* PyErr_WarnFormat does too many things, and we are at shutdown:
2142            the warnings module's dependencies (e.g. linecache) may be gone
2143            already. */
2144         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
2145                                      "gc", NULL, message,
2146                                      PyList_GET_SIZE(gcstate->garbage)))
2147             PyErr_WriteUnraisable(NULL);
2148         if (gcstate->debug & DEBUG_UNCOLLECTABLE) {
2149             PyObject *repr = NULL, *bytes = NULL;
2150             repr = PyObject_Repr(gcstate->garbage);
2151             if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
2152                 PyErr_WriteUnraisable(gcstate->garbage);
2153             else {
2154                 PySys_WriteStderr(
2155                     "      %s\n",
2156                     PyBytes_AS_STRING(bytes)
2157                     );
2158             }
2159             Py_XDECREF(repr);
2160             Py_XDECREF(bytes);
2161         }
2162     }
2163 }
2164 
2165 
2166 static void
2167 gc_fini_untrack(PyGC_Head *list)
2168 {
2169     PyGC_Head *gc;
2170     for (gc = GC_NEXT(list); gc != list; gc = GC_NEXT(list)) {
2171         PyObject *op = FROM_GC(gc);
2172         _PyObject_GC_UNTRACK(op);
2173     }
2174 }
2175 
2176 
2177 void
2178 _PyGC_Fini(PyInterpreterState *interp)
2179 {
2180     GCState *gcstate = &interp->gc;
2181     Py_CLEAR(gcstate->garbage);
2182     Py_CLEAR(gcstate->callbacks);
2183 
2184     if (!_Py_IsMainInterpreter(interp)) {
2185         // bpo-46070: Explicitly untrack all objects currently tracked by the
2186         // GC. Otherwise, if an object is used later by another interpreter,
2187         // calling PyObject_GC_UnTrack() on the object crashes if the previous
2188         // or the next object in the PyGC_Head list has become a dangling
2189         // pointer.
2190         for (int i = 0; i < NUM_GENERATIONS; i++) {
2191             PyGC_Head *gen = GEN_HEAD(gcstate, i);
2192             gc_fini_untrack(gen);
2193         }
2194     }
2195 }
2196 
2197 /* for debugging */
2198 void
2199 _PyGC_Dump(PyGC_Head *g)
2200 {
2201     _PyObject_Dump(FROM_GC(g));
2202 }
2203 
2204 
2205 #ifdef Py_DEBUG
2206 static int
2207 visit_validate(PyObject *op, void *parent_raw)
2208 {
2209     PyObject *parent = _PyObject_CAST(parent_raw);
2210     if (_PyObject_IsFreed(op)) {
2211         _PyObject_ASSERT_FAILED_MSG(parent,
2212                                     "PyObject_GC_Track() object is not valid");
2213     }
2214     return 0;
2215 }
2216 #endif
2217 
2218 
2219 /* extension modules might be compiled with GC support so these
2220    functions must always be available */
2221 
2222 void
2223 PyObject_GC_Track(void *op_raw)
2224 {
2225     PyObject *op = _PyObject_CAST(op_raw);
2226     if (_PyObject_GC_IS_TRACKED(op)) {
2227         _PyObject_ASSERT_FAILED_MSG(op,
2228                                     "object already tracked "
2229                                     "by the garbage collector");
2230     }
2231     _PyObject_GC_TRACK(op);
2232 
2233 #ifdef Py_DEBUG
2234     /* Check that the object is valid: validate objects traversed
2235        by tp_traverse() */
2236     traverseproc traverse = Py_TYPE(op)->tp_traverse;
2237     (void)traverse(op, visit_validate, op);
2238 #endif
2239 }
2240 
2241 void
2242 PyObject_GC_UnTrack(void *op_raw)
2243 {
2244     PyObject *op = _PyObject_CAST(op_raw);
2245     /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
2246      * call PyObject_GC_UnTrack twice on an object.
2247      */
2248     if (_PyObject_GC_IS_TRACKED(op)) {
2249         _PyObject_GC_UNTRACK(op);
2250     }
2251 }
2252 
2253 int
2254 PyObject_IS_GC(PyObject *obj)
2255 {
2256     return _PyObject_IS_GC(obj);
2257 }
2258 
2259 static PyObject *
2260 _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
2261 {
2262     PyThreadState *tstate = _PyThreadState_GET();
2263     GCState *gcstate = &tstate->interp->gc;
2264     if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2265         return _PyErr_NoMemory(tstate);
2266     }
2267     size_t size = sizeof(PyGC_Head) + basicsize;
2268 
2269     PyGC_Head *g;
2270     if (use_calloc) {
2271         g = (PyGC_Head *)PyObject_Calloc(1, size);
2272     }
2273     else {
2274         g = (PyGC_Head *)PyObject_Malloc(size);
2275     }
2276     if (g == NULL) {
2277         return _PyErr_NoMemory(tstate);
2278     }
2279     assert(((uintptr_t)g & 3) == 0);  // g must be aligned on a 4-byte boundary
2280 
2281     g->_gc_next = 0;
2282     g->_gc_prev = 0;
2283     gcstate->generations[0].count++; /* number of allocated GC objects */
2284     if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
2285         gcstate->enabled &&
2286         gcstate->generations[0].threshold &&
2287         !gcstate->collecting &&
2288         !_PyErr_Occurred(tstate))
2289     {
2290         gcstate->collecting = 1;
2291         gc_collect_generations(tstate);
2292         gcstate->collecting = 0;
2293     }
2294     PyObject *op = FROM_GC(g);
2295     return op;
2296 }
2297 
2298 PyObject *
2299 _PyObject_GC_Malloc(size_t basicsize)
2300 {
2301     return _PyObject_GC_Alloc(0, basicsize);
2302 }
2303 
2304 PyObject *
2305 _PyObject_GC_Calloc(size_t basicsize)
2306 {
2307     return _PyObject_GC_Alloc(1, basicsize);
2308 }
2309 
2310 PyObject *
2311 _PyObject_GC_New(PyTypeObject *tp)
2312 {
2313     PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
2314     if (op == NULL) {
2315         return NULL;
2316     }
2317     _PyObject_Init(op, tp);
2318     return op;
2319 }
2320 
2321 PyVarObject *
2322 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
2323 {
2324     size_t size;
2325     PyVarObject *op;
2326 
2327     if (nitems < 0) {
2328         PyErr_BadInternalCall();
2329         return NULL;
2330     }
2331     size = _PyObject_VAR_SIZE(tp, nitems);
2332     op = (PyVarObject *) _PyObject_GC_Malloc(size);
2333     if (op == NULL) {
2334         return NULL;
2335     }
2336     _PyObject_InitVar(op, tp, nitems);
2337     return op;
2338 }
2339 
2340 PyVarObject *
2341 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
2342 {
2343     const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
2344     _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
2345     if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head)) {
2346         return (PyVarObject *)PyErr_NoMemory();
2347     }
2348 
2349     PyGC_Head *g = AS_GC(op);
2350     g = (PyGC_Head *)PyObject_Realloc(g,  sizeof(PyGC_Head) + basicsize);
2351     if (g == NULL)
2352         return (PyVarObject *)PyErr_NoMemory();
2353     op = (PyVarObject *) FROM_GC(g);
2354     Py_SET_SIZE(op, nitems);
2355     return op;
2356 }
2357 
2358 void
2359 PyObject_GC_Del(void *op)
2360 {
2361     PyGC_Head *g = AS_GC(op);
2362     if (_PyObject_GC_IS_TRACKED(op)) {
2363         gc_list_remove(g);
2364     }
2365     GCState *gcstate = get_gc_state();
2366     if (gcstate->generations[0].count > 0) {
2367         gcstate->generations[0].count--;
2368     }
2369     PyObject_Free(g);
2370 }
2371 
2372 int
2373 PyObject_GC_IsTracked(PyObject* obj)
2374 {
2375     if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
2376         return 1;
2377     }
2378     return 0;
2379 }
2380 
2381 int
2382 PyObject_GC_IsFinalized(PyObject *obj)
2383 {
2384     if (_PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(AS_GC(obj))) {
2385          return 1;
2386     }
2387     return 0;
2388 }
2389