1 /*
2
3 Reference Cycle Garbage Collection
4 ==================================
5
6 Neil Schemenauer <nas@arctrix.com>
7
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
10
11 http://www.arctrix.com/nas/python/gc/
12
13 The following mailing list threads provide a historical perspective on
14 the design of this module. Note that a fair amount of refinement has
15 occurred since those discussions.
16
17 http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18 http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19 http://mail.python.org/pipermail/python-dev/2000-March/002497.html
20
21 For a high-level view of the collection process, read the collect
22 function.
23
24 */
25
26 #include "Python.h"
27 #include "frameobject.h" /* for PyFrame_ClearFreeList */
28 #include "pydtrace.h"
29 #include "pytime.h" /* for _PyTime_GetMonotonicClock() */
30
31 /* Get an object's GC head */
32 #define AS_GC(o) ((PyGC_Head *)(o)-1)
33
34 /* Get the object given the GC head */
35 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
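/* A small sketch of the layout these two macros rely on: the PyGC_Head is
 * allocated immediately before the object it tracks (see _PyObject_GC_Alloc
 * below), so AS_GC and FROM_GC are inverse pointer adjustments:
 *
 *     PyGC_Head *g = AS_GC(op);    // op must be a gc'ed object
 *     assert(FROM_GC(g) == op);
 */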
36
37 /*** Global GC state ***/
38
39 struct gc_generation {
40 PyGC_Head head;
41 int threshold; /* collection threshold */
42 int count; /* count of allocations or collections of younger
43 generations */
44 };
45
46 #define NUM_GENERATIONS 3
47 #define GEN_HEAD(n) (&generations[n].head)
48
49 /* linked lists of container objects */
50 static struct gc_generation generations[NUM_GENERATIONS] = {
51 /* PyGC_Head, threshold, count */
52 {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0},
53 {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0},
54 {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0},
55 };
56
57 PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
58
59 static int enabled = 1; /* automatic collection enabled? */
60
61 /* true if we are currently running the collector */
62 static int collecting = 0;
63
64 /* list of uncollectable objects */
65 static PyObject *garbage = NULL;
66
67 /* Python string to use if an unhandled exception occurs */
68 static PyObject *gc_str = NULL;
69
70 /* a list of callbacks to be invoked when collection is performed */
71 static PyObject *callbacks = NULL;
72
73 /* This is the number of objects that survived the last full collection. It
74 approximates the number of long lived objects tracked by the GC.
75
76 (by "full collection", we mean a collection of the oldest generation).
77 */
78 static Py_ssize_t long_lived_total = 0;
79
80 /* This is the number of objects that survived all "non-full" collections,
81 and are waiting to undergo a full collection for the first time.
82
83 */
84 static Py_ssize_t long_lived_pending = 0;
85
86 /*
87 NOTE: about the counting of long-lived objects.
88
89 To limit the cost of garbage collection, there are two strategies:
90 - make each collection faster, e.g. by scanning fewer objects
91 - do fewer collections
92 This heuristic is about the latter strategy.
93
94 In addition to the various configurable thresholds, we only trigger a
95 full collection if the ratio
96 long_lived_pending / long_lived_total
97 is above a given value (hardwired to 25%).
98
99 The reason is that, while "non-full" collections (i.e., collections of
100 the young and middle generations) will always examine roughly the same
101 number of objects -- determined by the aforementioned thresholds --,
102 the cost of a full collection is proportional to the total number of
103 long-lived objects, which is virtually unbounded.
104
105 Indeed, it has been remarked that doing a full collection every
106 <constant number> of object creations entails a dramatic performance
107 degradation in workloads which consist of creating and storing lots of
108 long-lived objects (e.g. building a large list of GC-tracked objects would
109 show quadratic performance, instead of linear as expected: see issue #4074).
110
111 Using the above ratio, instead, yields amortized linear performance in
112 the total number of objects (the effect of which can be summarized
113 thusly: "each full garbage collection is more and more costly as the
114 number of objects grows, but we do fewer and fewer of them").
115
116 This heuristic was suggested by Martin von Löwis on python-dev in
117 June 2008. His original analysis and proposal can be found at:
118 http://mail.python.org/pipermail/python-dev/2008-June/080579.html
119 */
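/*
A worked example of the ratio rule above (the numbers are hypothetical): if
the last full collection left long_lived_total == 100000, the next full
collection is deferred until long_lived_pending exceeds 100000 / 4 == 25000,
even once generation 2's count is over its threshold; see the corresponding
test in collect_generations() below.
*/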
120
121 /*
122 NOTE: about untracking of mutable objects.
123
124 Certain types of container cannot participate in a reference cycle, and
125 so do not need to be tracked by the garbage collector. Untracking these
126 objects reduces the cost of garbage collections. However, determining
127 which objects may be untracked is not free, and the costs must be
128 weighed against the benefits for garbage collection.
129
130 There are two possible strategies for when to untrack a container:
131
132 i) When the container is created.
133 ii) When the container is examined by the garbage collector.
134
135 Tuples containing only immutable objects (integers, strings, etc., and
136 recursively, tuples of immutable objects) do not need to be tracked.
137 The interpreter creates a large number of tuples, many of which will
138 not survive until garbage collection. It is therefore not worthwhile
139 to untrack eligible tuples at creation time.
140
141 Instead, all tuples except the empty tuple are tracked when created.
142 During garbage collection it is determined whether any surviving tuples
143 can be untracked. A tuple can be untracked if all of its contents are
144 already not tracked. Tuples are examined for untracking in all garbage
145 collection cycles. It may take more than one cycle to untrack a tuple.
146
147 Dictionaries containing only immutable objects also do not need to be
148 tracked. Dictionaries are untracked when created. If a tracked item is
149 inserted into a dictionary (either as a key or value), the dictionary
150 becomes tracked. During a full garbage collection (all generations),
151 the collector will untrack any dictionaries whose contents are not
152 tracked.
153
154 The module provides the Python function is_tracked(obj), which returns
155 the CURRENT tracking status of the object. Subsequent garbage
156 collections may change the tracking status of the object.
157
158 Untracking of certain containers was introduced in issue #4688, and
159 the algorithm was refined in response to issue #14775.
160 */
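#if 0
/* Illustrative only (not compiled as part of this file): the tracking rules
 * above, observed through the C API.  _PyObject_GC_IS_TRACKED() is the
 * internal macro for querying the tracking status; the objects themselves
 * are hypothetical. */
PyObject *t = Py_BuildValue("(ii)", 1, 2);   /* non-empty tuple: tracked at creation */
assert(_PyObject_GC_IS_TRACKED(t));
PyGC_Collect();                              /* a collection examines t ... */
assert(!_PyObject_GC_IS_TRACKED(t));         /* ... and untracks it (ints aren't tracked) */

PyObject *d = PyDict_New();                  /* dicts start untracked */
PyObject *v = PyList_New(0);                 /* lists are always tracked */
assert(!_PyObject_GC_IS_TRACKED(d));
PyDict_SetItemString(d, "k", v);             /* inserting a tracked value ... */
assert(_PyObject_GC_IS_TRACKED(d));          /* ... makes the dict tracked again */
#endif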
161
162 /* set for debugging information */
163 #define DEBUG_STATS (1<<0) /* print collection statistics */
164 #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
165 #define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */
166 #define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */
167 #define DEBUG_LEAK (DEBUG_COLLECTABLE | \
168 DEBUG_UNCOLLECTABLE | \
169 DEBUG_SAVEALL)
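/* Worked value: DEBUG_LEAK == 0x02 | 0x04 | 0x20 == 0x26 ("everything but
 * STATS", as the set_debug docstring below puts it). */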
170 static int debug;
171
172 /* Running stats per generation */
173 struct gc_generation_stats {
174 /* total number of collections */
175 Py_ssize_t collections;
176 /* total number of collected objects */
177 Py_ssize_t collected;
178 /* total number of uncollectable objects (put into gc.garbage) */
179 Py_ssize_t uncollectable;
180 };
181
182 static struct gc_generation_stats generation_stats[NUM_GENERATIONS];
183
184 /*--------------------------------------------------------------------------
185 gc_refs values.
186
187 Between collections, every gc'ed object has one of two gc_refs values:
188
189 GC_UNTRACKED
190 The initial state; objects returned by PyObject_GC_Malloc are in this
191 state. The object doesn't live in any generation list, and its
192 tp_traverse slot must not be called.
193
194 GC_REACHABLE
195 The object lives in some generation list, and its tp_traverse is safe to
196 call. An object transitions to GC_REACHABLE when PyObject_GC_Track
197 is called.
198
199 During a collection, gc_refs can temporarily take on other states:
200
201 >= 0
202 At the start of a collection, update_refs() copies the true refcount
203 to gc_refs, for each object in the generation being collected.
204 subtract_refs() then adjusts gc_refs so that it equals the number of
205 times an object is referenced directly from outside the generation
206 being collected.
207 gc_refs remains >= 0 throughout these steps.
208
209 GC_TENTATIVELY_UNREACHABLE
210 move_unreachable() then moves objects not reachable (whether directly or
211 indirectly) from outside the generation into an "unreachable" set.
212 Objects that are found to be reachable have gc_refs set to GC_REACHABLE
213 again. Objects that are found to be unreachable have gc_refs set to
214 GC_TENTATIVELY_UNREACHABLE. It's "tentatively" because the pass doing
215 this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
216 transition back to GC_REACHABLE.
217
218 Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
219 for collection. If it's decided not to collect such an object (e.g.,
220 it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
221 ----------------------------------------------------------------------------
222 */
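/* A compact sketch of the lifecycle described above, in terms of the passes
 * defined below:
 *
 *   GC_REACHABLE --update_refs()-->      gc_refs = ob_refcnt          (> 0)
 *                --subtract_refs()-->    gc_refs = external refcount  (>= 0)
 *                --move_unreachable()--> GC_REACHABLE                 (if reachable)
 *                                     or GC_TENTATIVELY_UNREACHABLE   (if not)
 */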
223 #define GC_UNTRACKED _PyGC_REFS_UNTRACKED
224 #define GC_REACHABLE _PyGC_REFS_REACHABLE
225 #define GC_TENTATIVELY_UNREACHABLE _PyGC_REFS_TENTATIVELY_UNREACHABLE
226
227 #define IS_TRACKED(o) (_PyGC_REFS(o) != GC_UNTRACKED)
228 #define IS_REACHABLE(o) (_PyGC_REFS(o) == GC_REACHABLE)
229 #define IS_TENTATIVELY_UNREACHABLE(o) ( \
230 _PyGC_REFS(o) == GC_TENTATIVELY_UNREACHABLE)
231
232 /*** list functions ***/
233
234 static void
235 gc_list_init(PyGC_Head *list)
236 {
237 list->gc.gc_prev = list;
238 list->gc.gc_next = list;
239 }
240
241 static int
242 gc_list_is_empty(PyGC_Head *list)
243 {
244 return (list->gc.gc_next == list);
245 }
246
247 #if 0
248 /* This became unused after gc_list_move() was introduced. */
249 /* Append `node` to `list`. */
250 static void
251 gc_list_append(PyGC_Head *node, PyGC_Head *list)
252 {
253 node->gc.gc_next = list;
254 node->gc.gc_prev = list->gc.gc_prev;
255 node->gc.gc_prev->gc.gc_next = node;
256 list->gc.gc_prev = node;
257 }
258 #endif
259
260 /* Remove `node` from the gc list it's currently in. */
261 static void
262 gc_list_remove(PyGC_Head *node)
263 {
264 node->gc.gc_prev->gc.gc_next = node->gc.gc_next;
265 node->gc.gc_next->gc.gc_prev = node->gc.gc_prev;
266 node->gc.gc_next = NULL; /* object is not currently tracked */
267 }
268
269 /* Move `node` from the gc list it's currently in (which is not explicitly
270 * named here) to the end of `list`. This is semantically the same as
271 * gc_list_remove(node) followed by gc_list_append(node, list).
272 */
273 static void
274 gc_list_move(PyGC_Head *node, PyGC_Head *list)
275 {
276 PyGC_Head *new_prev;
277 PyGC_Head *current_prev = node->gc.gc_prev;
278 PyGC_Head *current_next = node->gc.gc_next;
279 /* Unlink from current list. */
280 current_prev->gc.gc_next = current_next;
281 current_next->gc.gc_prev = current_prev;
282 /* Relink at end of new list. */
283 new_prev = node->gc.gc_prev = list->gc.gc_prev;
284 new_prev->gc.gc_next = list->gc.gc_prev = node;
285 node->gc.gc_next = list;
286 }
287
288 /* append list `from` onto list `to`; `from` becomes an empty list */
289 static void
290 gc_list_merge(PyGC_Head *from, PyGC_Head *to)
291 {
292 PyGC_Head *tail;
293 assert(from != to);
294 if (!gc_list_is_empty(from)) {
295 tail = to->gc.gc_prev;
296 tail->gc.gc_next = from->gc.gc_next;
297 tail->gc.gc_next->gc.gc_prev = tail;
298 to->gc.gc_prev = from->gc.gc_prev;
299 to->gc.gc_prev->gc.gc_next = to;
300 }
301 gc_list_init(from);
302 }
303
304 static Py_ssize_t
305 gc_list_size(PyGC_Head *list)
306 {
307 PyGC_Head *gc;
308 Py_ssize_t n = 0;
309 for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
310 n++;
311 }
312 return n;
313 }
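/* A quick illustration of the invariant these helpers maintain (a sketch;
 * nothing here is compiled): every list is circular, and the PyGC_Head
 * passed to gc_list_init() acts as a sentinel:
 *
 *     PyGC_Head list;
 *     gc_list_init(&list);
 *     assert(gc_list_is_empty(&list));   // the sentinel points at itself
 *     assert(gc_list_size(&list) == 0);
 *     // after gc_list_move(&n, &list) for some node n already on a list:
 *     //   list.gc.gc_next == &n  and  n.gc.gc_next == &list
 */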
314
315 /* Append objects in a GC list to a Python list.
316 * Return 0 if all OK, < 0 if error (out of memory for list).
317 */
318 static int
319 append_objects(PyObject *py_list, PyGC_Head *gc_list)
320 {
321 PyGC_Head *gc;
322 for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
323 PyObject *op = FROM_GC(gc);
324 if (op != py_list) {
325 if (PyList_Append(py_list, op)) {
326 return -1; /* exception */
327 }
328 }
329 }
330 return 0;
331 }
332
333 /*** end of list stuff ***/
334
335
336 /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
337 * in containers, and is GC_REACHABLE for all tracked gc objects not in
338 * containers.
339 */
340 static void
341 update_refs(PyGC_Head *containers)
342 {
343 PyGC_Head *gc = containers->gc.gc_next;
344 for (; gc != containers; gc = gc->gc.gc_next) {
345 assert(_PyGCHead_REFS(gc) == GC_REACHABLE);
346 _PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc)));
347 /* Python's cyclic gc should never see an incoming refcount
348 * of 0: if something decref'ed to 0, it should have been
349 * deallocated immediately at that time.
350 * Possible cause (if the assert triggers): a tp_dealloc
351 * routine left a gc-aware object tracked during its teardown
352 * phase, and did something-- or allowed something to happen --
353 * that called back into Python. gc can trigger then, and may
354 * see the still-tracked dying object. Before this assert
355 * was added, such mistakes went on to allow gc to try to
356 * delete the object again. In a debug build, that caused
357 * a mysterious segfault, when _Py_ForgetReference tried
358 * to remove the object from the doubly-linked list of all
359 * objects a second time. In a release build, an actual
360 * double deallocation occurred, which leads to corruption
361 * of the allocator's internal bookkeeping pointers. That's
362 * so serious that maybe this should be a release-build
363 * check instead of an assert?
364 */
365 assert(_PyGCHead_REFS(gc) != 0);
366 }
367 }
368
369 /* A traversal callback for subtract_refs. */
370 static int
371 visit_decref(PyObject *op, void *data)
372 {
373 assert(op != NULL);
374 if (PyObject_IS_GC(op)) {
375 PyGC_Head *gc = AS_GC(op);
376 /* We're only interested in gc_refs for objects in the
377 * generation being collected, which can be recognized
378 * because only they have positive gc_refs.
379 */
380 assert(_PyGCHead_REFS(gc) != 0); /* else refcount was too small */
381 if (_PyGCHead_REFS(gc) > 0)
382 _PyGCHead_DECREF(gc);
383 }
384 return 0;
385 }
386
387 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
388 * for all objects in containers, and is GC_REACHABLE for all tracked gc
389 * objects not in containers. The ones with gc_refs > 0 are directly
390 * reachable from outside containers, and so can't be collected.
391 */
392 static void
393 subtract_refs(PyGC_Head *containers)
394 {
395 traverseproc traverse;
396 PyGC_Head *gc = containers->gc.gc_next;
397 for (; gc != containers; gc=gc->gc.gc_next) {
398 traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
399 (void) traverse(FROM_GC(gc),
400 (visitproc)visit_decref,
401 NULL);
402 }
403 }
404
405 /* A traversal callback for move_unreachable. */
406 static int
407 visit_reachable(PyObject *op, PyGC_Head *reachable)
408 {
409 if (PyObject_IS_GC(op)) {
410 PyGC_Head *gc = AS_GC(op);
411 const Py_ssize_t gc_refs = _PyGCHead_REFS(gc);
412
413 if (gc_refs == 0) {
414 /* This is in move_unreachable's 'young' list, but
415 * the traversal hasn't yet gotten to it. All
416 * we need to do is tell move_unreachable that it's
417 * reachable.
418 */
419 _PyGCHead_SET_REFS(gc, 1);
420 }
421 else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
422 /* This had gc_refs = 0 when move_unreachable got
423 * to it, but turns out it's reachable after all.
424 * Move it back to move_unreachable's 'young' list,
425 * and move_unreachable will eventually get to it
426 * again.
427 */
428 gc_list_move(gc, reachable);
429 _PyGCHead_SET_REFS(gc, 1);
430 }
431 /* Else there's nothing to do.
432 * If gc_refs > 0, it must be in move_unreachable's 'young'
433 * list, and move_unreachable will eventually get to it.
434 * If gc_refs == GC_REACHABLE, it's either in some other
435 * generation so we don't care about it, or move_unreachable
436 * already dealt with it.
437 * If gc_refs == GC_UNTRACKED, it must be ignored.
438 */
439 else {
440 assert(gc_refs > 0
441 || gc_refs == GC_REACHABLE
442 || gc_refs == GC_UNTRACKED);
443 }
444 }
445 return 0;
446 }
447
448 /* Move the unreachable objects from young to unreachable. After this,
449 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
450 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
451 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
452 * All objects in young after this are directly or indirectly reachable
453 * from outside the original young; and all objects in unreachable are
454 * not.
455 */
456 static void
457 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
458 {
459 PyGC_Head *gc = young->gc.gc_next;
460
461 /* Invariants: all objects "to the left" of us in young have gc_refs
462 * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
463 * from outside the young list as it was at entry. All other objects
464 * from the original young "to the left" of us are in unreachable now,
465 * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
466 * left of us in 'young' now have been scanned, and no objects here
467 * or to the right have been scanned yet.
468 */
469
470 while (gc != young) {
471 PyGC_Head *next;
472
473 if (_PyGCHead_REFS(gc)) {
474 /* gc is definitely reachable from outside the
475 * original 'young'. Mark it as such, and traverse
476 * its pointers to find any other objects that may
477 * be directly reachable from it. Note that the
478 * call to tp_traverse may append objects to young,
479 * so we have to wait until it returns to determine
480 * the next object to visit.
481 */
482 PyObject *op = FROM_GC(gc);
483 traverseproc traverse = Py_TYPE(op)->tp_traverse;
484 assert(_PyGCHead_REFS(gc) > 0);
485 _PyGCHead_SET_REFS(gc, GC_REACHABLE);
486 (void) traverse(op,
487 (visitproc)visit_reachable,
488 (void *)young);
489 next = gc->gc.gc_next;
490 if (PyTuple_CheckExact(op)) {
491 _PyTuple_MaybeUntrack(op);
492 }
493 }
494 else {
495 /* This *may* be unreachable. To make progress,
496 * assume it is. gc isn't directly reachable from
497 * any object we've already traversed, but may be
498 * reachable from an object we haven't gotten to yet.
499 * visit_reachable will eventually move gc back into
500 * young if that's so, and we'll see it again.
501 */
502 next = gc->gc.gc_next;
503 gc_list_move(gc, unreachable);
504 _PyGCHead_SET_REFS(gc, GC_TENTATIVELY_UNREACHABLE);
505 }
506 gc = next;
507 }
508 }
509
510 /* Try to untrack all currently tracked dictionaries */
511 static void
512 untrack_dicts(PyGC_Head *head)
513 {
514 PyGC_Head *next, *gc = head->gc.gc_next;
515 while (gc != head) {
516 PyObject *op = FROM_GC(gc);
517 next = gc->gc.gc_next;
518 if (PyDict_CheckExact(op))
519 _PyDict_MaybeUntrack(op);
520 gc = next;
521 }
522 }
523
524 /* Return true if object has a pre-PEP 442 finalization method. */
525 static int
526 has_legacy_finalizer(PyObject *op)
527 {
528 return op->ob_type->tp_del != NULL;
529 }
530
531 /* Move the objects in unreachable with tp_del slots into `finalizers`.
532 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
533 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
534 */
535 static void
536 move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
537 {
538 PyGC_Head *gc;
539 PyGC_Head *next;
540
541 /* March over unreachable. Move objects with finalizers into
542 * `finalizers`.
543 */
544 for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
545 PyObject *op = FROM_GC(gc);
546
547 assert(IS_TENTATIVELY_UNREACHABLE(op));
548 next = gc->gc.gc_next;
549
550 if (has_legacy_finalizer(op)) {
551 gc_list_move(gc, finalizers);
552 _PyGCHead_SET_REFS(gc, GC_REACHABLE);
553 }
554 }
555 }
556
557 /* A traversal callback for move_legacy_finalizer_reachable. */
558 static int
559 visit_move(PyObject *op, PyGC_Head *tolist)
560 {
561 if (PyObject_IS_GC(op)) {
562 if (IS_TENTATIVELY_UNREACHABLE(op)) {
563 PyGC_Head *gc = AS_GC(op);
564 gc_list_move(gc, tolist);
565 _PyGCHead_SET_REFS(gc, GC_REACHABLE);
566 }
567 }
568 return 0;
569 }
570
571 /* Move objects that are reachable from finalizers, from the unreachable set
572 * into the finalizers set.
573 */
574 static void
575 move_legacy_finalizer_reachable(PyGC_Head *finalizers)
576 {
577 traverseproc traverse;
578 PyGC_Head *gc = finalizers->gc.gc_next;
579 for (; gc != finalizers; gc = gc->gc.gc_next) {
580 /* Note that the finalizers list may grow during this. */
581 traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
582 (void) traverse(FROM_GC(gc),
583 (visitproc)visit_move,
584 (void *)finalizers);
585 }
586 }
587
588 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
589 * callback, invoke it if necessary. Note that it's possible for such
590 * weakrefs to be outside the unreachable set -- indeed, those are precisely
591 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
592 * overview & some details. Some weakrefs with callbacks may be reclaimed
593 * directly by this routine; the number reclaimed is the return value. Other
594 * weakrefs with callbacks may be moved into the `old` generation. Objects
595 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
596 * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns,
597 * no object in `unreachable` is weakly referenced anymore.
598 */
599 static int
600 handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
601 {
602 PyGC_Head *gc;
603 PyObject *op; /* generally FROM_GC(gc) */
604 PyWeakReference *wr; /* generally a cast of op */
605 PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
606 PyGC_Head *next;
607 int num_freed = 0;
608
609 gc_list_init(&wrcb_to_call);
610
611 /* Clear all weakrefs to the objects in unreachable. If such a weakref
612 * also has a callback, move it into `wrcb_to_call` if the callback
613 * needs to be invoked. Note that we cannot invoke any callbacks until
614 * all weakrefs to unreachable objects are cleared, lest the callback
615 * resurrect an unreachable object via a still-active weakref. We
616 * make another pass over wrcb_to_call, invoking callbacks, after this
617 * pass completes.
618 */
619 for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
620 PyWeakReference **wrlist;
621
622 op = FROM_GC(gc);
623 assert(IS_TENTATIVELY_UNREACHABLE(op));
624 next = gc->gc.gc_next;
625
626 if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
627 continue;
628
629 /* It supports weakrefs. Does it have any? */
630 wrlist = (PyWeakReference **)
631 PyObject_GET_WEAKREFS_LISTPTR(op);
632
633 /* `op` may have some weakrefs. March over the list, clear
634 * all the weakrefs, and move the weakrefs with callbacks
635 * that must be called into wrcb_to_call.
636 */
637 for (wr = *wrlist; wr != NULL; wr = *wrlist) {
638 PyGC_Head *wrasgc; /* AS_GC(wr) */
639
640 /* _PyWeakref_ClearRef clears the weakref but leaves
641 * the callback pointer intact. Obscure: it also
642 * changes *wrlist.
643 */
644 assert(wr->wr_object == op);
645 _PyWeakref_ClearRef(wr);
646 assert(wr->wr_object == Py_None);
647 if (wr->wr_callback == NULL)
648 continue; /* no callback */
649
650 /* Headache time. `op` is going away, and is weakly referenced by
651 * `wr`, which has a callback. Should the callback be invoked? If wr
652 * is also trash, no:
653 *
654 * 1. There's no need to call it. The object and the weakref are
655 * both going away, so it's legitimate to pretend the weakref is
656 * going away first. The user has to ensure a weakref outlives its
657 * referent if they want a guarantee that the wr callback will get
658 * invoked.
659 *
660 * 2. It may be catastrophic to call it. If the callback is also in
661 * cyclic trash (CT), then although the CT is unreachable from
662 * outside the current generation, CT may be reachable from the
663 * callback. Then the callback could resurrect insane objects.
664 *
665 * Since the callback is never needed and may be unsafe in this case,
666 * wr is simply left in the unreachable set. Note that because we
667 * already called _PyWeakref_ClearRef(wr), its callback will never
668 * trigger.
669 *
670 * OTOH, if wr isn't part of CT, we should invoke the callback: the
671 * weakref outlived the trash. Note that since wr isn't CT in this
672 * case, its callback can't be CT either -- wr acted as an external
673 * root to this generation, and therefore its callback did too. So
674 * nothing in CT is reachable from the callback either, so it's hard
675 * to imagine how calling it later could create a problem for us. wr
676 * is moved to wrcb_to_call in this case.
677 */
678 if (IS_TENTATIVELY_UNREACHABLE(wr))
679 continue;
680 assert(IS_REACHABLE(wr));
681
682 /* Create a new reference so that wr can't go away
683 * before we can process it again.
684 */
685 Py_INCREF(wr);
686
687 /* Move wr to wrcb_to_call, for the next pass. */
688 wrasgc = AS_GC(wr);
689 assert(wrasgc != next); /* wrasgc is reachable, but
690 next isn't, so they can't
691 be the same */
692 gc_list_move(wrasgc, &wrcb_to_call);
693 }
694 }
695
696 /* Invoke the callbacks we decided to honor. It's safe to invoke them
697 * because they can't reference unreachable objects.
698 */
699 while (! gc_list_is_empty(&wrcb_to_call)) {
700 PyObject *temp;
701 PyObject *callback;
702
703 gc = wrcb_to_call.gc.gc_next;
704 op = FROM_GC(gc);
705 assert(IS_REACHABLE(op));
706 assert(PyWeakref_Check(op));
707 wr = (PyWeakReference *)op;
708 callback = wr->wr_callback;
709 assert(callback != NULL);
710
711 /* copy-paste of weakrefobject.c's handle_callback() */
712 temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
713 if (temp == NULL)
714 PyErr_WriteUnraisable(callback);
715 else
716 Py_DECREF(temp);
717
718 /* Give up the reference we created in the first pass. When
719 * op's refcount hits 0 (which it may or may not do right now),
720 * op's tp_dealloc will decref op->wr_callback too. Note
721 * that the refcount probably will hit 0 now, and because this
722 * weakref was reachable to begin with, gc didn't already
723 * add it to its count of freed objects. Example: a reachable
724 * weak value dict maps some key to this reachable weakref.
725 * The callback removes this key->weakref mapping from the
726 * dict, leaving no other references to the weakref (excepting
727 * ours).
728 */
729 Py_DECREF(op);
730 if (wrcb_to_call.gc.gc_next == gc) {
731 /* object is still alive -- move it */
732 gc_list_move(gc, old);
733 }
734 else
735 ++num_freed;
736 }
737
738 return num_freed;
739 }
740
741 static void
742 debug_cycle(const char *msg, PyObject *op)
743 {
744 PySys_FormatStderr("gc: %s <%s %p>\n",
745 msg, Py_TYPE(op)->tp_name, op);
746 }
747
748 /* Handle uncollectable garbage (cycles with tp_del slots, and stuff reachable
749 * only from such cycles).
750 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
751 * garbage list (a Python list), else only the objects in finalizers with
752 * __del__ methods are appended to garbage. All objects in finalizers are
753 * merged into the old list regardless.
754 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
755 * The finalizers list is made empty on a successful return.
756 */
757 static int
758 handle_legacy_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
759 {
760 PyGC_Head *gc = finalizers->gc.gc_next;
761
762 if (garbage == NULL) {
763 garbage = PyList_New(0);
764 if (garbage == NULL)
765 Py_FatalError("gc couldn't create gc.garbage list");
766 }
767 for (; gc != finalizers; gc = gc->gc.gc_next) {
768 PyObject *op = FROM_GC(gc);
769
770 if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
771 if (PyList_Append(garbage, op) < 0)
772 return -1;
773 }
774 }
775
776 gc_list_merge(finalizers, old);
777 return 0;
778 }
779
780 /* Run first-time finalizers (if any) on all the objects in collectable.
781 * Note that this may remove some (or even all) of the objects from the
782 * list, due to refcounts falling to 0.
783 */
784 static void
785 finalize_garbage(PyGC_Head *collectable)
786 {
787 destructor finalize;
788 PyGC_Head seen;
789
790 /* While we're going through the loop, `finalize(op)` may cause op, or
791 * other objects, to be reclaimed via refcounts falling to zero. So
792 * there's little we can rely on about the structure of the input
793 * `collectable` list across iterations. For safety, we always take the
794 * first object in that list and move it to a temporary `seen` list.
795 * If objects vanish from the `collectable` and `seen` lists we don't
796 * care.
797 */
798 gc_list_init(&seen);
799
800 while (!gc_list_is_empty(collectable)) {
801 PyGC_Head *gc = collectable->gc.gc_next;
802 PyObject *op = FROM_GC(gc);
803 gc_list_move(gc, &seen);
804 if (!_PyGCHead_FINALIZED(gc) &&
805 PyType_HasFeature(Py_TYPE(op), Py_TPFLAGS_HAVE_FINALIZE) &&
806 (finalize = Py_TYPE(op)->tp_finalize) != NULL) {
807 _PyGCHead_SET_FINALIZED(gc, 1);
808 Py_INCREF(op);
809 finalize(op);
810 Py_DECREF(op);
811 }
812 }
813 gc_list_merge(&seen, collectable);
814 }
815
816 /* Walk the collectable list and check that the objects are really unreachable
817 from the outside (some objects could have been resurrected by a
818 finalizer). */
819 static int
820 check_garbage(PyGC_Head *collectable)
821 {
822 PyGC_Head *gc;
823 for (gc = collectable->gc.gc_next; gc != collectable;
824 gc = gc->gc.gc_next) {
825 _PyGCHead_SET_REFS(gc, Py_REFCNT(FROM_GC(gc)));
826 assert(_PyGCHead_REFS(gc) != 0);
827 }
828 subtract_refs(collectable);
829 for (gc = collectable->gc.gc_next; gc != collectable;
830 gc = gc->gc.gc_next) {
831 assert(_PyGCHead_REFS(gc) >= 0);
832 if (_PyGCHead_REFS(gc) != 0)
833 return -1;
834 }
835 return 0;
836 }
837
838 static void
839 revive_garbage(PyGC_Head *collectable)
840 {
841 PyGC_Head *gc;
842 for (gc = collectable->gc.gc_next; gc != collectable;
843 gc = gc->gc.gc_next) {
844 _PyGCHead_SET_REFS(gc, GC_REACHABLE);
845 }
846 }
847
848 /* Break reference cycles by clearing the containers involved. This is
849 * tricky business as the lists can be changing and we don't know which
850 * objects may be freed. It is possible I screwed something up here.
851 */
852 static void
853 delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
854 {
855 inquiry clear;
856
857 while (!gc_list_is_empty(collectable)) {
858 PyGC_Head *gc = collectable->gc.gc_next;
859 PyObject *op = FROM_GC(gc);
860
861 if (debug & DEBUG_SAVEALL) {
862 PyList_Append(garbage, op);
863 }
864 else {
865 if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
866 Py_INCREF(op);
867 clear(op);
868 Py_DECREF(op);
869 }
870 }
871 if (collectable->gc.gc_next == gc) {
872 /* object is still alive, move it, it may die later */
873 gc_list_move(gc, old);
874 _PyGCHead_SET_REFS(gc, GC_REACHABLE);
875 }
876 }
877 }
878
879 /* Clear all free lists
880 * All free lists are cleared during the collection of the highest generation.
881 * Allocated items in the free list may keep a pymalloc arena occupied.
882 * Clearing the free lists may give back memory to the OS earlier.
883 */
884 static void
885 clear_freelists(void)
886 {
887 (void)PyMethod_ClearFreeList();
888 (void)PyFrame_ClearFreeList();
889 (void)PyCFunction_ClearFreeList();
890 (void)PyTuple_ClearFreeList();
891 (void)PyUnicode_ClearFreeList();
892 (void)PyFloat_ClearFreeList();
893 (void)PyList_ClearFreeList();
894 (void)PyDict_ClearFreeList();
895 (void)PySet_ClearFreeList();
896 (void)PyAsyncGen_ClearFreeLists();
897 }
898
899 /* This is the main function. Read this to understand how the
900 * collection process works. */
901 static Py_ssize_t
902 collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
903 int nofail)
904 {
905 int i;
906 Py_ssize_t m = 0; /* # objects collected */
907 Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
908 PyGC_Head *young; /* the generation we are examining */
909 PyGC_Head *old; /* next older generation */
910 PyGC_Head unreachable; /* non-problematic unreachable trash */
911 PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
912 PyGC_Head *gc;
913 _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
914
915 struct gc_generation_stats *stats = &generation_stats[generation];
916
917 if (debug & DEBUG_STATS) {
918 PySys_WriteStderr("gc: collecting generation %d...\n",
919 generation);
920 PySys_WriteStderr("gc: objects in each generation:");
921 for (i = 0; i < NUM_GENERATIONS; i++)
922 PySys_FormatStderr(" %zd",
923 gc_list_size(GEN_HEAD(i)));
924 t1 = _PyTime_GetMonotonicClock();
925
926 PySys_WriteStderr("\n");
927 }
928
929 if (PyDTrace_GC_START_ENABLED())
930 PyDTrace_GC_START(generation);
931
932 /* update collection and allocation counters */
933 if (generation+1 < NUM_GENERATIONS)
934 generations[generation+1].count += 1;
935 for (i = 0; i <= generation; i++)
936 generations[i].count = 0;
937
938 /* merge younger generations with the one we are currently collecting */
939 for (i = 0; i < generation; i++) {
940 gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
941 }
942
943 /* handy references */
944 young = GEN_HEAD(generation);
945 if (generation < NUM_GENERATIONS-1)
946 old = GEN_HEAD(generation+1);
947 else
948 old = young;
949
950 /* Using ob_refcnt and gc_refs, calculate which objects in the
951 * container set are reachable from outside the set (i.e., have a
952 * refcount greater than 0 when all the references within the
953 * set are taken into account).
954 */
955 update_refs(young);
956 subtract_refs(young);
957
958 /* Leave everything reachable from outside young in young, and move
959 * everything else (in young) to unreachable.
960 * NOTE: This used to move the reachable objects into a reachable
961 * set instead. But most things usually turn out to be reachable,
962 * so it's more efficient to move the unreachable things.
963 */
964 gc_list_init(&unreachable);
965 move_unreachable(young, &unreachable);
966
967 /* Move reachable objects to next generation. */
968 if (young != old) {
969 if (generation == NUM_GENERATIONS - 2) {
970 long_lived_pending += gc_list_size(young);
971 }
972 gc_list_merge(young, old);
973 }
974 else {
975 /* We only untrack dicts in full collections, to avoid quadratic
976 dict build-up. See issue #14775. */
977 untrack_dicts(young);
978 long_lived_pending = 0;
979 long_lived_total = gc_list_size(young);
980 }
981
982 /* All objects in unreachable are trash, but objects reachable from
983 * legacy finalizers (e.g. tp_del) can't safely be deleted.
984 */
985 gc_list_init(&finalizers);
986 move_legacy_finalizers(&unreachable, &finalizers);
987 /* finalizers contains the unreachable objects with a legacy finalizer;
988 * unreachable objects reachable *from* those are also uncollectable,
989 * and we move those into the finalizers list too.
990 */
991 move_legacy_finalizer_reachable(&finalizers);
992
993 /* Collect statistics on collectable objects found and print
994 * debugging information.
995 */
996 for (gc = unreachable.gc.gc_next; gc != &unreachable;
997 gc = gc->gc.gc_next) {
998 m++;
999 if (debug & DEBUG_COLLECTABLE) {
1000 debug_cycle("collectable", FROM_GC(gc));
1001 }
1002 }
1003
1004 /* Clear weakrefs and invoke callbacks as necessary. */
1005 m += handle_weakrefs(&unreachable, old);
1006
1007 /* Call tp_finalize on objects which have one. */
1008 finalize_garbage(&unreachable);
1009
1010 if (check_garbage(&unreachable)) {
1011 revive_garbage(&unreachable);
1012 gc_list_merge(&unreachable, old);
1013 }
1014 else {
1015 /* Call tp_clear on objects in the unreachable set. This will cause
1016 * the reference cycles to be broken. It may also cause some objects
1017 * in finalizers to be freed.
1018 */
1019 delete_garbage(&unreachable, old);
1020 }
1021
1022 /* Collect statistics on uncollectable objects found and print
1023 * debugging information. */
1024 for (gc = finalizers.gc.gc_next;
1025 gc != &finalizers;
1026 gc = gc->gc.gc_next) {
1027 n++;
1028 if (debug & DEBUG_UNCOLLECTABLE)
1029 debug_cycle("uncollectable", FROM_GC(gc));
1030 }
1031 if (debug & DEBUG_STATS) {
1032 _PyTime_t t2 = _PyTime_GetMonotonicClock();
1033
1034 if (m == 0 && n == 0)
1035 PySys_WriteStderr("gc: done");
1036 else
1037 PySys_FormatStderr(
1038 "gc: done, %zd unreachable, %zd uncollectable",
1039 n+m, n);
1040 PySys_WriteStderr(", %.4fs elapsed\n",
1041 _PyTime_AsSecondsDouble(t2 - t1));
1042 }
1043
1044 /* Append instances in the uncollectable set to a Python
1045 * reachable list of garbage. The programmer has to deal with
1046 * this if they insist on creating this type of structure.
1047 */
1048 (void)handle_legacy_finalizers(&finalizers, old);
1049
1050 /* Clear free lists only during the collection of the highest
1051 * generation */
1052 if (generation == NUM_GENERATIONS-1) {
1053 clear_freelists();
1054 }
1055
1056 if (PyErr_Occurred()) {
1057 if (nofail) {
1058 PyErr_Clear();
1059 }
1060 else {
1061 if (gc_str == NULL)
1062 gc_str = PyUnicode_FromString("garbage collection");
1063 PyErr_WriteUnraisable(gc_str);
1064 Py_FatalError("unexpected exception during garbage collection");
1065 }
1066 }
1067
1068 /* Update stats */
1069 if (n_collected)
1070 *n_collected = m;
1071 if (n_uncollectable)
1072 *n_uncollectable = n;
1073 stats->collections++;
1074 stats->collected += m;
1075 stats->uncollectable += n;
1076
1077 if (PyDTrace_GC_DONE_ENABLED())
1078 PyDTrace_GC_DONE(n+m);
1079
1080 return n+m;
1081 }
1082
1083 /* Invoke progress callbacks to notify clients that garbage collection
1084 * is starting or stopping
1085 */
1086 static void
1087 invoke_gc_callback(const char *phase, int generation,
1088 Py_ssize_t collected, Py_ssize_t uncollectable)
1089 {
1090 Py_ssize_t i;
1091 PyObject *info = NULL;
1092
1093 /* we may get called very early */
1094 if (callbacks == NULL)
1095 return;
1096 /* The local variable cannot be rebound; check it for sanity */
1097 assert(callbacks != NULL && PyList_CheckExact(callbacks));
1098 if (PyList_GET_SIZE(callbacks) != 0) {
1099 info = Py_BuildValue("{sisnsn}",
1100 "generation", generation,
1101 "collected", collected,
1102 "uncollectable", uncollectable);
1103 if (info == NULL) {
1104 PyErr_WriteUnraisable(NULL);
1105 return;
1106 }
1107 }
1108 for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
1109 PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
1110 Py_INCREF(cb); /* make sure cb doesn't go away */
1111 r = PyObject_CallFunction(cb, "sO", phase, info);
1112 Py_XDECREF(r);
1113 if (r == NULL)
1114 PyErr_WriteUnraisable(cb);
1115 Py_DECREF(cb);
1116 }
1117 Py_XDECREF(info);
1118 }
1119
1120 /* Perform garbage collection of a generation and invoke
1121 * progress callbacks.
1122 */
1123 static Py_ssize_t
1124 collect_with_callback(int generation)
1125 {
1126 Py_ssize_t result, collected, uncollectable;
1127 invoke_gc_callback("start", generation, 0, 0);
1128 result = collect(generation, &collected, &uncollectable, 0);
1129 invoke_gc_callback("stop", generation, collected, uncollectable);
1130 return result;
1131 }
1132
1133 static Py_ssize_t
1134 collect_generations(void)
1135 {
1136 int i;
1137 Py_ssize_t n = 0;
1138
1139 /* Find the oldest generation (highest numbered) where the count
1140 * exceeds the threshold. Objects in that generation and
1141 * generations younger than it will be collected. */
1142 for (i = NUM_GENERATIONS-1; i >= 0; i--) {
1143 if (generations[i].count > generations[i].threshold) {
1144 /* Avoid quadratic performance degradation in number
1145 of tracked objects. See comments at the beginning
1146 of this file, and issue #4074.
1147 */
1148 if (i == NUM_GENERATIONS - 1
1149 && long_lived_pending < long_lived_total / 4)
1150 continue;
1151 n = collect_with_callback(i);
1152 break;
1153 }
1154 }
1155 return n;
1156 }
1157
1158 PyDoc_STRVAR(gc_enable__doc__,
1159 "enable() -> None\n"
1160 "\n"
1161 "Enable automatic garbage collection.\n");
1162
1163 static PyObject *
1164 gc_enable(PyObject *self, PyObject *noargs)
1165 {
1166 enabled = 1;
1167 Py_INCREF(Py_None);
1168 return Py_None;
1169 }
1170
1171 PyDoc_STRVAR(gc_disable__doc__,
1172 "disable() -> None\n"
1173 "\n"
1174 "Disable automatic garbage collection.\n");
1175
1176 static PyObject *
1177 gc_disable(PyObject *self, PyObject *noargs)
1178 {
1179 enabled = 0;
1180 Py_INCREF(Py_None);
1181 return Py_None;
1182 }
1183
1184 PyDoc_STRVAR(gc_isenabled__doc__,
1185 "isenabled() -> status\n"
1186 "\n"
1187 "Returns true if automatic garbage collection is enabled.\n");
1188
1189 static PyObject *
1190 gc_isenabled(PyObject *self, PyObject *noargs)
1191 {
1192 return PyBool_FromLong((long)enabled);
1193 }
1194
1195 PyDoc_STRVAR(gc_collect__doc__,
1196 "collect([generation]) -> n\n"
1197 "\n"
1198 "With no arguments, run a full collection. The optional argument\n"
1199 "may be an integer specifying which generation to collect. A ValueError\n"
1200 "is raised if the generation number is invalid.\n\n"
1201 "The number of unreachable objects is returned.\n");
1202
1203 static PyObject *
1204 gc_collect(PyObject *self, PyObject *args, PyObject *kws)
1205 {
1206 static char *keywords[] = {"generation", NULL};
1207 int genarg = NUM_GENERATIONS - 1;
1208 Py_ssize_t n;
1209
1210 if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg))
1211 return NULL;
1212
1213 else if (genarg < 0 || genarg >= NUM_GENERATIONS) {
1214 PyErr_SetString(PyExc_ValueError, "invalid generation");
1215 return NULL;
1216 }
1217
1218 if (collecting)
1219 n = 0; /* already collecting, don't do anything */
1220 else {
1221 collecting = 1;
1222 n = collect_with_callback(genarg);
1223 collecting = 0;
1224 }
1225
1226 return PyLong_FromSsize_t(n);
1227 }
1228
1229 PyDoc_STRVAR(gc_set_debug__doc__,
1230 "set_debug(flags) -> None\n"
1231 "\n"
1232 "Set the garbage collection debugging flags. Debugging information is\n"
1233 "written to sys.stderr.\n"
1234 "\n"
1235 "flags is an integer and can have the following bits turned on:\n"
1236 "\n"
1237 " DEBUG_STATS - Print statistics during collection.\n"
1238 " DEBUG_COLLECTABLE - Print collectable objects found.\n"
1239 " DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
1240 " DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
1241 " DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");
1242
1243 static PyObject *
1244 gc_set_debug(PyObject *self, PyObject *args)
1245 {
1246 if (!PyArg_ParseTuple(args, "i:set_debug", &debug))
1247 return NULL;
1248
1249 Py_INCREF(Py_None);
1250 return Py_None;
1251 }
1252
1253 PyDoc_STRVAR(gc_get_debug__doc__,
1254 "get_debug() -> flags\n"
1255 "\n"
1256 "Get the garbage collection debugging flags.\n");
1257
1258 static PyObject *
1259 gc_get_debug(PyObject *self, PyObject *noargs)
1260 {
1261 return Py_BuildValue("i", debug);
1262 }
1263
1264 PyDoc_STRVAR(gc_set_thresh__doc__,
1265 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1266 "\n"
1267 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
1268 "collection.\n");
1269
1270 static PyObject *
1271 gc_set_thresh(PyObject *self, PyObject *args)
1272 {
1273 int i;
1274 if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1275 &generations[0].threshold,
1276 &generations[1].threshold,
1277 &generations[2].threshold))
1278 return NULL;
1279 for (i = 2; i < NUM_GENERATIONS; i++) {
1280 /* generations higher than 2 get the same threshold */
1281 generations[i].threshold = generations[2].threshold;
1282 }
1283
1284 Py_INCREF(Py_None);
1285 return Py_None;
1286 }
1287
1288 PyDoc_STRVAR(gc_get_thresh__doc__,
1289 "get_threshold() -> (threshold0, threshold1, threshold2)\n"
1290 "\n"
1291 "Return the current collection thresholds\n");
1292
1293 static PyObject *
1294 gc_get_thresh(PyObject *self, PyObject *noargs)
1295 {
1296 return Py_BuildValue("(iii)",
1297 generations[0].threshold,
1298 generations[1].threshold,
1299 generations[2].threshold);
1300 }
1301
1302 PyDoc_STRVAR(gc_get_count__doc__,
1303 "get_count() -> (count0, count1, count2)\n"
1304 "\n"
1305 "Return the current collection counts\n");
1306
1307 static PyObject *
1308 gc_get_count(PyObject *self, PyObject *noargs)
1309 {
1310 return Py_BuildValue("(iii)",
1311 generations[0].count,
1312 generations[1].count,
1313 generations[2].count);
1314 }
1315
1316 static int
1317 referrersvisit(PyObject* obj, PyObject *objs)
1318 {
1319 Py_ssize_t i;
1320 for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1321 if (PyTuple_GET_ITEM(objs, i) == obj)
1322 return 1;
1323 return 0;
1324 }
1325
1326 static int
1327 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1328 {
1329 PyGC_Head *gc;
1330 PyObject *obj;
1331 traverseproc traverse;
1332 for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
1333 obj = FROM_GC(gc);
1334 traverse = Py_TYPE(obj)->tp_traverse;
1335 if (obj == objs || obj == resultlist)
1336 continue;
1337 if (traverse(obj, (visitproc)referrersvisit, objs)) {
1338 if (PyList_Append(resultlist, obj) < 0)
1339 return 0; /* error */
1340 }
1341 }
1342 return 1; /* no error */
1343 }
1344
1345 PyDoc_STRVAR(gc_get_referrers__doc__,
1346 "get_referrers(*objs) -> list\n\
1347 Return the list of objects that directly refer to any of objs.");
1348
1349 static PyObject *
1350 gc_get_referrers(PyObject *self, PyObject *args)
1351 {
1352 int i;
1353 PyObject *result = PyList_New(0);
1354 if (!result) return NULL;
1355
1356 for (i = 0; i < NUM_GENERATIONS; i++) {
1357 if (!(gc_referrers_for(args, GEN_HEAD(i), result))) {
1358 Py_DECREF(result);
1359 return NULL;
1360 }
1361 }
1362 return result;
1363 }
1364
1365 /* Append obj to list; return true if error (out of memory), false if OK. */
1366 static int
1367 referentsvisit(PyObject *obj, PyObject *list)
1368 {
1369 return PyList_Append(list, obj) < 0;
1370 }
1371
1372 PyDoc_STRVAR(gc_get_referents__doc__,
1373 "get_referents(*objs) -> list\n\
1374 Return the list of objects that are directly referred to by objs.");
1375
1376 static PyObject *
1377 gc_get_referents(PyObject *self, PyObject *args)
1378 {
1379 Py_ssize_t i;
1380 PyObject *result = PyList_New(0);
1381
1382 if (result == NULL)
1383 return NULL;
1384
1385 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1386 traverseproc traverse;
1387 PyObject *obj = PyTuple_GET_ITEM(args, i);
1388
1389 if (! PyObject_IS_GC(obj))
1390 continue;
1391 traverse = Py_TYPE(obj)->tp_traverse;
1392 if (! traverse)
1393 continue;
1394 if (traverse(obj, (visitproc)referentsvisit, result)) {
1395 Py_DECREF(result);
1396 return NULL;
1397 }
1398 }
1399 return result;
1400 }
1401
1402 PyDoc_STRVAR(gc_get_objects__doc__,
1403 "get_objects() -> [...]\n"
1404 "\n"
1405 "Return a list of objects tracked by the collector (excluding the list\n"
1406 "returned).\n");
1407
1408 static PyObject *
1409 gc_get_objects(PyObject *self, PyObject *noargs)
1410 {
1411 int i;
1412 PyObject* result;
1413
1414 result = PyList_New(0);
1415 if (result == NULL)
1416 return NULL;
1417 for (i = 0; i < NUM_GENERATIONS; i++) {
1418 if (append_objects(result, GEN_HEAD(i))) {
1419 Py_DECREF(result);
1420 return NULL;
1421 }
1422 }
1423 return result;
1424 }
1425
1426 PyDoc_STRVAR(gc_get_stats__doc__,
1427 "get_stats() -> [...]\n"
1428 "\n"
1429 "Return a list of dictionaries containing per-generation statistics.\n");
1430
1431 static PyObject *
1432 gc_get_stats(PyObject *self, PyObject *noargs)
1433 {
1434 int i;
1435 PyObject *result;
1436 struct gc_generation_stats stats[NUM_GENERATIONS], *st;
1437
1438 /* To get consistent values despite allocations while constructing
1439 the result list, we use a snapshot of the running stats. */
1440 for (i = 0; i < NUM_GENERATIONS; i++) {
1441 stats[i] = generation_stats[i];
1442 }
1443
1444 result = PyList_New(0);
1445 if (result == NULL)
1446 return NULL;
1447
1448 for (i = 0; i < NUM_GENERATIONS; i++) {
1449 PyObject *dict;
1450 st = &stats[i];
1451 dict = Py_BuildValue("{snsnsn}",
1452 "collections", st->collections,
1453 "collected", st->collected,
1454 "uncollectable", st->uncollectable
1455 );
1456 if (dict == NULL)
1457 goto error;
1458 if (PyList_Append(result, dict)) {
1459 Py_DECREF(dict);
1460 goto error;
1461 }
1462 Py_DECREF(dict);
1463 }
1464 return result;
1465
1466 error:
1467 Py_XDECREF(result);
1468 return NULL;
1469 }
1470
1471
1472 PyDoc_STRVAR(gc_is_tracked__doc__,
1473 "is_tracked(obj) -> bool\n"
1474 "\n"
1475 "Returns true if the object is tracked by the garbage collector.\n"
1476 "Simple atomic objects will return false.\n"
1477 );
1478
1479 static PyObject *
1480 gc_is_tracked(PyObject *self, PyObject *obj)
1481 {
1482 PyObject *result;
1483
1484 if (PyObject_IS_GC(obj) && IS_TRACKED(obj))
1485 result = Py_True;
1486 else
1487 result = Py_False;
1488 Py_INCREF(result);
1489 return result;
1490 }
1491
1492
1493 PyDoc_STRVAR(gc__doc__,
1494 "This module provides access to the garbage collector for reference cycles.\n"
1495 "\n"
1496 "enable() -- Enable automatic garbage collection.\n"
1497 "disable() -- Disable automatic garbage collection.\n"
1498 "isenabled() -- Returns true if automatic collection is enabled.\n"
1499 "collect() -- Do a full collection right now.\n"
1500 "get_count() -- Return the current collection counts.\n"
1501 "get_stats() -- Return list of dictionaries containing per-generation stats.\n"
1502 "set_debug() -- Set debugging flags.\n"
1503 "get_debug() -- Get debugging flags.\n"
1504 "set_threshold() -- Set the collection thresholds.\n"
1505 "get_threshold() -- Return the current the collection thresholds.\n"
1506 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1507 "is_tracked() -- Returns true if a given object is tracked.\n"
1508 "get_referrers() -- Return the list of objects that refer to an object.\n"
1509 "get_referents() -- Return the list of objects that an object refers to.\n");
1510
1511 static PyMethodDef GcMethods[] = {
1512 {"enable", gc_enable, METH_NOARGS, gc_enable__doc__},
1513 {"disable", gc_disable, METH_NOARGS, gc_disable__doc__},
1514 {"isenabled", gc_isenabled, METH_NOARGS, gc_isenabled__doc__},
1515 {"set_debug", gc_set_debug, METH_VARARGS, gc_set_debug__doc__},
1516 {"get_debug", gc_get_debug, METH_NOARGS, gc_get_debug__doc__},
1517 {"get_count", gc_get_count, METH_NOARGS, gc_get_count__doc__},
1518 {"set_threshold", gc_set_thresh, METH_VARARGS, gc_set_thresh__doc__},
1519 {"get_threshold", gc_get_thresh, METH_NOARGS, gc_get_thresh__doc__},
1520 {"collect", (PyCFunction)gc_collect,
1521 METH_VARARGS | METH_KEYWORDS, gc_collect__doc__},
1522 {"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__},
1523 {"get_stats", gc_get_stats, METH_NOARGS, gc_get_stats__doc__},
1524 {"is_tracked", gc_is_tracked, METH_O, gc_is_tracked__doc__},
1525 {"get_referrers", gc_get_referrers, METH_VARARGS,
1526 gc_get_referrers__doc__},
1527 {"get_referents", gc_get_referents, METH_VARARGS,
1528 gc_get_referents__doc__},
1529 {NULL, NULL} /* Sentinel */
1530 };
1531
1532 static struct PyModuleDef gcmodule = {
1533 PyModuleDef_HEAD_INIT,
1534 "gc", /* m_name */
1535 gc__doc__, /* m_doc */
1536 -1, /* m_size */
1537 GcMethods, /* m_methods */
1538 NULL, /* m_reload */
1539 NULL, /* m_traverse */
1540 NULL, /* m_clear */
1541 NULL /* m_free */
1542 };
1543
1544 PyMODINIT_FUNC
1545 PyInit_gc(void)
1546 {
1547 PyObject *m;
1548
1549 m = PyModule_Create(&gcmodule);
1550
1551 if (m == NULL)
1552 return NULL;
1553
1554 if (garbage == NULL) {
1555 garbage = PyList_New(0);
1556 if (garbage == NULL)
1557 return NULL;
1558 }
1559 Py_INCREF(garbage);
1560 if (PyModule_AddObject(m, "garbage", garbage) < 0)
1561 return NULL;
1562
1563 if (callbacks == NULL) {
1564 callbacks = PyList_New(0);
1565 if (callbacks == NULL)
1566 return NULL;
1567 }
1568 Py_INCREF(callbacks);
1569 if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
1570 return NULL;
1571
1572 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
1573 ADD_INT(DEBUG_STATS);
1574 ADD_INT(DEBUG_COLLECTABLE);
1575 ADD_INT(DEBUG_UNCOLLECTABLE);
1576 ADD_INT(DEBUG_SAVEALL);
1577 ADD_INT(DEBUG_LEAK);
1578 #undef ADD_INT
1579 return m;
1580 }
1581
1582 /* API to invoke gc.collect() from C */
1583 Py_ssize_t
1584 PyGC_Collect(void)
1585 {
1586 Py_ssize_t n;
1587
1588 if (collecting)
1589 n = 0; /* already collecting, don't do anything */
1590 else {
1591 collecting = 1;
1592 n = collect_with_callback(NUM_GENERATIONS - 1);
1593 collecting = 0;
1594 }
1595
1596 return n;
1597 }
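#if 0
/* Illustrative only: how an embedding application might force a full
 * collection from C.  Py_Initialize()/Py_FinalizeEx() are the standard
 * embedding entry points; the surrounding program is hypothetical. */
#include <Python.h>

int
main(void)
{
    Py_Initialize();
    /* ... run code that may create reference cycles ... */
    Py_ssize_t unreachable = PyGC_Collect();    /* collect all generations */
    printf("gc found %zd unreachable objects\n", unreachable);
    return Py_FinalizeEx() < 0 ? 1 : 0;
}
#endif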
1598
1599 Py_ssize_t
1600 _PyGC_CollectIfEnabled(void)
1601 {
1602 if (!enabled)
1603 return 0;
1604
1605 return PyGC_Collect();
1606 }
1607
1608 Py_ssize_t
1609 _PyGC_CollectNoFail(void)
1610 {
1611 Py_ssize_t n;
1612
1613 /* Ideally, this function is only called on interpreter shutdown,
1614 and therefore not recursively. Unfortunately, when there are daemon
1615 threads, a daemon thread can start a cyclic garbage collection
1616 during interpreter shutdown (and then never finish it).
1617 See http://bugs.python.org/issue8713#msg195178 for an example.
1618 */
1619 if (collecting)
1620 n = 0;
1621 else {
1622 collecting = 1;
1623 n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1);
1624 collecting = 0;
1625 }
1626 return n;
1627 }
1628
1629 void
1630 _PyGC_DumpShutdownStats(void)
1631 {
1632 if (!(debug & DEBUG_SAVEALL)
1633 && garbage != NULL && PyList_GET_SIZE(garbage) > 0) {
1634 char *message;
1635 if (debug & DEBUG_UNCOLLECTABLE)
1636 message = "gc: %zd uncollectable objects at " \
1637 "shutdown";
1638 else
1639 message = "gc: %zd uncollectable objects at " \
1640 "shutdown; use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
1641 /* PyErr_WarnFormat does too many things and we are at shutdown;
1642 the warnings module's dependencies (e.g. linecache) may be gone
1643 already. */
1644 if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
1645 "gc", NULL, message,
1646 PyList_GET_SIZE(garbage)))
1647 PyErr_WriteUnraisable(NULL);
1648 if (debug & DEBUG_UNCOLLECTABLE) {
1649 PyObject *repr = NULL, *bytes = NULL;
1650 repr = PyObject_Repr(garbage);
1651 if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
1652 PyErr_WriteUnraisable(garbage);
1653 else {
1654 PySys_WriteStderr(
1655 " %s\n",
1656 PyBytes_AS_STRING(bytes)
1657 );
1658 }
1659 Py_XDECREF(repr);
1660 Py_XDECREF(bytes);
1661 }
1662 }
1663 }
1664
1665 void
1666 _PyGC_Fini(void)
1667 {
1668 Py_CLEAR(callbacks);
1669 }
1670
1671 /* for debugging */
1672 void
1673 _PyGC_Dump(PyGC_Head *g)
1674 {
1675 _PyObject_Dump(FROM_GC(g));
1676 }
1677
1678 /* extension modules might be compiled with GC support so these
1679 functions must always be available */
1680
1681 #undef PyObject_GC_Track
1682 #undef PyObject_GC_UnTrack
1683 #undef PyObject_GC_Del
1684 #undef _PyObject_GC_Malloc
1685
1686 void
1687 PyObject_GC_Track(void *op)
1688 {
1689 _PyObject_GC_TRACK(op);
1690 }
1691
1692 void
1693 PyObject_GC_UnTrack(void *op)
1694 {
1695 /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
1696 * call PyObject_GC_UnTrack twice on an object.
1697 */
1698 if (IS_TRACKED(op))
1699 _PyObject_GC_UNTRACK(op);
1700 }
1701
1702 static PyObject *
1703 _PyObject_GC_Alloc(int use_calloc, size_t basicsize)
1704 {
1705 PyObject *op;
1706 PyGC_Head *g;
1707 size_t size;
1708 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
1709 return PyErr_NoMemory();
1710 size = sizeof(PyGC_Head) + basicsize;
1711 if (use_calloc)
1712 g = (PyGC_Head *)PyObject_Calloc(1, size);
1713 else
1714 g = (PyGC_Head *)PyObject_Malloc(size);
1715 if (g == NULL)
1716 return PyErr_NoMemory();
1717 g->gc.gc_refs = 0;
1718 _PyGCHead_SET_REFS(g, GC_UNTRACKED);
1719 generations[0].count++; /* number of allocated GC objects */
1720 if (generations[0].count > generations[0].threshold &&
1721 enabled &&
1722 generations[0].threshold &&
1723 !collecting &&
1724 !PyErr_Occurred()) {
1725 collecting = 1;
1726 collect_generations();
1727 collecting = 0;
1728 }
1729 op = FROM_GC(g);
1730 return op;
1731 }
1732
1733 PyObject *
1734 _PyObject_GC_Malloc(size_t basicsize)
1735 {
1736 return _PyObject_GC_Alloc(0, basicsize);
1737 }
1738
1739 PyObject *
1740 _PyObject_GC_Calloc(size_t basicsize)
1741 {
1742 return _PyObject_GC_Alloc(1, basicsize);
1743 }
1744
1745 PyObject *
1746 _PyObject_GC_New(PyTypeObject *tp)
1747 {
1748 PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
1749 if (op != NULL)
1750 op = PyObject_INIT(op, tp);
1751 return op;
1752 }
1753
1754 PyVarObject *
1755 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
1756 {
1757 size_t size;
1758 PyVarObject *op;
1759
1760 if (nitems < 0) {
1761 PyErr_BadInternalCall();
1762 return NULL;
1763 }
1764 size = _PyObject_VAR_SIZE(tp, nitems);
1765 op = (PyVarObject *) _PyObject_GC_Malloc(size);
1766 if (op != NULL)
1767 op = PyObject_INIT_VAR(op, tp, nitems);
1768 return op;
1769 }
1770
1771 PyVarObject *
1772 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
1773 {
1774 const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
1775 PyGC_Head *g = AS_GC(op);
1776 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
1777 return (PyVarObject *)PyErr_NoMemory();
1778 g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
1779 if (g == NULL)
1780 return (PyVarObject *)PyErr_NoMemory();
1781 op = (PyVarObject *) FROM_GC(g);
1782 Py_SIZE(op) = nitems;
1783 return op;
1784 }
1785
1786 void
1787 PyObject_GC_Del(void *op)
1788 {
1789 PyGC_Head *g = AS_GC(op);
1790 if (IS_TRACKED(op))
1791 gc_list_remove(g);
1792 if (generations[0].count > 0) {
1793 generations[0].count--;
1794 }
1795 PyObject_FREE(g);
1796 }
1797
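#if 0
/* Illustrative only (not part of this file): the minimal shape of a GC-aware
 * container type in an extension module, showing how the functions above are
 * meant to be used together.  The type "MyObject" and its single "child"
 * field are hypothetical; the type object would also need Py_TPFLAGS_HAVE_GC
 * and the tp_traverse/tp_clear/tp_dealloc slots filled in. */
typedef struct {
    PyObject_HEAD
    PyObject *child;                /* may participate in a reference cycle */
} MyObject;

static int
My_traverse(MyObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->child);          /* report every PyObject* the object owns */
    return 0;
}

static int
My_clear(MyObject *self)
{
    Py_CLEAR(self->child);          /* break cycles by dropping owned references */
    return 0;
}

static void
My_dealloc(MyObject *self)
{
    PyObject_GC_UnTrack(self);      /* keep gc from seeing a half-torn-down object */
    My_clear(self);
    PyObject_GC_Del(self);
}

static PyObject *
My_new(PyTypeObject *type)
{
    MyObject *self = PyObject_GC_New(MyObject, type);
    if (self == NULL)
        return NULL;
    self->child = NULL;
    PyObject_GC_Track((PyObject *)self);    /* start participating in collections */
    return (PyObject *)self;
}
#endif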