/*

   Reference Cycle Garbage Collection
   ==================================

   Neil Schemenauer <nas@arctrix.com>

   Based on a post on the python-dev list.  Ideas from Guido van Rossum,
   Eric Tiedemann, and various others.

   http://www.arctrix.com/nas/python/gc/
   http://www.python.org/pipermail/python-dev/2000-March/003869.html
   http://www.python.org/pipermail/python-dev/2000-March/004010.html
   http://www.python.org/pipermail/python-dev/2000-March/004022.html

   For a high-level view of the collection process, read the collect
   function.

*/

#include "Python.h"
#include "frameobject.h"        /* for PyFrame_ClearFreeList */

/* Get an object's GC head */
#define AS_GC(o) ((PyGC_Head *)(o)-1)

/* Get the object given the GC head */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))

/*** Global GC state ***/

struct gc_generation {
    PyGC_Head head;
    int threshold; /* collection threshold */
    int count; /* count of allocations or collections of younger
                  generations */
};

#define NUM_GENERATIONS 3
#define GEN_HEAD(n) (&generations[n].head)

/* linked lists of container objects */
static struct gc_generation generations[NUM_GENERATIONS] = {
    /* PyGC_Head,                               threshold,      count */
    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},           700,            0},
    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},           10,             0},
    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},           10,             0},
};
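
/* Note: initializing each head's gc_next and gc_prev to point at the head
 * itself makes every generation start out as an empty circular list; the
 * static initializer above is equivalent to calling gc_list_init(GEN_HEAD(n))
 * (defined below) for each generation at startup.
 */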

PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);

static int enabled = 1; /* automatic collection enabled? */

/* true if we are currently running the collector */
static int collecting = 0;

/* list of uncollectable objects */
static PyObject *garbage = NULL;

/* Python string to use if unhandled exception occurs */
static PyObject *gc_str = NULL;

/* Python string used to look for __del__ attribute. */
static PyObject *delstr = NULL;

/* This is the number of objects that survived the last full collection.  It
   approximates the number of long-lived objects tracked by the GC.

   (By "full collection", we mean a collection of the oldest generation.)
*/
static Py_ssize_t long_lived_total = 0;

/* This is the number of objects that survived all "non-full" collections,
   and are awaiting their first full collection.
*/
static Py_ssize_t long_lived_pending = 0;

/*
   NOTE: about the counting of long-lived objects.

   To limit the cost of garbage collection, there are two strategies:
     - make each collection faster, e.g. by scanning fewer objects
     - do fewer collections
   This heuristic is about the latter strategy.

   In addition to the various configurable thresholds, we only trigger a
   full collection if the ratio
       long_lived_pending / long_lived_total
   is above a given value (hardwired to 25%).

   The reason is that, while "non-full" collections (i.e., collections of
   the young and middle generations) will always examine roughly the same
   number of objects -- determined by the aforementioned thresholds --,
   the cost of a full collection is proportional to the total number of
   long-lived objects, which is virtually unbounded.

   Indeed, it has been remarked that doing a full collection every
   <constant number> of object creations entails a dramatic performance
   degradation in workloads that consist of creating and storing lots of
   long-lived objects (e.g. building a large list of GC-tracked objects would
   show quadratic performance, instead of linear as expected: see issue #4074).

   Using the above ratio, instead, yields amortized linear performance in
   the total number of objects (the effect of which can be summarized
   thusly: "each full garbage collection is more and more costly as the
   number of objects grows, but we do fewer and fewer of them").

   This heuristic was suggested by Martin von Löwis on python-dev in
   June 2008.  His original analysis and proposal can be found at:
   http://mail.python.org/pipermail/python-dev/2008-June/080579.html
*/
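
/* A minimal sketch of how the 25% rule combines with the generation
 * thresholds.  The real check lives in collect_generations() below, where
 * the condition is inverted to *skip* the collection:
 *
 *     if (generations[NUM_GENERATIONS-1].count >
 *             generations[NUM_GENERATIONS-1].threshold &&
 *         long_lived_pending >= long_lived_total / 4)
 *         collect(NUM_GENERATIONS - 1);    // full collection
 *
 * E.g. with long_lived_total == 100000, a full collection is not attempted
 * until roughly 25000 survivors of the middle generation have piled up in
 * long_lived_pending, no matter how often the oldest generation's count
 * trips its threshold.
 */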

/*
   NOTE: about untracking of mutable objects.

   Certain types of container cannot participate in a reference cycle, and
   so do not need to be tracked by the garbage collector.  Untracking these
   objects reduces the cost of garbage collections.  However, determining
   which objects may be untracked is not free, and the costs must be
   weighed against the benefits for garbage collection.

   There are two possible strategies for when to untrack a container:

   i) When the container is created.
   ii) When the container is examined by the garbage collector.

   Tuples containing only immutable objects (integers, strings etc, and
   recursively, tuples of immutable objects) do not need to be tracked.
   The interpreter creates a large number of tuples, many of which will
   not survive until garbage collection.  It is therefore not worthwhile
   to untrack eligible tuples at creation time.

   Instead, all tuples except the empty tuple are tracked when created.
   During garbage collection it is determined whether any surviving tuples
   can be untracked.  A tuple can be untracked if all of its contents are
   already not tracked.  Tuples are examined for untracking in all garbage
   collection cycles.  It may take more than one cycle to untrack a tuple.
   (See the sketch following this comment.)

   Dictionaries containing only immutable objects also do not need to be
   tracked.  Dictionaries are untracked when created.  If a tracked item is
   inserted into a dictionary (either as a key or value), the dictionary
   becomes tracked.  During a full garbage collection (all generations),
   the collector will untrack any dictionaries whose contents are not
   tracked.

   The module provides the Python function is_tracked(obj), which returns
   the CURRENT tracking status of the object.  Subsequent garbage
   collections may change the tracking status of the object.

   Untracking of certain containers was introduced in issue #4688, and
   the algorithm was refined in response to issue #14775.
*/
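
/* A minimal sketch of the untracking rule for tuples.  This is not the real
 * _PyTuple_MaybeUntrack (which lives in tupleobject.c and also handles
 * partially-initialized tuples); it only illustrates the "untrack when no
 * element is tracked" condition described above.
 */
#if 0
static void
maybe_untrack_tuple_sketch(PyObject *t)     /* hypothetical helper */
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(t); i++) {
        PyObject *elt = PyTuple_GET_ITEM(t, i);
        /* Any tracked element could still participate in a cycle. */
        if (PyObject_IS_GC(elt) && IS_TRACKED(elt))
            return;
    }
    /* No element is tracked, so this tuple cannot close a cycle. */
    PyObject_GC_UnTrack(t);
}
#endif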

/* set for debugging information */
#define DEBUG_STATS             (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
#define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
#define DEBUG_INSTANCES         (1<<3) /* print instances */
#define DEBUG_OBJECTS           (1<<4) /* print other objects */
#define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK              (DEBUG_COLLECTABLE | \
                                 DEBUG_UNCOLLECTABLE | \
                                 DEBUG_INSTANCES | \
                                 DEBUG_OBJECTS | \
                                 DEBUG_SAVEALL)
static int debug;
static PyObject *tmod = NULL;

/*--------------------------------------------------------------------------
gc_refs values.

Between collections, every gc'ed object has one of two gc_refs values:

GC_UNTRACKED
    The initial state; objects returned by PyObject_GC_Malloc are in this
    state.  The object doesn't live in any generation list, and its
    tp_traverse slot must not be called.

GC_REACHABLE
    The object lives in some generation list, and its tp_traverse is safe to
    call.  An object transitions to GC_REACHABLE when PyObject_GC_Track
    is called.

During a collection, gc_refs can temporarily take on other states:

>= 0
    At the start of a collection, update_refs() copies the true refcount
    to gc_refs, for each object in the generation being collected.
    subtract_refs() then adjusts gc_refs so that it equals the number of
    times an object is referenced directly from outside the generation
    being collected.
    gc_refs remains >= 0 throughout these steps.

GC_TENTATIVELY_UNREACHABLE
    move_unreachable() then moves objects not reachable (whether directly or
    indirectly) from outside the generation into an "unreachable" set.
    Objects that are found to be reachable have gc_refs set to GC_REACHABLE
    again.  Objects that are found to be unreachable have gc_refs set to
    GC_TENTATIVELY_UNREACHABLE.  It's "tentatively" because the pass doing
    this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
    transition back to GC_REACHABLE.

    Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
    for collection.  If it's decided not to collect such an object (e.g.,
    it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
----------------------------------------------------------------------------
*/
#define GC_UNTRACKED                    _PyGC_REFS_UNTRACKED
#define GC_REACHABLE                    _PyGC_REFS_REACHABLE
#define GC_TENTATIVELY_UNREACHABLE      _PyGC_REFS_TENTATIVELY_UNREACHABLE

#define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED)
#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
#define IS_TENTATIVELY_UNREACHABLE(o) ( \
    (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)

/*** list functions ***/

static void
gc_list_init(PyGC_Head *list)
{
    list->gc.gc_prev = list;
    list->gc.gc_next = list;
}

static int
gc_list_is_empty(PyGC_Head *list)
{
    return (list->gc.gc_next == list);
}

#if 0
/* This became unused after gc_list_move() was introduced. */
/* Append `node` to `list`. */
static void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
    node->gc.gc_next = list;
    node->gc.gc_prev = list->gc.gc_prev;
    node->gc.gc_prev->gc.gc_next = node;
    list->gc.gc_prev = node;
}
#endif

/* Remove `node` from the gc list it's currently in. */
static void
gc_list_remove(PyGC_Head *node)
{
    node->gc.gc_prev->gc.gc_next = node->gc.gc_next;
    node->gc.gc_next->gc.gc_prev = node->gc.gc_prev;
    node->gc.gc_next = NULL; /* object is not currently tracked */
}

/* Move `node` from the gc list it's currently in (which is not explicitly
 * named here) to the end of `list`.  This is semantically the same as
 * gc_list_remove(node) followed by gc_list_append(node, list).
 */
static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
{
    PyGC_Head *new_prev;
    PyGC_Head *current_prev = node->gc.gc_prev;
    PyGC_Head *current_next = node->gc.gc_next;
    /* Unlink from current list. */
    current_prev->gc.gc_next = current_next;
    current_next->gc.gc_prev = current_prev;
    /* Relink at end of new list. */
    new_prev = node->gc.gc_prev = list->gc.gc_prev;
    new_prev->gc.gc_next = list->gc.gc_prev = node;
    node->gc.gc_next = list;
}

/* append list `from` onto list `to`; `from` becomes an empty list */
static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{
    PyGC_Head *tail;
    assert(from != to);
    if (!gc_list_is_empty(from)) {
        tail = to->gc.gc_prev;
        tail->gc.gc_next = from->gc.gc_next;
        tail->gc.gc_next->gc.gc_prev = tail;
        to->gc.gc_prev = from->gc.gc_prev;
        to->gc.gc_prev->gc.gc_next = to;
    }
    gc_list_init(from);
}

static Py_ssize_t
gc_list_size(PyGC_Head *list)
{
    PyGC_Head *gc;
    Py_ssize_t n = 0;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        n++;
    }
    return n;
}
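
/* A small usage sketch of the primitives above (hypothetical, compiled out).
 * The heads are sentinel nodes, so an "empty" list is one whose head points
 * at itself; merging is O(1) while sizing is O(n).
 */
#if 0
static void
gc_list_example(void)
{
    PyGC_Head a, b;
    gc_list_init(&a);
    gc_list_init(&b);
    assert(gc_list_is_empty(&a) && gc_list_size(&a) == 0);
    /* Splice everything from b onto the tail of a; b becomes empty. */
    gc_list_merge(&b, &a);
    assert(gc_list_is_empty(&b));
}
#endif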

/* Append objects in a GC list to a Python list.
 * Return 0 if all OK, < 0 if error (out of memory for list).
 */
static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
{
    PyGC_Head *gc;
    for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);
        if (op != py_list) {
            if (PyList_Append(py_list, op)) {
                return -1; /* exception */
            }
        }
    }
    return 0;
}

/*** end of list stuff ***/


/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 for all objects
 * in containers, and is GC_REACHABLE for all tracked gc objects not in
 * containers.
 */
static void
update_refs(PyGC_Head *containers)
{
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        assert(gc->gc.gc_refs == GC_REACHABLE);
        gc->gc.gc_refs = Py_REFCNT(FROM_GC(gc));
        /* Python's cyclic gc should never see an incoming refcount
         * of 0:  if something decref'ed to 0, it should have been
         * deallocated immediately at that time.
         * Possible cause (if the assert triggers):  a tp_dealloc
         * routine left a gc-aware object tracked during its teardown
         * phase, and did something -- or allowed something to happen --
         * that called back into Python.  gc can trigger then, and may
         * see the still-tracked dying object.  Before this assert
         * was added, such mistakes went on to allow gc to try to
         * delete the object again.  In a debug build, that caused
         * a mysterious segfault, when _Py_ForgetReference tried
         * to remove the object from the doubly-linked list of all
         * objects a second time.  In a release build, an actual
         * double deallocation occurred, which leads to corruption
         * of the allocator's internal bookkeeping pointers.  That's
         * so serious that maybe this should be a release-build
         * check instead of an assert?
         */
        assert(gc->gc.gc_refs != 0);
    }
}

/* A traversal callback for subtract_refs. */
static int
visit_decref(PyObject *op, void *data)
{
    assert(op != NULL);
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        /* We're only interested in gc_refs for objects in the
         * generation being collected, which can be recognized
         * because only they have positive gc_refs.
         */
        assert(gc->gc.gc_refs != 0); /* else refcount was too small */
        if (gc->gc.gc_refs > 0)
            gc->gc.gc_refs--;
    }
    return 0;
}

/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers, and is GC_REACHABLE for all tracked gc
 * objects not in containers.  The ones with gc_refs > 0 are directly
 * reachable from outside containers, and so can't be collected.
 */
static void
subtract_refs(PyGC_Head *containers)
{
    traverseproc traverse;
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_decref,
                        NULL);
    }
}
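
/* For context: subtract_refs works because every container type supplies a
 * tp_traverse slot that calls the visit callback on each object it directly
 * references.  A minimal sketch of such a slot for a hypothetical container
 * holding two object fields (Py_VISIT expands to a guarded visit(...) call
 * and requires the parameters to be named `visit` and `arg`):
 */
#if 0
typedef struct {
    PyObject_HEAD
    PyObject *first;        /* hypothetical fields */
    PyObject *second;
} sketchobject;

static int
sketch_traverse(sketchobject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->first);  /* no-op when the field is NULL */
    Py_VISIT(self->second);
    return 0;
}
#endif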

/* A traversal callback for move_unreachable. */
static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
{
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        const Py_ssize_t gc_refs = gc->gc.gc_refs;

        if (gc_refs == 0) {
            /* This is in move_unreachable's 'young' list, but
             * the traversal hasn't yet gotten to it.  All
             * we need to do is tell move_unreachable that it's
             * reachable.
             */
            gc->gc.gc_refs = 1;
        }
        else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
            /* This had gc_refs = 0 when move_unreachable got
             * to it, but turns out it's reachable after all.
             * Move it back to move_unreachable's 'young' list,
             * and move_unreachable will eventually get to it
             * again.
             */
            gc_list_move(gc, reachable);
            gc->gc.gc_refs = 1;
        }
        /* Else there's nothing to do.
         * If gc_refs > 0, it must be in move_unreachable's 'young'
         * list, and move_unreachable will eventually get to it.
         * If gc_refs == GC_REACHABLE, it's either in some other
         * generation so we don't care about it, or move_unreachable
         * already dealt with it.
         * If gc_refs == GC_UNTRACKED, it must be ignored.
         */
        else {
            assert(gc_refs > 0
                   || gc_refs == GC_REACHABLE
                   || gc_refs == GC_UNTRACKED);
        }
    }
    return 0;
}

/* Move the unreachable objects from young to unreachable.  After this,
 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All tracked
 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
 * All objects in young after this are directly or indirectly reachable
 * from outside the original young; and all objects in unreachable are
 * not.
 */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
    PyGC_Head *gc = young->gc.gc_next;

    /* Invariants:  all objects "to the left" of us in young have gc_refs
     * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
     * from outside the young list as it was at entry.  All other objects
     * from the original young "to the left" of us are in unreachable now,
     * and have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All objects to the
     * left of us in 'young' now have been scanned, and no objects here
     * or to the right have been scanned yet.
     */

    while (gc != young) {
        PyGC_Head *next;

        if (gc->gc.gc_refs) {
            /* gc is definitely reachable from outside the
             * original 'young'.  Mark it as such, and traverse
             * its pointers to find any other objects that may
             * be directly reachable from it.  Note that the
             * call to tp_traverse may append objects to young,
             * so we have to wait until it returns to determine
             * the next object to visit.
             */
            PyObject *op = FROM_GC(gc);
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
            assert(gc->gc.gc_refs > 0);
            gc->gc.gc_refs = GC_REACHABLE;
            (void) traverse(op,
                            (visitproc)visit_reachable,
                            (void *)young);
            next = gc->gc.gc_next;
            if (PyTuple_CheckExact(op)) {
                _PyTuple_MaybeUntrack(op);
            }
        }
        else {
            /* This *may* be unreachable.  To make progress,
             * assume it is.  gc isn't directly reachable from
             * any object we've already traversed, but may be
             * reachable from an object we haven't gotten to yet.
             * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
            next = gc->gc.gc_next;
            gc_list_move(gc, unreachable);
            gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
        }
        gc = next;
    }
}

/* Return true if object has a finalization method.
 * CAUTION:  An instance of an old-style class has to be checked for a
 * __del__ method, and earlier versions of this used to call PyObject_HasAttr,
 * which in turn could call the class's __getattr__ hook (if any).  That
 * could invoke arbitrary Python code, mutating the object graph in arbitrary
 * ways, and that was the source of some excruciatingly subtle bugs.
 */
static int
has_finalizer(PyObject *op)
{
    if (PyInstance_Check(op)) {
        assert(delstr != NULL);
        return _PyInstance_Lookup(op, delstr) != NULL;
    }
    else if (PyType_HasFeature(op->ob_type, Py_TPFLAGS_HEAPTYPE))
        return op->ob_type->tp_del != NULL;
    else if (PyGen_CheckExact(op))
        return PyGen_NeedsFinalizing((PyGenObject *)op);
    else
        return 0;
}

/* Try to untrack all currently tracked dictionaries */
static void
untrack_dicts(PyGC_Head *head)
{
    PyGC_Head *next, *gc = head->gc.gc_next;
    while (gc != head) {
        PyObject *op = FROM_GC(gc);
        next = gc->gc.gc_next;
        if (PyDict_CheckExact(op))
            _PyDict_MaybeUntrack(op);
        gc = next;
    }
}

/* Move the objects in unreachable with __del__ methods into `finalizers`.
 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
 */
static void
move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
{
    PyGC_Head *gc;
    PyGC_Head *next;

    /* March over unreachable.  Move objects with finalizers into
     * `finalizers`.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyObject *op = FROM_GC(gc);

        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (has_finalizer(op)) {
            gc_list_move(gc, finalizers);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
}

/* A traversal callback for move_finalizer_reachable. */
static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
    if (PyObject_IS_GC(op)) {
        if (IS_TENTATIVELY_UNREACHABLE(op)) {
            PyGC_Head *gc = AS_GC(op);
            gc_list_move(gc, tolist);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
    return 0;
}

/* Move objects that are reachable from finalizers, from the unreachable set
 * into finalizers set.
 */
static void
move_finalizer_reachable(PyGC_Head *finalizers)
{
    traverseproc traverse;
    PyGC_Head *gc = finalizers->gc.gc_next;
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        /* Note that the finalizers list may grow during this. */
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_move,
                        (void *)finalizers);
    }
}

/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  Objects
 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
 * unreachable are left at GC_TENTATIVELY_UNREACHABLE.  When this returns,
 * no object in `unreachable` is weakly referenced anymore.
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{
    PyGC_Head *gc;
    PyObject *op;               /* generally FROM_GC(gc) */
    PyWeakReference *wr;        /* generally a cast of op */
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
    PyGC_Head *next;
    int num_freed = 0;

    gc_list_init(&wrcb_to_call);

    /* Clear all weakrefs to the objects in unreachable.  If such a weakref
     * also has a callback, move it into `wrcb_to_call` if the callback
     * needs to be invoked.  Note that we cannot invoke any callbacks until
     * all weakrefs to unreachable objects are cleared, lest the callback
     * resurrect an unreachable object via a still-active weakref.  We
     * make another pass over wrcb_to_call, invoking callbacks, after this
     * pass completes.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        op = FROM_GC(gc);
        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
            continue;

        /* It supports weakrefs.  Does it have any? */
        wrlist = (PyWeakReference **)
                 PyObject_GET_WEAKREFS_LISTPTR(op);

        /* `op` may have some weakrefs.  March over the list, clear
         * all the weakrefs, and move the weakrefs with callbacks
         * that must be called into wrcb_to_call.
         */
        for (wr = *wrlist; wr != NULL; wr = *wrlist) {
            PyGC_Head *wrasgc;  /* AS_GC(wr) */

            /* _PyWeakref_ClearRef clears the weakref but leaves
             * the callback pointer intact.  Obscure:  it also
             * changes *wrlist.
             */
            assert(wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            assert(wr->wr_object == Py_None);
            if (wr->wr_callback == NULL)
                continue;       /* no callback */

            /* Headache time.  `op` is going away, and is weakly referenced by
             * `wr`, which has a callback.  Should the callback be invoked?  If wr
             * is also trash, no:
             *
             * 1. There's no need to call it.  The object and the weakref are
             *    both going away, so it's legitimate to pretend the weakref is
             *    going away first.  The user has to ensure a weakref outlives its
             *    referent if they want a guarantee that the wr callback will get
             *    invoked.
             *
             * 2. It may be catastrophic to call it.  If the callback is also in
             *    cyclic trash (CT), then although the CT is unreachable from
             *    outside the current generation, CT may be reachable from the
             *    callback.  Then the callback could resurrect insane objects.
             *
             * Since the callback is never needed and may be unsafe in this case,
             * wr is simply left in the unreachable set.  Note that because we
             * already called _PyWeakref_ClearRef(wr), its callback will never
             * trigger.
             *
             * OTOH, if wr isn't part of CT, we should invoke the callback:  the
             * weakref outlived the trash.  Note that since wr isn't CT in this
             * case, its callback can't be CT either -- wr acted as an external
             * root to this generation, and therefore its callback did too.  So
             * nothing in CT is reachable from the callback either, so it's hard
             * to imagine how calling it later could create a problem for us.  wr
             * is moved to wrcb_to_call in this case.
             */
            if (IS_TENTATIVELY_UNREACHABLE(wr))
                continue;
            assert(IS_REACHABLE(wr));

            /* Create a new reference so that wr can't go away
             * before we can process it again.
             */
            Py_INCREF(wr);

            /* Move wr to wrcb_to_call, for the next pass. */
            wrasgc = AS_GC(wr);
            assert(wrasgc != next); /* wrasgc is reachable, but
                                       next isn't, so they can't
                                       be the same */
            gc_list_move(wrasgc, &wrcb_to_call);
        }
    }

    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
     * because they can't reference unreachable objects.
     */
    while (! gc_list_is_empty(&wrcb_to_call)) {
        PyObject *temp;
        PyObject *callback;

        gc = wrcb_to_call.gc.gc_next;
        op = FROM_GC(gc);
        assert(IS_REACHABLE(op));
        assert(PyWeakref_Check(op));
        wr = (PyWeakReference *)op;
        callback = wr->wr_callback;
        assert(callback != NULL);

        /* copy-paste of weakrefobject.c's handle_callback() */
        temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
        if (temp == NULL)
            PyErr_WriteUnraisable(callback);
        else
            Py_DECREF(temp);

        /* Give up the reference we created in the first pass.  When
         * op's refcount hits 0 (which it may or may not do right now),
         * op's tp_dealloc will decref op->wr_callback too.  Note
         * that the refcount probably will hit 0 now, and because this
         * weakref was reachable to begin with, gc didn't already
         * add it to its count of freed objects.  Example:  a reachable
         * weak value dict maps some key to this reachable weakref.
         * The callback removes this key->weakref mapping from the
         * dict, leaving no other references to the weakref (excepting
         * ours).
         */
        Py_DECREF(op);
        if (wrcb_to_call.gc.gc_next == gc) {
            /* object is still alive -- move it */
            gc_list_move(gc, old);
        }
        else
            ++num_freed;
    }

    return num_freed;
}

static void
debug_instance(char *msg, PyInstanceObject *inst)
{
    char *cname;
    /* simple version of instance_repr */
    PyObject *classname = inst->in_class->cl_name;
    if (classname != NULL && PyString_Check(classname))
        cname = PyString_AsString(classname);
    else
        cname = "?";
    PySys_WriteStderr("gc: %.100s <%.100s instance at %p>\n",
                      msg, cname, inst);
}

static void
debug_cycle(char *msg, PyObject *op)
{
    if ((debug & DEBUG_INSTANCES) && PyInstance_Check(op)) {
        debug_instance(msg, (PyInstanceObject *)op);
    }
    else if (debug & DEBUG_OBJECTS) {
        PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
                          msg, Py_TYPE(op)->tp_name, op);
    }
}

/* Handle uncollectable garbage (cycles with finalizers, and stuff reachable
 * only from such cycles).
 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
 * garbage list (a Python list), else only the objects in finalizers with
 * __del__ methods are appended to garbage.  All objects in finalizers are
 * merged into the old list regardless.
 */
static void
handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
{
    PyGC_Head *gc = finalizers->gc.gc_next;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            Py_FatalError("gc couldn't create gc.garbage list");
    }
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);

        if ((debug & DEBUG_SAVEALL) || has_finalizer(op)) {
            if (PyList_Append(garbage, op) < 0)
                break;
        }
    }

    gc_list_merge(finalizers, old);
}

/* Break reference cycles by clearing the containers involved.  This is
 * tricky business as the lists can be changing and we don't know which
 * objects may be freed.  It is possible I screwed something up here.
 */
static void
delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
{
    inquiry clear;

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = collectable->gc.gc_next;
        PyObject *op = FROM_GC(gc);

        assert(IS_TENTATIVELY_UNREACHABLE(op));
        if (debug & DEBUG_SAVEALL) {
            PyList_Append(garbage, op);
        }
        else {
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
                Py_INCREF(op);
                clear(op);
                Py_DECREF(op);
            }
        }
        if (collectable->gc.gc_next == gc) {
            /* object is still alive, move it, it may die later */
            gc_list_move(gc, old);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
}
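
/* For context: delete_garbage breaks cycles by invoking each trash object's
 * tp_clear slot.  A minimal sketch of such a slot, for the hypothetical
 * container used in the tp_traverse sketch above; Py_CLEAR both drops the
 * reference and NULLs the field, so the cycle is severed safely even if
 * tp_clear runs more than once:
 */
#if 0
static int
sketch_clear(sketchobject *self)
{
    Py_CLEAR(self->first);
    Py_CLEAR(self->second);
    return 0;
}
#endif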

/* Clear all free lists
 * All free lists are cleared during the collection of the highest generation.
 * Allocated items in the free list may keep a pymalloc arena occupied.
 * Clearing the free lists may give back memory to the OS earlier.
 */
static void
clear_freelists(void)
{
    (void)PyMethod_ClearFreeList();
    (void)PyFrame_ClearFreeList();
    (void)PyCFunction_ClearFreeList();
    (void)PyTuple_ClearFreeList();
#ifdef Py_USING_UNICODE
    (void)PyUnicode_ClearFreeList();
#endif
    (void)PyInt_ClearFreeList();
    (void)PyFloat_ClearFreeList();
}

static double
get_time(void)
{
    double result = 0;
    if (tmod != NULL) {
        PyObject *f = PyObject_CallMethod(tmod, "time", NULL);
        if (f == NULL) {
            PyErr_Clear();
        }
        else {
            if (PyFloat_Check(f))
                result = PyFloat_AsDouble(f);
            Py_DECREF(f);
        }
    }
    return result;
}

/* This is the main function.  Read this to understand how the
 * collection process works. */
static Py_ssize_t
collect(int generation)
{
    int i;
    Py_ssize_t m = 0; /* # objects collected */
    Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
    PyGC_Head *young; /* the generation we are examining */
    PyGC_Head *old; /* next older generation */
    PyGC_Head unreachable; /* non-problematic unreachable trash */
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
    PyGC_Head *gc;
    double t1 = 0.0;

    if (delstr == NULL) {
        delstr = PyString_InternFromString("__del__");
        if (delstr == NULL)
            Py_FatalError("gc couldn't allocate \"__del__\"");
    }

    if (debug & DEBUG_STATS) {
        PySys_WriteStderr("gc: collecting generation %d...\n",
                          generation);
        PySys_WriteStderr("gc: objects in each generation:");
        for (i = 0; i < NUM_GENERATIONS; i++)
            PySys_WriteStderr(" %" PY_FORMAT_SIZE_T "d",
                              gc_list_size(GEN_HEAD(i)));
        t1 = get_time();
        PySys_WriteStderr("\n");
    }

    /* update collection and allocation counters */
    if (generation+1 < NUM_GENERATIONS)
        generations[generation+1].count += 1;
    for (i = 0; i <= generation; i++)
        generations[i].count = 0;

    /* merge younger generations with one we are currently collecting */
    for (i = 0; i < generation; i++) {
        gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
    }

    /* handy references */
    young = GEN_HEAD(generation);
    if (generation < NUM_GENERATIONS-1)
        old = GEN_HEAD(generation+1);
    else
        old = young;

    /* Using ob_refcnt and gc_refs, calculate which objects in the
     * container set are reachable from outside the set (i.e., have a
     * refcount greater than 0 when all the references within the
     * set are taken into account).
     */
    update_refs(young);
    subtract_refs(young);

    /* Leave everything reachable from outside young in young, and move
     * everything else (in young) to unreachable.
     * NOTE:  This used to move the reachable objects into a reachable
     * set instead.  But most things usually turn out to be reachable,
     * so it's more efficient to move the unreachable things.
     */
    gc_list_init(&unreachable);
    move_unreachable(young, &unreachable);

    /* Move reachable objects to next generation. */
    if (young != old) {
        if (generation == NUM_GENERATIONS - 2) {
            long_lived_pending += gc_list_size(young);
        }
        gc_list_merge(young, old);
    }
    else {
        /* We only untrack dicts in full collections, to avoid quadratic
           dict build-up.  See issue #14775. */
        untrack_dicts(young);
        long_lived_pending = 0;
        long_lived_total = gc_list_size(young);
    }

    /* All objects in unreachable are trash, but objects reachable from
     * finalizers can't safely be deleted.  Python programmers should take
     * care not to create such things.  For Python, finalizers means
     * instance objects with __del__ methods.  Weakrefs with callbacks
     * can also call arbitrary Python code but they will be dealt with by
     * handle_weakrefs().
     */
    gc_list_init(&finalizers);
    move_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_finalizer_reachable(&finalizers);

    /* Collect statistics on collectable objects found and print
     * debugging information.
     */
    for (gc = unreachable.gc.gc_next; gc != &unreachable;
         gc = gc->gc.gc_next) {
        m++;
        if (debug & DEBUG_COLLECTABLE) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }

    /* Clear weakrefs and invoke callbacks as necessary. */
    m += handle_weakrefs(&unreachable, old);

    /* Call tp_clear on objects in the unreachable set.  This will cause
     * the reference cycles to be broken.  It may also cause some objects
     * in finalizers to be freed.
     */
    delete_garbage(&unreachable, old);

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    for (gc = finalizers.gc.gc_next;
         gc != &finalizers;
         gc = gc->gc.gc_next) {
        n++;
        if (debug & DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    if (debug & DEBUG_STATS) {
        double t2 = get_time();
        if (m == 0 && n == 0)
            PySys_WriteStderr("gc: done");
        else
            PySys_WriteStderr(
                "gc: done, "
                "%" PY_FORMAT_SIZE_T "d unreachable, "
                "%" PY_FORMAT_SIZE_T "d uncollectable",
                n+m, n);
        if (t1 && t2) {
            PySys_WriteStderr(", %.4fs elapsed", t2-t1);
        }
        PySys_WriteStderr(".\n");
    }

    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    handle_finalizers(&finalizers, old);

    /* Clear free list only during the collection of the highest
     * generation */
    if (generation == NUM_GENERATIONS-1) {
        clear_freelists();
    }

    if (PyErr_Occurred()) {
        if (gc_str == NULL)
            gc_str = PyString_FromString("garbage collection");
        PyErr_WriteUnraisable(gc_str);
        Py_FatalError("unexpected exception during garbage collection");
    }
    return n+m;
}

static Py_ssize_t
collect_generations(void)
{
    int i;
    Py_ssize_t n = 0;

    /* Find the oldest generation (highest numbered) where the count
     * exceeds the threshold.  Objects in that generation and
     * generations younger than it will be collected. */
    for (i = NUM_GENERATIONS-1; i >= 0; i--) {
        if (generations[i].count > generations[i].threshold) {
            /* Avoid quadratic performance degradation in number
               of tracked objects.  See comments at the beginning
               of this file, and issue #4074.
            */
            if (i == NUM_GENERATIONS - 1
                && long_lived_pending < long_lived_total / 4)
                continue;
            n = collect(i);
            break;
        }
    }
    return n;
}
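
/* Worked example with the default thresholds (700, 10, 10): generation 0 is
 * collected once net allocations of GC objects exceed 700; each generation-0
 * collection bumps generations[1].count, so generation 1 is collected after
 * 10 such collections (roughly every 7,000 allocations); likewise generation
 * 2 becomes eligible after 10 generation-1 collections (roughly every 70,000
 * allocations), but actually runs only once long_lived_pending reaches 25%
 * of long_lived_total, per the heuristic described near the top of the file.
 */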

PyDoc_STRVAR(gc_enable__doc__,
"enable() -> None\n"
"\n"
"Enable automatic garbage collection.\n");

static PyObject *
gc_enable(PyObject *self, PyObject *noargs)
{
    enabled = 1;
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_disable__doc__,
"disable() -> None\n"
"\n"
"Disable automatic garbage collection.\n");

static PyObject *
gc_disable(PyObject *self, PyObject *noargs)
{
    enabled = 0;
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_isenabled__doc__,
"isenabled() -> status\n"
"\n"
"Returns true if automatic garbage collection is enabled.\n");

static PyObject *
gc_isenabled(PyObject *self, PyObject *noargs)
{
    return PyBool_FromLong((long)enabled);
}

PyDoc_STRVAR(gc_collect__doc__,
"collect([generation]) -> n\n"
"\n"
"With no arguments, run a full collection.  The optional argument\n"
"may be an integer specifying which generation to collect.  A ValueError\n"
"is raised if the generation number is invalid.\n\n"
"The number of unreachable objects is returned.\n");

static PyObject *
gc_collect(PyObject *self, PyObject *args, PyObject *kws)
{
    static char *keywords[] = {"generation", NULL};
    int genarg = NUM_GENERATIONS - 1;
    Py_ssize_t n;

    if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg))
        return NULL;
    else if (genarg < 0 || genarg >= NUM_GENERATIONS) {
        PyErr_SetString(PyExc_ValueError, "invalid generation");
        return NULL;
    }

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        collecting = 1;
        n = collect(genarg);
        collecting = 0;
    }

    return PyInt_FromSsize_t(n);
}

PyDoc_STRVAR(gc_set_debug__doc__,
"set_debug(flags) -> None\n"
"\n"
"Set the garbage collection debugging flags.  Debugging information is\n"
"written to sys.stderr.\n"
"\n"
"flags is an integer and can have the following bits turned on:\n"
"\n"
"  DEBUG_STATS - Print statistics during collection.\n"
"  DEBUG_COLLECTABLE - Print collectable objects found.\n"
"  DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
"  DEBUG_INSTANCES - Print instance objects.\n"
"  DEBUG_OBJECTS - Print objects other than instances.\n"
"  DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
"  DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");

static PyObject *
gc_set_debug(PyObject *self, PyObject *args)
{
    if (!PyArg_ParseTuple(args, "i:set_debug", &debug))
        return NULL;

    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_get_debug__doc__,
"get_debug() -> flags\n"
"\n"
"Get the garbage collection debugging flags.\n");

static PyObject *
gc_get_debug(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("i", debug);
}

PyDoc_STRVAR(gc_set_thresh__doc__,
"set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
"\n"
"Sets the collection thresholds.  Setting threshold0 to zero disables\n"
"collection.\n");

static PyObject *
gc_set_thresh(PyObject *self, PyObject *args)
{
    int i;
    if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
                          &generations[0].threshold,
                          &generations[1].threshold,
                          &generations[2].threshold))
        return NULL;
    for (i = 2; i < NUM_GENERATIONS; i++) {
        /* generations higher than 2 get the same threshold */
        generations[i].threshold = generations[2].threshold;
    }

    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_get_thresh__doc__,
"get_threshold() -> (threshold0, threshold1, threshold2)\n"
"\n"
"Return the current collection thresholds.\n");

static PyObject *
gc_get_thresh(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].threshold,
                         generations[1].threshold,
                         generations[2].threshold);
}

PyDoc_STRVAR(gc_get_count__doc__,
"get_count() -> (count0, count1, count2)\n"
"\n"
"Return the current collection counts.\n");

static PyObject *
gc_get_count(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].count,
                         generations[1].count,
                         generations[2].count);
}

static int
referrersvisit(PyObject* obj, PyObject *objs)
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
        if (PyTuple_GET_ITEM(objs, i) == obj)
            return 1;
    return 0;
}

static int
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
{
    PyGC_Head *gc;
    PyObject *obj;
    traverseproc traverse;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        obj = FROM_GC(gc);
        traverse = Py_TYPE(obj)->tp_traverse;
        if (obj == objs || obj == resultlist)
            continue;
        if (traverse(obj, (visitproc)referrersvisit, objs)) {
            if (PyList_Append(resultlist, obj) < 0)
                return 0; /* error */
        }
    }
    return 1; /* no error */
}

PyDoc_STRVAR(gc_get_referrers__doc__,
"get_referrers(*objs) -> list\n\
Return the list of objects that directly refer to any of objs.");

static PyObject *
gc_get_referrers(PyObject *self, PyObject *args)
{
    int i;
    PyObject *result = PyList_New(0);
    if (!result) return NULL;

    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (!(gc_referrers_for(args, GEN_HEAD(i), result))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

/* Append obj to list; return true if error (out of memory), false if OK. */
static int
referentsvisit(PyObject *obj, PyObject *list)
{
    return PyList_Append(list, obj) < 0;
}

PyDoc_STRVAR(gc_get_referents__doc__,
"get_referents(*objs) -> list\n\
Return the list of objects that are directly referred to by objs.");

static PyObject *
gc_get_referents(PyObject *self, PyObject *args)
{
    Py_ssize_t i;
    PyObject *result = PyList_New(0);

    if (result == NULL)
        return NULL;

    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        traverseproc traverse;
        PyObject *obj = PyTuple_GET_ITEM(args, i);

        if (! PyObject_IS_GC(obj))
            continue;
        traverse = Py_TYPE(obj)->tp_traverse;
        if (! traverse)
            continue;
        if (traverse(obj, (visitproc)referentsvisit, result)) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

PyDoc_STRVAR(gc_get_objects__doc__,
"get_objects() -> [...]\n"
"\n"
"Return a list of objects tracked by the collector (excluding the list\n"
"returned).\n");

static PyObject *
gc_get_objects(PyObject *self, PyObject *noargs)
{
    int i;
    PyObject* result;

    result = PyList_New(0);
    if (result == NULL)
        return NULL;
    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (append_objects(result, GEN_HEAD(i))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

PyDoc_STRVAR(gc_is_tracked__doc__,
"is_tracked(obj) -> bool\n"
"\n"
"Returns true if the object is tracked by the garbage collector.\n"
"Simple atomic objects will return false.\n"
);

static PyObject *
gc_is_tracked(PyObject *self, PyObject *obj)
{
    PyObject *result;

    if (PyObject_IS_GC(obj) && IS_TRACKED(obj))
        result = Py_True;
    else
        result = Py_False;
    Py_INCREF(result);
    return result;
}


PyDoc_STRVAR(gc__doc__,
"This module provides access to the garbage collector for reference cycles.\n"
"\n"
"enable() -- Enable automatic garbage collection.\n"
"disable() -- Disable automatic garbage collection.\n"
"isenabled() -- Returns true if automatic collection is enabled.\n"
"collect() -- Do a full collection right now.\n"
"get_count() -- Return the current collection counts.\n"
"set_debug() -- Set debugging flags.\n"
"get_debug() -- Get debugging flags.\n"
"set_threshold() -- Set the collection thresholds.\n"
"get_threshold() -- Return the current collection thresholds.\n"
"get_objects() -- Return a list of all objects tracked by the collector.\n"
"is_tracked() -- Returns true if a given object is tracked.\n"
"get_referrers() -- Return the list of objects that refer to an object.\n"
"get_referents() -- Return the list of objects that an object refers to.\n");

static PyMethodDef GcMethods[] = {
    {"enable",         gc_enable,        METH_NOARGS,  gc_enable__doc__},
    {"disable",        gc_disable,       METH_NOARGS,  gc_disable__doc__},
    {"isenabled",      gc_isenabled,     METH_NOARGS,  gc_isenabled__doc__},
    {"set_debug",      gc_set_debug,     METH_VARARGS, gc_set_debug__doc__},
    {"get_debug",      gc_get_debug,     METH_NOARGS,  gc_get_debug__doc__},
    {"get_count",      gc_get_count,     METH_NOARGS,  gc_get_count__doc__},
    {"set_threshold",  gc_set_thresh,    METH_VARARGS, gc_set_thresh__doc__},
    {"get_threshold",  gc_get_thresh,    METH_NOARGS,  gc_get_thresh__doc__},
    {"collect",        (PyCFunction)gc_collect,
        METH_VARARGS | METH_KEYWORDS,                  gc_collect__doc__},
    {"get_objects",    gc_get_objects,   METH_NOARGS,  gc_get_objects__doc__},
    {"is_tracked",     gc_is_tracked,    METH_O,       gc_is_tracked__doc__},
    {"get_referrers",  gc_get_referrers, METH_VARARGS,
        gc_get_referrers__doc__},
    {"get_referents",  gc_get_referents, METH_VARARGS,
        gc_get_referents__doc__},
    {NULL,      NULL}           /* Sentinel */
};

PyMODINIT_FUNC
initgc(void)
{
    PyObject *m;

    m = Py_InitModule4("gc",
                       GcMethods,
                       gc__doc__,
                       NULL,
                       PYTHON_API_VERSION);
    if (m == NULL)
        return;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            return;
    }
    Py_INCREF(garbage);
    if (PyModule_AddObject(m, "garbage", garbage) < 0)
        return;

    /* Importing can't be done in collect() because collect()
     * can be called via PyGC_Collect() in Py_Finalize().
     * This wouldn't be a problem, except that <initialized> is
     * reset to 0 before calling collect which trips up
     * the import and triggers an assertion.
     */
    if (tmod == NULL) {
        tmod = PyImport_ImportModuleNoBlock("time");
        if (tmod == NULL)
            PyErr_Clear();
    }

#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return
    ADD_INT(DEBUG_STATS);
    ADD_INT(DEBUG_COLLECTABLE);
    ADD_INT(DEBUG_UNCOLLECTABLE);
    ADD_INT(DEBUG_INSTANCES);
    ADD_INT(DEBUG_OBJECTS);
    ADD_INT(DEBUG_SAVEALL);
    ADD_INT(DEBUG_LEAK);
#undef ADD_INT
}

/* API to invoke gc.collect() from C */
Py_ssize_t
PyGC_Collect(void)
{
    Py_ssize_t n;

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        PyObject *exc, *value, *tb;
        collecting = 1;
        PyErr_Fetch(&exc, &value, &tb);
        n = collect(NUM_GENERATIONS - 1);
        PyErr_Restore(exc, value, tb);
        collecting = 0;
    }

    return n;
}
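
/* A minimal usage sketch for embedders (hypothetical host program, compiled
 * out): force a full collection from C and report how many unreachable
 * objects were found.  PyGC_Collect is safe to call even while an exception
 * is pending, since it fetches and restores the error state as shown above.
 */
#if 0
static void
host_collect_now(void)     /* hypothetical helper */
{
    Py_ssize_t found = PyGC_Collect();
    fprintf(stderr, "gc found %" PY_FORMAT_SIZE_T "d unreachable objects\n",
            found);
}
#endif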

/* for debugging */
void
_PyGC_Dump(PyGC_Head *g)
{
    _PyObject_Dump(FROM_GC(g));
}

/* extension modules might be compiled with GC support so these
   functions must always be available */

#undef PyObject_GC_Track
#undef PyObject_GC_UnTrack
#undef PyObject_GC_Del
#undef _PyObject_GC_Malloc

void
PyObject_GC_Track(void *op)
{
    _PyObject_GC_TRACK(op);
}

/* for binary compatibility with 2.2 */
void
_PyObject_GC_Track(PyObject *op)
{
    PyObject_GC_Track(op);
}

void
PyObject_GC_UnTrack(void *op)
{
    /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
     * call PyObject_GC_UnTrack twice on an object.
     */
    if (IS_TRACKED(op))
        _PyObject_GC_UNTRACK(op);
}

/* for binary compatibility with 2.2 */
void
_PyObject_GC_UnTrack(PyObject *op)
{
    PyObject_GC_UnTrack(op);
}

PyObject *
_PyObject_GC_Malloc(size_t basicsize)
{
    PyObject *op;
    PyGC_Head *g;
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
        return PyErr_NoMemory();
    g = (PyGC_Head *)PyObject_MALLOC(
        sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return PyErr_NoMemory();
    g->gc.gc_refs = GC_UNTRACKED;
    generations[0].count++; /* number of allocated GC objects */
    if (generations[0].count > generations[0].threshold &&
        enabled &&
        generations[0].threshold &&
        !collecting &&
        !PyErr_Occurred()) {
        collecting = 1;
        collect_generations();
        collecting = 0;
    }
    op = FROM_GC(g);
    return op;
}

PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
    PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
    if (op != NULL)
        op = PyObject_INIT(op, tp);
    return op;
}
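
/* A minimal lifecycle sketch for a GC-aware extension type, tying together
 * the hypothetical sketchobject/sketch_traverse/sketch_clear from the
 * sketches above: allocate untracked, start tracking only once the object
 * is fully initialized, and untrack before tearing it down in tp_dealloc.
 */
#if 0
static PyObject *
sketch_new(PyTypeObject *type)
{
    sketchobject *self = (sketchobject *)_PyObject_GC_New(type);
    if (self == NULL)
        return NULL;
    self->first = NULL;
    self->second = NULL;
    PyObject_GC_Track((PyObject *)self); /* tp_traverse may be called now */
    return (PyObject *)self;
}

static void
sketch_dealloc(sketchobject *self)
{
    PyObject_GC_UnTrack(self);  /* keep the collector away during teardown */
    Py_XDECREF(self->first);
    Py_XDECREF(self->second);
    PyObject_GC_Del(self);
}
#endif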

PyVarObject *
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
{
    const size_t size = _PyObject_VAR_SIZE(tp, nitems);
    PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(size);
    if (op != NULL)
        op = PyObject_INIT_VAR(op, tp, nitems);
    return op;
}

PyVarObject *
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
{
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
    PyGC_Head *g = AS_GC(op);
    assert(!IS_TRACKED(op));
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
        return (PyVarObject *)PyErr_NoMemory();
    g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return (PyVarObject *)PyErr_NoMemory();
    op = (PyVarObject *) FROM_GC(g);
    Py_SIZE(op) = nitems;
    return op;
}

void
PyObject_GC_Del(void *op)
{
    PyGC_Head *g = AS_GC(op);
    if (IS_TRACKED(op))
        gc_list_remove(g);
    if (generations[0].count > 0) {
        generations[0].count--;
    }
    PyObject_FREE(g);
}

/* for binary compatibility with 2.2 */
#undef _PyObject_GC_Del
void
_PyObject_GC_Del(PyObject *op)
{
    PyObject_GC_Del(op);
}