From e545ead66ce725aae6fb0ad5d733abe806c19750 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 24 Oct 2024 09:33:11 -0400 Subject: [PATCH] gh-125859: Fix crash when `gc.get_objects` is called during GC (#125882) This fixes a crash when `gc.get_objects()` or `gc.get_referrers()` is called during a GC in the free threading build. Switch to `_PyObjectStack` to avoid corrupting the `struct worklist` linked list maintained by the GC. Also, don't return objects that are frozen (`gc.freeze()`) or in the process of being collected to more closely match the behavior of the default build. --- Include/internal/pycore_object_stack.h | 10 ++ Lib/test/test_free_threading/test_gc.py | 61 ++++++++ Lib/test/test_gc.py | 23 +++ ...-10-23-14-42-27.gh-issue-125859.m3EF9E.rst | 2 + Python/gc_free_threading.c | 137 ++++++++---------- 5 files changed, 160 insertions(+), 73 deletions(-) create mode 100644 Lib/test/test_free_threading/test_gc.py create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h index c607ea8bc52545..39e69b7cde52a1 100644 --- a/Include/internal/pycore_object_stack.h +++ b/Include/internal/pycore_object_stack.h @@ -71,6 +71,16 @@ _PyObjectStack_Pop(_PyObjectStack *stack) return obj; } +static inline Py_ssize_t +_PyObjectStack_Size(_PyObjectStack *stack) +{ + Py_ssize_t size = 0; + for (_PyObjectStackChunk *buf = stack->head; buf != NULL; buf = buf->prev) { + size += buf->n; + } + return size; +} + // Merge src into dst, leaving src empty extern void _PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src); diff --git a/Lib/test/test_free_threading/test_gc.py b/Lib/test/test_free_threading/test_gc.py new file mode 100644 index 00000000000000..401067fe9c612c --- /dev/null +++ b/Lib/test/test_free_threading/test_gc.py @@ -0,0 +1,61 @@ +import unittest + +import threading +from threading import Thread +from unittest import TestCase +import gc + +from test.support import threading_helper + + +class MyObj: + pass + + +@threading_helper.requires_working_threading() +class TestGC(TestCase): + def test_get_objects(self): + event = threading.Event() + + def gc_thread(): + for i in range(100): + o = gc.get_objects() + event.set() + + def mutator_thread(): + while not event.is_set(): + o1 = MyObj() + o2 = MyObj() + o3 = MyObj() + o4 = MyObj() + + gcs = [Thread(target=gc_thread)] + mutators = [Thread(target=mutator_thread) for _ in range(4)] + with threading_helper.start_threads(gcs + mutators): + pass + + def test_get_referrers(self): + event = threading.Event() + + obj = MyObj() + + def gc_thread(): + for i in range(100): + o = gc.get_referrers(obj) + event.set() + + def mutator_thread(): + while not event.is_set(): + d1 = { "key": obj } + d2 = { "key": obj } + d3 = { "key": obj } + d4 = { "key": obj } + + gcs = [Thread(target=gc_thread) for _ in range(2)] + mutators = [Thread(target=mutator_thread) for _ in range(4)] + with threading_helper.start_threads(gcs + mutators): + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index bb7df1f5cfa7f7..cc2b4fac05b48b 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1065,6 +1065,29 @@ def test_get_referents_on_capsule(self): self.assertEqual(len(gc.get_referents(untracked_capsule)), 0) gc.get_referents(tracked_capsule) + @cpython_only + def test_get_objects_during_gc(self): + # gh-125859: Calling gc.get_objects() or gc.get_referrers() during a + # collection should not crash. + test = self + collected = False + + class GetObjectsOnDel: + def __del__(self): + nonlocal collected + collected = True + objs = gc.get_objects() + # NB: can't use "in" here because some objects override __eq__ + for obj in objs: + test.assertTrue(obj is not self) + test.assertEqual(gc.get_referrers(self), []) + + obj = GetObjectsOnDel() + obj.cycle = obj + del obj + + gc.collect() + self.assertTrue(collected) class IncrementalGCTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst new file mode 100644 index 00000000000000..d36aa8fbe7482f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst @@ -0,0 +1,2 @@ +Fix a crash in the free threading build when :func:`gc.get_objects` or +:func:`gc.get_referrers` is called during an in-progress garbage collection. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 8558d4555a9a3a..1969ed608ea524 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1401,10 +1401,32 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) return n + m; } +static PyObject * +list_from_object_stack(_PyObjectStack *stack) +{ + PyObject *list = PyList_New(_PyObjectStack_Size(stack)); + if (list == NULL) { + PyObject *op; + while ((op = _PyObjectStack_Pop(stack)) != NULL) { + Py_DECREF(op); + } + return NULL; + } + + PyObject *op; + Py_ssize_t idx = 0; + while ((op = _PyObjectStack_Pop(stack)) != NULL) { + assert(idx < PyList_GET_SIZE(list)); + PyList_SET_ITEM(list, idx++, op); + } + assert(idx == PyList_GET_SIZE(list)); + return list; +} + struct get_referrers_args { struct visitor_args base; PyObject *objs; - struct worklist results; + _PyObjectStack results; }; static int @@ -1428,11 +1450,21 @@ visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, if (op == NULL) { return true; } + if (op->ob_gc_bits & (_PyGC_BITS_UNREACHABLE | _PyGC_BITS_FROZEN)) { + // Exclude unreachable objects (in-progress GC) and frozen + // objects from gc.get_objects() to match the default build. + return true; + } struct get_referrers_args *arg = (struct get_referrers_args *)args; + if (op == arg->objs) { + // Don't include the tuple itself in the referrers list. + return true; + } if (Py_TYPE(op)->tp_traverse(op, referrersvisit, arg->objs)) { - op->ob_tid = 0; // we will restore the refcount later - worklist_push(&arg->results, op); + if (_PyObjectStack_Push(&arg->results, Py_NewRef(op)) < 0) { + return false; + } } return true; @@ -1441,48 +1473,25 @@ visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, PyObject * _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) { - PyObject *result = PyList_New(0); - if (!result) { - return NULL; - } - - _PyEval_StopTheWorld(interp); - - // Append all objects to a worklist. This abuses ob_tid. We will restore - // it later. NOTE: We can't append to the PyListObject during - // gc_visit_heaps() because PyList_Append() may reclaim an abandoned - // mimalloc segments while we are traversing them. + // NOTE: We can't append to the PyListObject during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segments + // while we are traversing them. struct get_referrers_args args = { .objs = objs }; - gc_visit_heaps(interp, &visit_get_referrers, &args.base); - - bool error = false; - PyObject *op; - while ((op = worklist_pop(&args.results)) != NULL) { - gc_restore_tid(op); - if (op != objs && PyList_Append(result, op) < 0) { - error = true; - break; - } - } - - // In case of error, clear the remaining worklist - while ((op = worklist_pop(&args.results)) != NULL) { - gc_restore_tid(op); - } - + _PyEval_StopTheWorld(interp); + int err = gc_visit_heaps(interp, &visit_get_referrers, &args.base); _PyEval_StartTheWorld(interp); - if (error) { - Py_DECREF(result); - return NULL; + PyObject *list = list_from_object_stack(&args.results); + if (err < 0) { + PyErr_NoMemory(); + Py_CLEAR(list); } - - return result; + return list; } struct get_objects_args { struct visitor_args base; - struct worklist objects; + _PyObjectStack objects; }; static bool @@ -1493,54 +1502,36 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, if (op == NULL) { return true; } + if (op->ob_gc_bits & (_PyGC_BITS_UNREACHABLE | _PyGC_BITS_FROZEN)) { + // Exclude unreachable objects (in-progress GC) and frozen + // objects from gc.get_objects() to match the default build. + return true; + } struct get_objects_args *arg = (struct get_objects_args *)args; - op->ob_tid = 0; // we will restore the refcount later - worklist_push(&arg->objects, op); - + if (_PyObjectStack_Push(&arg->objects, Py_NewRef(op)) < 0) { + return false; + } return true; } PyObject * _PyGC_GetObjects(PyInterpreterState *interp, int generation) { - PyObject *result = PyList_New(0); - if (!result) { - return NULL; - } - - _PyEval_StopTheWorld(interp); - - // Append all objects to a worklist. This abuses ob_tid. We will restore - // it later. NOTE: We can't append to the list during gc_visit_heaps() - // because PyList_Append() may reclaim an abandoned mimalloc segment - // while we are traversing it. + // NOTE: We can't append to the PyListObject during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segments + // while we are traversing them. struct get_objects_args args = { 0 }; - gc_visit_heaps(interp, &visit_get_objects, &args.base); - - bool error = false; - PyObject *op; - while ((op = worklist_pop(&args.objects)) != NULL) { - gc_restore_tid(op); - if (op != result && PyList_Append(result, op) < 0) { - error = true; - break; - } - } - - // In case of error, clear the remaining worklist - while ((op = worklist_pop(&args.objects)) != NULL) { - gc_restore_tid(op); - } - + _PyEval_StopTheWorld(interp); + int err = gc_visit_heaps(interp, &visit_get_objects, &args.base); _PyEval_StartTheWorld(interp); - if (error) { - Py_DECREF(result); - return NULL; + PyObject *list = list_from_object_stack(&args.objects); + if (err < 0) { + PyErr_NoMemory(); + Py_CLEAR(list); } - - return result; + return list; } static bool