From 183aef0b3e1eddbb76d813db639ab0a4d496c0ac Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Thu, 28 Mar 2024 11:14:12 +0000 Subject: [PATCH 1/4] [3.11] gh-106883 Fix deadlock in threaded application When using threaded applications, there is a high risk of a deadlock in the intepreter. It's a lock ordering deadlock with HEAD_LOCK(&_PyRuntime); and the GIL. It has been suggested to disable GC during the _PyThread_CurrentFrames() and _PyThread_CurrentExceptions() calls. --- Python/pystate.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Python/pystate.c b/Python/pystate.c index db2ce878af64ec..264191f943e81d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2,6 +2,7 @@ /* Thread and interpreter state structures and their interfaces */ #include "Python.h" +#include "objimpl.h" #include "pycore_ceval.h" #include "pycore_code.h" // stats #include "pycore_frame.h" @@ -1388,6 +1389,10 @@ PyThreadState_Next(PyThreadState *tstate) { PyObject * _PyThread_CurrentFrames(void) { + // Disable the GC as this can cause a deadlock the interpreter. + // See issues 116969 and 106883. + PyGC_Disable(); + PyThreadState *tstate = _PyThreadState_GET(); if (_PySys_Audit(tstate, "sys._current_frames", NULL) < 0) { return NULL; @@ -1440,12 +1445,20 @@ _PyThread_CurrentFrames(void) done: HEAD_UNLOCK(runtime); + + // Once we release the runtime, the GC can be reenabled. + PyGC_Enable(); + return result; } PyObject * _PyThread_CurrentExceptions(void) { + // Disable the GC as this can cause a deadlock the interpreter. + // See issues 116969 and 106883. + PyGC_Disable(); + PyThreadState *tstate = _PyThreadState_GET(); _Py_EnsureTstateNotNULL(tstate); @@ -1499,6 +1512,10 @@ _PyThread_CurrentExceptions(void) done: HEAD_UNLOCK(runtime); + + // Once we release the runtime, the GC can be reenabled. + PyGC_Enable(); + return result; } From 20a2b04e8f7d1cc7bf85332966d0f252e0cc3d3b Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Thu, 28 Mar 2024 11:14:12 +0000 Subject: [PATCH 2/4] gh-106883 Fix deadlock in threaded application When using threaded applications, there is a high risk of a deadlock in the intepreter. It's a lock ordering deadlock with HEAD_LOCK(&_PyRuntime); and the GIL. By disabling the GC during the _PyThread_CurrentFrames() and _PyThread_CurrentExceptions() calls fixes the issue. --- Lib/test/test_sys.py | 49 ++++++++++++++++++++++++++++++++++++++++++++ Python/pystate.c | 27 ++++++++++++------------ 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 86cf1a794f973c..7bed9440e5c20b 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -471,6 +471,55 @@ def g456(): leave_g.set() t.join() + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + @support.requires_fork() + def test_current_frames_exceptions_deadlock(self): + """ + Try to reproduce the bug raised in GH-106883 and GH-116969. + """ + import threading + import time + + class MockObject: + def __init__(self): + # Create some garbage + self._list = list(range(10000)) + # Call the functions under test + self._trace = sys._current_frames() + self._exceptions = sys._current_exceptions() + + def thread_function(num_objects): + obj = None + for _ in range(num_objects): + # The sleep is needed to have a syscall: in interrupts the + # current thread, releases the GIL and gives way to other + # threads to be executed. In this way there are more chances + # to reproduce the bug. + time.sleep(0) + obj = MockObject() + + NUM_OBJECTS = 25 + NUM_THREADS = 1000 + + # 60 seconds should be enough for the test to be executed: if it + # is more than 60 seconds it means that the process is in deadlock + # hence the test fails + TIMEOUT = 60 + + # Test the sys._current_frames and sys._current_exceptions calls + pid = os.fork() + if pid: # parent process + support.wait_process(pid, exitcode=0, timeout=TIMEOUT) + else: # child process + # Run the actual test in the forked process. + for i in range(NUM_THREADS): + thread = threading.Thread( + target=thread_function, args=(NUM_OBJECTS,) + ) + thread.start() + os._exit(0) + @threading_helper.reap_threads @threading_helper.requires_working_threading() def test_current_exceptions(self): diff --git a/Python/pystate.c b/Python/pystate.c index 264191f943e81d..a45116665fe195 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2,7 +2,6 @@ /* Thread and interpreter state structures and their interfaces */ #include "Python.h" -#include "objimpl.h" #include "pycore_ceval.h" #include "pycore_code.h" // stats #include "pycore_frame.h" @@ -1389,10 +1388,6 @@ PyThreadState_Next(PyThreadState *tstate) { PyObject * _PyThread_CurrentFrames(void) { - // Disable the GC as this can cause a deadlock the interpreter. - // See issues 116969 and 106883. - PyGC_Disable(); - PyThreadState *tstate = _PyThreadState_GET(); if (_PySys_Audit(tstate, "sys._current_frames", NULL) < 0) { return NULL; @@ -1403,6 +1398,9 @@ _PyThread_CurrentFrames(void) return NULL; } + // gh-106883: Disable the GC as this can cause the interpreter to deadlock + int gc_was_enabled = PyGC_Disable(); + /* for i in all interpreters: * for t in all of i's thread states: * if t's frame isn't NULL, map t's id to its frame @@ -1446,8 +1444,10 @@ _PyThread_CurrentFrames(void) done: HEAD_UNLOCK(runtime); - // Once we release the runtime, the GC can be reenabled. - PyGC_Enable(); + // Once the runtime is released, the GC can be reenabled. + if (gc_was_enabled) { + PyGC_Enable(); + } return result; } @@ -1455,10 +1455,6 @@ _PyThread_CurrentFrames(void) PyObject * _PyThread_CurrentExceptions(void) { - // Disable the GC as this can cause a deadlock the interpreter. - // See issues 116969 and 106883. - PyGC_Disable(); - PyThreadState *tstate = _PyThreadState_GET(); _Py_EnsureTstateNotNULL(tstate); @@ -1472,6 +1468,9 @@ _PyThread_CurrentExceptions(void) return NULL; } + // gh-106883: Disable the GC as this can cause the interpreter to deadlock + int gc_was_enabled = PyGC_Disable(); + /* for i in all interpreters: * for t in all of i's thread states: * if t's frame isn't NULL, map t's id to its frame @@ -1513,8 +1512,10 @@ _PyThread_CurrentExceptions(void) done: HEAD_UNLOCK(runtime); - // Once we release the runtime, the GC can be reenabled. - PyGC_Enable(); + // Once the runtime is released, the GC can be reenabled. + if (gc_was_enabled) { + PyGC_Enable(); + } return result; } From 6d25f1167489131a0c212a0c6cf775a9e6766dc3 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 5 Apr 2024 14:32:22 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst diff --git a/Misc/NEWS.d/next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst b/Misc/NEWS.d/next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst new file mode 100644 index 00000000000000..01c7fb0c790708 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-04-05-14-32-21.gh-issue-106883.OKmc0Q.rst @@ -0,0 +1 @@ +Disable GC during the _PyThread_CurrentFrames() and _PyThread_CurrentExceptions() calls to avoid the interpreter to deadlock. From 6a88da50f28aace79d3a613887262003caea9f13 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Tue, 9 Apr 2024 11:29:37 +0100 Subject: [PATCH 4/4] Improve test case --- Lib/test/test_sys.py | 49 +++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 7bed9440e5c20b..28931039f3792e 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -14,6 +14,7 @@ from test.support.script_helper import assert_python_ok, assert_python_failure from test.support import threading_helper from test.support import import_helper +from test.support import skip_if_sanitizer import textwrap import unittest import warnings @@ -471,15 +472,18 @@ def g456(): leave_g.set() t.join() + @skip_if_sanitizer(memory=True, address=True, reason= "Test too slow " + "when the address sanitizer is enabled.") @threading_helper.reap_threads @threading_helper.requires_working_threading() @support.requires_fork() def test_current_frames_exceptions_deadlock(self): """ - Try to reproduce the bug raised in GH-106883 and GH-116969. + Reproduce the bug raised in GH-106883 and GH-116969. """ import threading import time + import signal class MockObject: def __init__(self): @@ -489,35 +493,56 @@ def __init__(self): self._trace = sys._current_frames() self._exceptions = sys._current_exceptions() + def __del__(self): + # The presence of the __del__ method causes the deadlock when + # there is one thread executing the _current_frames or + # _current_exceptions functions and the other thread is + # running the GC: + # thread 1 has the interpreter lock and it is trying to + # acquire the GIL; thread 2 holds the GIL but is trying to + # acquire the interpreter lock. + # When the GC is running and it finds that an + # object has the __del__ method, it needs to execute the + # Python code in it and it requires the GIL to execute it + # (which will never happen because it is held by another thread + # blocked on the acquisition of the interpreter lock) + pass + def thread_function(num_objects): obj = None for _ in range(num_objects): - # The sleep is needed to have a syscall: in interrupts the - # current thread, releases the GIL and gives way to other - # threads to be executed. In this way there are more chances - # to reproduce the bug. - time.sleep(0) obj = MockObject() - NUM_OBJECTS = 25 - NUM_THREADS = 1000 + # The number of objects should be big enough to increase the + # chances to call the GC. + NUM_OBJECTS = 1000 + NUM_THREADS = 10 - # 60 seconds should be enough for the test to be executed: if it - # is more than 60 seconds it means that the process is in deadlock + # 40 seconds should be enough for the test to be executed: if it + # is more than 40 seconds it means that the process is in deadlock # hence the test fails - TIMEOUT = 60 + TIMEOUT = 40 # Test the sys._current_frames and sys._current_exceptions calls pid = os.fork() if pid: # parent process - support.wait_process(pid, exitcode=0, timeout=TIMEOUT) + try: + support.wait_process(pid, exitcode=0, timeout=TIMEOUT) + except KeyboardInterrupt: + # When pressing CTRL-C kill the deadlocked process + os.kill(pid, signal.SIGTERM) + raise else: # child process # Run the actual test in the forked process. + threads = [] for i in range(NUM_THREADS): thread = threading.Thread( target=thread_function, args=(NUM_OBJECTS,) ) + threads.append(thread) thread.start() + for t in threads: + t.join() os._exit(0) @threading_helper.reap_threads