diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 1e1b664000f1081..9aa2e5bf918a7b9 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -86,9 +86,9 @@ PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc( built against the pre-3.12 stable ABI. */ PyAPI_DATA(Py_ssize_t) _Py_RefTotal; -extern void _Py_AddRefTotal(PyInterpreterState *, Py_ssize_t); -extern void _Py_IncRefTotal(PyInterpreterState *); -extern void _Py_DecRefTotal(PyInterpreterState *); +extern void _Py_AddRefTotal(PyThreadState *, Py_ssize_t); +extern void _Py_IncRefTotal(PyThreadState *); +extern void _Py_DecRefTotal(PyThreadState *); # define _Py_DEC_REFTOTAL(interp) \ interp->object_state.reftotal-- @@ -101,7 +101,7 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n) return; } #ifdef Py_REF_DEBUG - _Py_AddRefTotal(_PyInterpreterState_GET(), n); + _Py_AddRefTotal(_PyThreadState_GET(), n); #endif #if !defined(Py_GIL_DISABLED) op->ob_refcnt += n; @@ -393,7 +393,7 @@ _Py_TryIncrefFast(PyObject *op) { _Py_INCREF_STAT_INC(); _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); #ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif return 1; } @@ -416,7 +416,7 @@ _Py_TryIncRefShared(PyObject *op) &shared, shared + (1 << _Py_REF_SHARED_SHIFT))) { #ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif _Py_INCREF_STAT_INC(); return 1; diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index e268e6fbbb087b2..733e3172a1c0ff6 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -38,6 +38,10 @@ typedef struct _PyThreadStateImpl { struct _brc_thread_state brc; #endif +#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) + Py_ssize_t reftotal; // this thread's total refcount operations +#endif + } _PyThreadStateImpl; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index d7b0c6b7b01aa98..d576dd93f05e103 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3118,7 +3118,7 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize) PyObject_Realloc(v, PyBytesObject_SIZE + newsize); if (*pv == NULL) { #ifdef Py_REF_DEBUG - _Py_DecRefTotal(_PyInterpreterState_GET()); + _Py_DecRefTotal(_PyThreadState_GET()); #endif PyObject_Free(v); PyErr_NoMemory(); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 9218b1aa470663e..e7993e4b051433a 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -445,7 +445,7 @@ dictkeys_incref(PyDictKeysObject *dk) return; } #ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif INCREF_KEYS(dk); } @@ -458,7 +458,7 @@ dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr) } assert(dk->dk_refcnt > 0); #ifdef Py_REF_DEBUG - _Py_DecRefTotal(_PyInterpreterState_GET()); + _Py_DecRefTotal(_PyThreadState_GET()); #endif if (DECREF_KEYS(dk) == 1) { if (DK_IS_UNICODE(dk)) { @@ -790,7 +790,7 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) } } #ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif dk->dk_refcnt = 1; dk->dk_log2_size = log2_size; @@ -978,7 +978,7 @@ clone_combined_dict_keys(PyDictObject *orig) we have it now; calling dictkeys_incref would be an error as keys->dk_refcnt is already set to 1 (after memcpy). */ #ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif return keys; } @@ -2021,7 +2021,7 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, if (oldkeys != Py_EMPTY_KEYS) { #ifdef Py_REF_DEBUG - _Py_DecRefTotal(_PyInterpreterState_GET()); + _Py_DecRefTotal(_PyThreadState_GET()); #endif assert(oldkeys->dk_kind != DICT_KEYS_SPLIT); assert(oldkeys->dk_refcnt == 1); diff --git a/Objects/object.c b/Objects/object.c index 60642d899bcafad..c8e6f8fc1a2b40e 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -73,21 +73,16 @@ get_legacy_reftotal(void) interp->object_state.reftotal static inline void -reftotal_increment(PyInterpreterState *interp) +reftotal_add(PyThreadState *tstate, Py_ssize_t n) { - REFTOTAL(interp)++; -} - -static inline void -reftotal_decrement(PyInterpreterState *interp) -{ - REFTOTAL(interp)--; -} - -static inline void -reftotal_add(PyInterpreterState *interp, Py_ssize_t n) -{ - REFTOTAL(interp) += n; +#ifdef Py_GIL_DISABLED + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + // relaxed store to avoid data race with read in get_reftotal() + Py_ssize_t reftotal = tstate_impl->reftotal + n; + _Py_atomic_store_ssize_relaxed(&tstate_impl->reftotal, reftotal); +#else + REFTOTAL(tstate->interp) += n; +#endif } static inline Py_ssize_t get_global_reftotal(_PyRuntimeState *); @@ -117,7 +112,15 @@ get_reftotal(PyInterpreterState *interp) { /* For a single interpreter, we ignore the legacy _Py_RefTotal, since we can't determine which interpreter updated it. */ - return REFTOTAL(interp); + Py_ssize_t total = REFTOTAL(interp); +#ifdef Py_GIL_DISABLED + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + /* This may race with other threads modifications to their reftotal */ + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)p; + total += _Py_atomic_load_ssize_relaxed(&tstate_impl->reftotal); + } +#endif + return total; } static inline Py_ssize_t @@ -129,7 +132,7 @@ get_global_reftotal(_PyRuntimeState *runtime) HEAD_LOCK(&_PyRuntime); PyInterpreterState *interp = PyInterpreterState_Head(); for (; interp != NULL; interp = PyInterpreterState_Next(interp)) { - total += REFTOTAL(interp); + total += get_reftotal(interp); } HEAD_UNLOCK(&_PyRuntime); @@ -222,32 +225,32 @@ _Py_NegativeRefcount(const char *filename, int lineno, PyObject *op) void _Py_INCREF_IncRefTotal(void) { - reftotal_increment(_PyInterpreterState_GET()); + reftotal_add(_PyThreadState_GET(), 1); } /* This is used strictly by Py_DECREF(). */ void _Py_DECREF_DecRefTotal(void) { - reftotal_decrement(_PyInterpreterState_GET()); + reftotal_add(_PyThreadState_GET(), -1); } void -_Py_IncRefTotal(PyInterpreterState *interp) +_Py_IncRefTotal(PyThreadState *tstate) { - reftotal_increment(interp); + reftotal_add(tstate, 1); } void -_Py_DecRefTotal(PyInterpreterState *interp) +_Py_DecRefTotal(PyThreadState *tstate) { - reftotal_decrement(interp); + reftotal_add(tstate, -1); } void -_Py_AddRefTotal(PyInterpreterState *interp, Py_ssize_t n) +_Py_AddRefTotal(PyThreadState *tstate, Py_ssize_t n) { - reftotal_add(interp, n); + reftotal_add(tstate, n); } /* This includes the legacy total @@ -267,7 +270,10 @@ _Py_GetLegacyRefTotal(void) Py_ssize_t _PyInterpreterState_GetRefTotal(PyInterpreterState *interp) { - return get_reftotal(interp); + HEAD_LOCK(&_PyRuntime); + Py_ssize_t total = get_reftotal(interp); + HEAD_UNLOCK(&_PyRuntime); + return total; } #endif /* Py_REF_DEBUG */ @@ -345,7 +351,7 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) if (should_queue) { #ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif _Py_brc_queue_object(o); } @@ -405,7 +411,7 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) &shared, new_shared)); #ifdef Py_REF_DEBUG - _Py_AddRefTotal(_PyInterpreterState_GET(), extra); + _Py_AddRefTotal(_PyThreadState_GET(), extra); #endif _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); @@ -2376,7 +2382,7 @@ void _Py_NewReference(PyObject *op) { #ifdef Py_REF_DEBUG - reftotal_increment(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif new_reference(op); } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index d9dc00da368a84c..5ae1ee9a89af84c 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -946,7 +946,7 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) if (sv == NULL) { *pv = NULL; #ifdef Py_REF_DEBUG - _Py_DecRefTotal(_PyInterpreterState_GET()); + _Py_DecRefTotal(_PyThreadState_GET()); #endif PyObject_GC_Del(v); return -1; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e135638c696fa47..59b350f0a609a65 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14916,7 +14916,7 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) decrements to these objects will not be registered so they need to be accounted for in here. */ for (Py_ssize_t i = 0; i < Py_REFCNT(s) - 2; i++) { - _Py_DecRefTotal(_PyInterpreterState_GET()); + _Py_DecRefTotal(_PyThreadState_GET()); } #endif _Py_SetImmortal(s); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 7e4137a8e342b13..111632ffb776410 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -168,7 +168,7 @@ merge_refcount(PyObject *op, Py_ssize_t extra) refcount += extra; #ifdef Py_REF_DEBUG - _Py_AddRefTotal(_PyInterpreterState_GET(), extra); + _Py_AddRefTotal(_PyThreadState_GET(), extra); #endif // No atomics necessary; all other threads in this interpreter are paused. @@ -307,7 +307,7 @@ merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state) // decref and deallocate the object once we start the world again. op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT); #ifdef Py_REF_DEBUG - _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyThreadState_GET()); #endif worklist_push(&state->objs_to_decref, op); } diff --git a/Python/pystate.c b/Python/pystate.c index cee481c564b0cb6..4a52f6444ba10a6 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1698,6 +1698,14 @@ tstate_delete_common(PyThreadState *tstate) decrement_stoptheworld_countdown(&runtime->stoptheworld); } } + +#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) + // Add our portion of the total refcount to the interpreter's total. + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate->interp->object_state.reftotal += tstate_impl->reftotal; + tstate_impl->reftotal = 0; +#endif + HEAD_UNLOCK(runtime); #ifdef Py_GIL_DISABLED